diff options
Diffstat (limited to 'tvix/castore/src')
-rw-r--r-- | tvix/castore/src/directoryservice/directory_graph.rs | 56 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/object_store.rs | 8 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/order_validator.rs | 32 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/tests/mod.rs | 11 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/traverse.rs | 58 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/utils.rs | 17 | ||||
-rw-r--r-- | tvix/castore/src/fixtures.rs | 78 | ||||
-rw-r--r-- | tvix/castore/src/fs/fuse/tests.rs | 74 | ||||
-rw-r--r-- | tvix/castore/src/fs/inodes.rs | 12 | ||||
-rw-r--r-- | tvix/castore/src/import/mod.rs | 47 | ||||
-rw-r--r-- | tvix/castore/src/nodes/directory.rs | 114 | ||||
-rw-r--r-- | tvix/castore/src/nodes/directory_node.rs | 35 | ||||
-rw-r--r-- | tvix/castore/src/nodes/file_node.rs | 36 | ||||
-rw-r--r-- | tvix/castore/src/nodes/mod.rs | 46 | ||||
-rw-r--r-- | tvix/castore/src/nodes/symlink_node.rs | 21 | ||||
-rw-r--r-- | tvix/castore/src/nodes/symlink_target.rs | 82 | ||||
-rw-r--r-- | tvix/castore/src/proto/mod.rs | 64 | ||||
-rw-r--r-- | tvix/castore/src/tests/import.rs | 24 |
18 files changed, 447 insertions, 368 deletions
diff --git a/tvix/castore/src/directoryservice/directory_graph.rs b/tvix/castore/src/directoryservice/directory_graph.rs index ffb25de9a854..2f932fe05d8a 100644 --- a/tvix/castore/src/directoryservice/directory_graph.rs +++ b/tvix/castore/src/directoryservice/directory_graph.rs @@ -10,7 +10,7 @@ use petgraph::{ use tracing::instrument; use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator}; -use crate::{B3Digest, Directory, DirectoryNode}; +use crate::{B3Digest, Directory, Node}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -18,6 +18,8 @@ pub enum Error { ValidationError(String), } +type Edge = (B3Digest, u64); + /// This can be used to validate and/or re-order a Directory closure (DAG of /// connected Directories), and their insertion order. /// @@ -55,7 +57,7 @@ pub struct DirectoryGraph<O> { // // The option in the edge weight tracks the pending validation state of the respective edge, for example if // the child has not been added yet. - graph: DiGraph<Option<Directory>, Option<DirectoryNode>>, + graph: DiGraph<Option<Directory>, Option<Edge>>, // A lookup table from directory digest to node index. digest_to_node_ix: HashMap<B3Digest, NodeIndex>, @@ -64,18 +66,18 @@ pub struct DirectoryGraph<O> { } pub struct ValidatedDirectoryGraph { - graph: DiGraph<Option<Directory>, Option<DirectoryNode>>, + graph: DiGraph<Option<Directory>, Option<Edge>>, root: Option<NodeIndex>, } -fn check_edge(dir: &DirectoryNode, dir_name: &[u8], child: &Directory) -> Result<(), Error> { +fn check_edge(dir: &Edge, dir_name: &[u8], child: &Directory) -> Result<(), Error> { // Ensure the size specified in the child node matches our records. - if dir.size() != child.size() { + if dir.1 != child.size() { return Err(Error::ValidationError(format!( "'{}' has wrong size, specified {}, recorded {}", dir_name.as_bstr(), - dir.size(), + dir.1, child.size(), ))); } @@ -141,21 +143,23 @@ impl<O: OrderValidator> DirectoryGraph<O> { } // set up edges to all child directories - for (subdir_name, subdir_node) in directory.directories() { - let child_ix = *self - .digest_to_node_ix - .entry(subdir_node.digest().clone()) - .or_insert_with(|| self.graph.add_node(None)); - - let pending_edge_check = match &self.graph[child_ix] { - Some(child) => { - // child is already available, validate the edge now - check_edge(subdir_node, subdir_name, child)?; - None - } - None => Some(subdir_node.clone()), // pending validation - }; - self.graph.add_edge(ix, child_ix, pending_edge_check); + for (name, node) in directory.nodes() { + if let Node::Directory { digest, size } = node { + let child_ix = *self + .digest_to_node_ix + .entry(digest.clone()) + .or_insert_with(|| self.graph.add_node(None)); + + let pending_edge_check = match &self.graph[child_ix] { + Some(child) => { + // child is already available, validate the edge now + check_edge(&(digest.to_owned(), *size), name, child)?; + None + } + None => Some((digest.to_owned(), *size)), // pending validation + }; + self.graph.add_edge(ix, child_ix, pending_edge_check); + } } // validate the edges from parents to this node @@ -270,7 +274,7 @@ impl ValidatedDirectoryGraph { #[cfg(test)] mod tests { use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C}; - use crate::{Directory, DirectoryNode, Node}; + use crate::{Directory, Node}; use lazy_static::lazy_static; use rstest::rstest; @@ -281,10 +285,10 @@ mod tests { let mut dir = Directory::new(); dir.add( "foo".into(), - Node::Directory(DirectoryNode::new( - DIRECTORY_A.digest(), - DIRECTORY_A.size() + 42, // wrong! - ))).unwrap(); + Node::Directory{ + digest: DIRECTORY_A.digest(), + size: DIRECTORY_A.size() + 42, // wrong! + }).unwrap(); dir }; } diff --git a/tvix/castore/src/directoryservice/object_store.rs b/tvix/castore/src/directoryservice/object_store.rs index 6e71c2356def..5b5281abcd2f 100644 --- a/tvix/castore/src/directoryservice/object_store.rs +++ b/tvix/castore/src/directoryservice/object_store.rs @@ -21,7 +21,7 @@ use super::{ RootToLeavesValidator, }; use crate::composition::{CompositionContext, ServiceBuilder}; -use crate::{proto, B3Digest, Error}; +use crate::{proto, B3Digest, Error, Node}; /// Stores directory closures in an object store. /// Notably, this makes use of the option to disallow accessing child directories except when @@ -85,7 +85,11 @@ impl DirectoryService for ObjectStoreDirectoryService { #[instrument(skip(self, directory), fields(directory.digest = %directory.digest()))] async fn put(&self, directory: Directory) -> Result<B3Digest, Error> { - if directory.directories().next().is_some() { + // Ensure the directory doesn't contain other directory children + if directory + .nodes() + .any(|(_, e)| matches!(e, Node::Directory { .. })) + { return Err(Error::InvalidRequest( "only put_multiple_start is supported by the ObjectStoreDirectoryService for directories with children".into(), )); diff --git a/tvix/castore/src/directoryservice/order_validator.rs b/tvix/castore/src/directoryservice/order_validator.rs index 6daf4a624d29..973af92e1294 100644 --- a/tvix/castore/src/directoryservice/order_validator.rs +++ b/tvix/castore/src/directoryservice/order_validator.rs @@ -2,7 +2,7 @@ use std::collections::HashSet; use tracing::warn; use super::Directory; -use crate::B3Digest; +use crate::{B3Digest, Node}; pub trait OrderValidator { /// Update the order validator's state with the directory @@ -48,9 +48,11 @@ impl RootToLeavesValidator { self.expected_digests.insert(directory.digest()); } - for (_, subdir_node) in directory.directories() { - // Allow the children to appear next - self.expected_digests.insert(subdir_node.digest().clone()); + // Allow the children to appear next + for (_, node) in directory.nodes() { + if let Node::Directory { digest, .. } = node { + self.expected_digests.insert(digest.clone()); + } } } } @@ -79,14 +81,20 @@ impl OrderValidator for LeavesToRootValidator { fn add_directory(&mut self, directory: &Directory) -> bool { let digest = directory.digest(); - for (_, subdir_node) in directory.directories() { - if !self.allowed_references.contains(subdir_node.digest()) { - warn!( - directory.digest = %digest, - subdirectory.digest = %subdir_node.digest(), - "unexpected directory reference" - ); - return false; + for (_, node) in directory.nodes() { + if let Node::Directory { + digest: subdir_node_digest, + .. + } = node + { + if !self.allowed_references.contains(subdir_node_digest) { + warn!( + directory.digest = %digest, + subdirectory.digest = %subdir_node_digest, + "unexpected directory reference" + ); + return false; + } } } diff --git a/tvix/castore/src/directoryservice/tests/mod.rs b/tvix/castore/src/directoryservice/tests/mod.rs index c0a0f22c989f..2bb9f05bf8e9 100644 --- a/tvix/castore/src/directoryservice/tests/mod.rs +++ b/tvix/castore/src/directoryservice/tests/mod.rs @@ -9,7 +9,7 @@ use rstest_reuse::{self, *}; use super::DirectoryService; use crate::directoryservice; use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C, DIRECTORY_D}; -use crate::{Directory, DirectoryNode, Node}; +use crate::{Directory, Node}; mod utils; use self::utils::make_grpc_directory_service_client; @@ -220,12 +220,13 @@ async fn upload_reject_wrong_size(directory_service: impl DirectoryService) { let mut dir = Directory::new(); dir.add( "foo".into(), - Node::Directory(DirectoryNode::new( - DIRECTORY_A.digest(), - DIRECTORY_A.size() + 42, // wrong! - )), + Node::Directory { + digest: DIRECTORY_A.digest(), + size: DIRECTORY_A.size() + 42, // wrong! + }, ) .unwrap(); + dir }; diff --git a/tvix/castore/src/directoryservice/traverse.rs b/tvix/castore/src/directoryservice/traverse.rs index de2ea33c7f9d..86a90175ef10 100644 --- a/tvix/castore/src/directoryservice/traverse.rs +++ b/tvix/castore/src/directoryservice/traverse.rs @@ -15,26 +15,24 @@ where let mut parent_node = root_node; for component in path.as_ref().components() { match parent_node { - Node::File(_) | Node::Symlink(_) => { + Node::File { .. } | Node::Symlink { .. } => { // There's still some path left, but the parent node is no directory. // This means the path doesn't exist, as we can't reach it. return Ok(None); } - Node::Directory(directory_node) => { + Node::Directory { digest, .. } => { // fetch the linked node from the directory_service. - let directory = directory_service - .as_ref() - .get(directory_node.digest()) - .await? - .ok_or_else(|| { - // If we didn't get the directory node that's linked, that's a store inconsistency, bail out! - warn!("directory {} does not exist", directory_node.digest()); - - Error::StorageError(format!( - "directory {} does not exist", - directory_node.digest() - )) - })?; + let directory = + directory_service + .as_ref() + .get(&digest) + .await? + .ok_or_else(|| { + // If we didn't get the directory node that's linked, that's a store inconsistency, bail out! + warn!("directory {} does not exist", digest); + + Error::StorageError(format!("directory {} does not exist", digest)) + })?; // look for the component in the [Directory]. if let Some((_child_name, child_node)) = directory @@ -59,8 +57,8 @@ where mod tests { use crate::{ directoryservice, - fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP}, - DirectoryNode, Node, PathBuf, + fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP, EMPTY_BLOB_DIGEST}, + Node, PathBuf, }; use super::descend_to; @@ -82,23 +80,23 @@ mod tests { handle.close().await.expect("must upload"); // construct the node for DIRECTORY_COMPLICATED - let node_directory_complicated = Node::Directory(DirectoryNode::new( - DIRECTORY_COMPLICATED.digest(), - DIRECTORY_COMPLICATED.size(), - )); + let node_directory_complicated = Node::Directory { + digest: DIRECTORY_COMPLICATED.digest(), + size: DIRECTORY_COMPLICATED.size(), + }; // construct the node for DIRECTORY_COMPLICATED - let node_directory_with_keep = Node::Directory( - DIRECTORY_COMPLICATED - .directories() - .next() - .unwrap() - .1 - .clone(), - ); + let node_directory_with_keep = Node::Directory { + digest: DIRECTORY_WITH_KEEP.digest(), + size: DIRECTORY_WITH_KEEP.size(), + }; // construct the node for the .keep file - let node_file_keep = Node::File(DIRECTORY_WITH_KEEP.files().next().unwrap().1.clone()); + let node_file_keep = Node::File { + digest: EMPTY_BLOB_DIGEST.clone(), + size: 0, + executable: false, + }; // traversal to an empty subpath should return the root node. { diff --git a/tvix/castore/src/directoryservice/utils.rs b/tvix/castore/src/directoryservice/utils.rs index 4b060707d4cc..d073c2c3c8ec 100644 --- a/tvix/castore/src/directoryservice/utils.rs +++ b/tvix/castore/src/directoryservice/utils.rs @@ -2,6 +2,7 @@ use super::Directory; use super::DirectoryService; use crate::B3Digest; use crate::Error; +use crate::Node; use async_stream::try_stream; use futures::stream::BoxStream; use std::collections::{HashSet, VecDeque}; @@ -57,15 +58,15 @@ pub fn traverse_directory<'a, DS: DirectoryService + 'static>( // enqueue all child directory digests to the work queue, as // long as they're not part of the worklist or already sent. // This panics if the digest looks invalid, it's supposed to be checked first. - for (_, child_directory_node) in current_directory.directories() { - let child_digest = child_directory_node.digest(); - - if worklist_directory_digests.contains(child_digest) - || sent_directory_digests.contains(child_digest) - { - continue; + for (_, child_directory_node) in current_directory.nodes() { + if let Node::Directory{digest: child_digest, ..} = child_directory_node { + if worklist_directory_digests.contains(child_digest) + || sent_directory_digests.contains(child_digest) + { + continue; + } + worklist_directory_digests.push_back(child_digest.clone()); } - worklist_directory_digests.push_back(child_digest.clone()); } yield current_directory; diff --git a/tvix/castore/src/fixtures.rs b/tvix/castore/src/fixtures.rs index b57540d177e2..ddeacaf1d5b9 100644 --- a/tvix/castore/src/fixtures.rs +++ b/tvix/castore/src/fixtures.rs @@ -1,6 +1,4 @@ -use crate::{ - B3Digest, {Directory, DirectoryNode, FileNode, Node, SymlinkNode}, -}; +use crate::{B3Digest, Directory, Node}; use lazy_static::lazy_static; pub const HELLOWORLD_BLOB_CONTENTS: &[u8] = b"Hello World!"; @@ -37,11 +35,11 @@ lazy_static! { let mut dir = Directory::new(); dir.add( ".keep".into(), - Node::File(FileNode::new( - EMPTY_BLOB_DIGEST.clone(), - 0, - false - ))).unwrap(); + Node::File{ + digest: EMPTY_BLOB_DIGEST.clone(), + size: 0, + executable: false + }).unwrap(); dir }; @@ -49,22 +47,22 @@ lazy_static! { let mut dir = Directory::new(); dir.add( "keep".into(), - Node::Directory(DirectoryNode::new( - DIRECTORY_WITH_KEEP.digest(), - DIRECTORY_WITH_KEEP.size() - ))).unwrap(); + Node::Directory{ + digest: DIRECTORY_WITH_KEEP.digest(), + size: DIRECTORY_WITH_KEEP.size() + }).unwrap(); dir.add( ".keep".into(), - Node::File(FileNode::new( - EMPTY_BLOB_DIGEST.clone(), - 0, - false - ))).unwrap(); + Node::File{ + digest: EMPTY_BLOB_DIGEST.clone(), + size: 0, + executable: false + }).unwrap(); dir.add( "aa".into(), - Node::Symlink(SymlinkNode::new( - b"/nix/store/somewhereelse".to_vec().into() - ).unwrap())).unwrap(); + Node::Symlink{ + target: "/nix/store/somewhereelse".try_into().unwrap() + }).unwrap(); dir }; @@ -73,10 +71,10 @@ lazy_static! { let mut dir = Directory::new(); dir.add( "a".into(), - Node::Directory(DirectoryNode::new( - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ))).unwrap(); + Node::Directory{ + digest: DIRECTORY_A.digest(), + size: DIRECTORY_A.size(), + }).unwrap(); dir }; @@ -84,16 +82,16 @@ lazy_static! { let mut dir = Directory::new(); dir.add( "a".into(), - Node::Directory(DirectoryNode::new( - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ))).unwrap(); + Node::Directory{ + digest: DIRECTORY_A.digest(), + size: DIRECTORY_A.size(), + }).unwrap(); dir.add( "a'".into(), - Node::Directory(DirectoryNode::new( - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ))).unwrap(); + Node::Directory{ + digest: DIRECTORY_A.digest(), + size: DIRECTORY_A.size(), + }).unwrap(); dir }; @@ -101,16 +99,16 @@ lazy_static! { let mut dir = Directory::new(); dir.add( "a".into(), - Node::Directory(DirectoryNode::new( - DIRECTORY_A.digest(), - DIRECTORY_A.size(), - ))).unwrap(); + Node::Directory{ + digest: DIRECTORY_A.digest(), + size: DIRECTORY_A.size(), + }).unwrap(); dir.add( "b".into(), - Node::Directory(DirectoryNode::new( - DIRECTORY_B.digest(), - DIRECTORY_B.size(), - ))).unwrap(); + Node::Directory{ + digest: DIRECTORY_B.digest(), + size: DIRECTORY_B.size(), + }).unwrap(); dir }; diff --git a/tvix/castore/src/fs/fuse/tests.rs b/tvix/castore/src/fs/fuse/tests.rs index 30bbfc46235d..e5e2e66c3b62 100644 --- a/tvix/castore/src/fs/fuse/tests.rs +++ b/tvix/castore/src/fs/fuse/tests.rs @@ -16,7 +16,7 @@ use crate::fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST use crate::{ blobservice::{BlobService, MemoryBlobService}, directoryservice::{DirectoryService, MemoryDirectoryService}, - fixtures, {DirectoryNode, FileNode, Node, SymlinkNode}, + fixtures, Node, }; const BLOB_A_NAME: &str = "00000000000000000000000000000000-test"; @@ -68,11 +68,11 @@ async fn populate_blob_a( root_nodes.insert( BLOB_A_NAME.into(), - Node::File(FileNode::new( - fixtures::BLOB_A_DIGEST.clone(), - fixtures::BLOB_A.len() as u64, - false, - )), + Node::File { + digest: fixtures::BLOB_A_DIGEST.clone(), + size: fixtures::BLOB_A.len() as u64, + executable: false, + }, ); } @@ -88,11 +88,11 @@ async fn populate_blob_b( root_nodes.insert( BLOB_B_NAME.into(), - Node::File(FileNode::new( - fixtures::BLOB_B_DIGEST.clone(), - fixtures::BLOB_B.len() as u64, - false, - )), + Node::File { + digest: fixtures::BLOB_B_DIGEST.clone(), + size: fixtures::BLOB_B.len() as u64, + executable: false, + }, ); } @@ -112,18 +112,20 @@ async fn populate_blob_helloworld( root_nodes.insert( HELLOWORLD_BLOB_NAME.into(), - Node::File(FileNode::new( - fixtures::HELLOWORLD_BLOB_DIGEST.clone(), - fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64, - true, - )), + Node::File { + digest: fixtures::HELLOWORLD_BLOB_DIGEST.clone(), + size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64, + executable: true, + }, ); } async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( SYMLINK_NAME.into(), - Node::Symlink(SymlinkNode::new(BLOB_A_NAME.into()).unwrap()), + Node::Symlink { + target: BLOB_A_NAME.try_into().unwrap(), + }, ); } @@ -132,7 +134,9 @@ async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) { async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( SYMLINK_NAME2.into(), - Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into()).unwrap()), + Node::Symlink { + target: "/nix/store/somewhereelse".try_into().unwrap(), + }, ); } @@ -156,10 +160,10 @@ async fn populate_directory_with_keep( root_nodes.insert( DIRECTORY_WITH_KEEP_NAME.into(), - Node::Directory(DirectoryNode::new( - fixtures::DIRECTORY_WITH_KEEP.digest(), - fixtures::DIRECTORY_WITH_KEEP.size(), - )), + Node::Directory { + digest: fixtures::DIRECTORY_WITH_KEEP.digest(), + size: fixtures::DIRECTORY_WITH_KEEP.size(), + }, ); } @@ -168,10 +172,10 @@ async fn populate_directory_with_keep( async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( DIRECTORY_WITH_KEEP_NAME.into(), - Node::Directory(DirectoryNode::new( - fixtures::DIRECTORY_WITH_KEEP.digest(), - fixtures::DIRECTORY_WITH_KEEP.size(), - )), + Node::Directory { + digest: fixtures::DIRECTORY_WITH_KEEP.digest(), + size: fixtures::DIRECTORY_WITH_KEEP.size(), + }, ); } @@ -179,11 +183,11 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Byte async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) { root_nodes.insert( BLOB_A_NAME.into(), - Node::File(FileNode::new( - fixtures::BLOB_A_DIGEST.clone(), - fixtures::BLOB_A.len() as u64, - false, - )), + Node::File { + digest: fixtures::BLOB_A_DIGEST.clone(), + size: fixtures::BLOB_A.len() as u64, + executable: false, + }, ); } @@ -213,10 +217,10 @@ async fn populate_directory_complicated( root_nodes.insert( DIRECTORY_COMPLICATED_NAME.into(), - Node::Directory(DirectoryNode::new( - fixtures::DIRECTORY_COMPLICATED.digest(), - fixtures::DIRECTORY_COMPLICATED.size(), - )), + Node::Directory { + digest: fixtures::DIRECTORY_COMPLICATED.digest(), + size: fixtures::DIRECTORY_COMPLICATED.size(), + }, ); } diff --git a/tvix/castore/src/fs/inodes.rs b/tvix/castore/src/fs/inodes.rs index e49d28f8e52e..782ffff716d2 100644 --- a/tvix/castore/src/fs/inodes.rs +++ b/tvix/castore/src/fs/inodes.rs @@ -25,11 +25,15 @@ impl InodeData { /// Constructs a new InodeData by consuming a [Node]. pub fn from_node(node: &Node) -> Self { match node { - Node::Directory(n) => { - Self::Directory(DirectoryInodeData::Sparse(n.digest().clone(), n.size())) + Node::Directory { digest, size } => { + Self::Directory(DirectoryInodeData::Sparse(digest.clone(), *size)) } - Node::File(n) => Self::Regular(n.digest().clone(), n.size(), n.executable()), - Node::Symlink(n) => Self::Symlink(n.target().clone()), + Node::File { + digest, + size, + executable, + } => Self::Regular(digest.clone(), *size, *executable), + Node::Symlink { target } => Self::Symlink(target.clone().into()), } } diff --git a/tvix/castore/src/import/mod.rs b/tvix/castore/src/import/mod.rs index 408166beffb4..9fce22585f60 100644 --- a/tvix/castore/src/import/mod.rs +++ b/tvix/castore/src/import/mod.rs @@ -6,7 +6,7 @@ use crate::directoryservice::{DirectoryPutter, DirectoryService}; use crate::path::{Path, PathBuf}; -use crate::{B3Digest, Directory, DirectoryNode, FileNode, Node, SymlinkNode}; +use crate::{B3Digest, Directory, Node}; use futures::{Stream, StreamExt}; use tracing::Level; @@ -84,22 +84,31 @@ where IngestionError::UploadDirectoryError(entry.path().to_owned(), e) })?; - Node::Directory(DirectoryNode::new(directory_digest, directory_size)) - } - IngestionEntry::Symlink { ref target, .. } => { - Node::Symlink(SymlinkNode::new(target.to_owned().into()).map_err(|e| { - IngestionError::UploadDirectoryError( - entry.path().to_owned(), - crate::Error::StorageError(e.to_string()), - ) - })?) + Node::Directory { + digest: directory_digest, + size: directory_size, + } } + IngestionEntry::Symlink { ref target, .. } => Node::Symlink { + target: bytes::Bytes::copy_from_slice(target).try_into().map_err( + |e: crate::ValidateNodeError| { + IngestionError::UploadDirectoryError( + entry.path().to_owned(), + crate::Error::StorageError(e.to_string()), + ) + }, + )?, + }, IngestionEntry::Regular { size, executable, digest, .. - } => Node::File(FileNode::new(digest.clone(), *size, *executable)), + } => Node::File { + digest: digest.clone(), + size: *size, + executable: *executable, + }, }; let parent = entry @@ -153,8 +162,8 @@ where #[cfg(debug_assertions)] { - if let Node::Directory(directory_node) = &root_node { - debug_assert_eq!(&root_directory_digest, directory_node.digest()) + if let Node::Directory { digest, .. } = &root_node { + debug_assert_eq!(&root_directory_digest, digest); } else { unreachable!("Tvix bug: directory putter initialized but no root directory node"); } @@ -201,7 +210,7 @@ mod test { use crate::fixtures::{DIRECTORY_COMPLICATED, DIRECTORY_WITH_KEEP, EMPTY_BLOB_DIGEST}; use crate::{directoryservice::MemoryDirectoryService, fixtures::DUMMY_DIGEST}; - use crate::{Directory, DirectoryNode, FileNode, Node, SymlinkNode}; + use crate::{Directory, Node}; use super::ingest_entries; use super::IngestionEntry; @@ -213,18 +222,18 @@ mod test { executable: true, digest: DUMMY_DIGEST.clone(), }], - Node::File(FileNode::new(DUMMY_DIGEST.clone(), 42, true)) + Node::File{digest: DUMMY_DIGEST.clone(), size: 42, executable: true} )] #[case::single_symlink(vec![IngestionEntry::Symlink { path: "foo".parse().unwrap(), target: b"blub".into(), }], - Node::Symlink(SymlinkNode::new("blub".into()).unwrap()) + Node::Symlink{target: "blub".try_into().unwrap()} )] #[case::single_dir(vec![IngestionEntry::Dir { path: "foo".parse().unwrap(), }], - Node::Directory(DirectoryNode::new(Directory::default().digest(), Directory::default().size())) + Node::Directory{digest: Directory::default().digest(), size: Directory::default().size()} )] #[case::dir_with_keep(vec![ IngestionEntry::Regular { @@ -237,7 +246,7 @@ mod test { path: "foo".parse().unwrap(), }, ], - Node::Directory(DirectoryNode::new(DIRECTORY_WITH_KEEP.digest(), DIRECTORY_WITH_KEEP.size())) + Node::Directory{ digest: DIRECTORY_WITH_KEEP.digest(), size: DIRECTORY_WITH_KEEP.size()} )] /// This is intentionally a bit unsorted, though it still satisfies all /// requirements we have on the order of elements in the stream. @@ -265,7 +274,7 @@ mod test { path: "blub".parse().unwrap(), }, ], - Node::Directory(DirectoryNode::new(DIRECTORY_COMPLICATED.digest(), DIRECTORY_COMPLICATED.size())) + Node::Directory{ digest: DIRECTORY_COMPLICATED.digest(), size: DIRECTORY_COMPLICATED.size() } )] #[tokio::test] async fn test_ingestion(#[case] entries: Vec<IngestionEntry>, #[case] exp_root_node: Node) { diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs index d58b7822a434..a2c520358929 100644 --- a/tvix/castore/src/nodes/directory.rs +++ b/tvix/castore/src/nodes/directory.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use crate::{errors::DirectoryError, proto, B3Digest, DirectoryNode, FileNode, Node, SymlinkNode}; +use crate::{errors::DirectoryError, proto, B3Digest, Node}; /// A Directory contains nodes, which can be Directory, File or Symlink nodes. /// It attached names to these nodes, which is the basename in that directory. @@ -27,7 +27,14 @@ impl Directory { pub fn size(&self) -> u64 { // It's impossible to create a Directory where the size overflows, because we // check before every add() that the size won't overflow. - (self.nodes.len() as u64) + self.directories().map(|(_name, dn)| dn.size()).sum::<u64>() + (self.nodes.len() as u64) + + self + .nodes() + .map(|(_name, n)| match n { + Node::Directory { size, .. } => 1 + size, + Node::File { .. } | Node::Symlink { .. } => 1, + }) + .sum::<u64>() } /// Calculates the digest of a Directory, which is the blake3 hash of a @@ -43,40 +50,6 @@ impl Directory { self.nodes.iter() } - /// Allows iterating over the FileNode entries of this directory. - /// For each, it returns a tuple of its name and node. - /// The elements are sorted by their names. - pub fn files(&self) -> impl Iterator<Item = (&bytes::Bytes, &FileNode)> + Send + Sync + '_ { - self.nodes.iter().filter_map(|(name, node)| match node { - Node::File(n) => Some((name, n)), - _ => None, - }) - } - - /// Allows iterating over the DirectoryNode entries (subdirectories) of this directory. - /// For each, it returns a tuple of its name and node. - /// The elements are sorted by their names. - pub fn directories( - &self, - ) -> impl Iterator<Item = (&bytes::Bytes, &DirectoryNode)> + Send + Sync + '_ { - self.nodes.iter().filter_map(|(name, node)| match node { - Node::Directory(n) => Some((name, n)), - _ => None, - }) - } - - /// Allows iterating over the SymlinkNode entries of this directory - /// For each, it returns a tuple of its name and node. - /// The elements are sorted by their names. - pub fn symlinks( - &self, - ) -> impl Iterator<Item = (&bytes::Bytes, &SymlinkNode)> + Send + Sync + '_ { - self.nodes.iter().filter_map(|(name, node)| match node { - Node::Symlink(n) => Some((name, n)), - _ => None, - }) - } - /// Checks a Node name for validity as a directory entry /// We disallow slashes, null bytes, '.', '..' and the empty string. pub(crate) fn is_valid_name(name: &[u8]) -> bool { @@ -106,7 +79,7 @@ impl Directory { self.size(), 1, match node { - Node::Directory(ref dir) => dir.size(), + Node::Directory { size, .. } => size, _ => 0, }, ]) @@ -130,7 +103,7 @@ fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> { #[cfg(test)] mod test { - use super::{Directory, DirectoryNode, FileNode, Node, SymlinkNode}; + use super::{Directory, Node}; use crate::fixtures::DUMMY_DIGEST; use crate::DirectoryError; @@ -140,49 +113,76 @@ mod test { d.add( "b".into(), - Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, ) .unwrap(); d.add( "a".into(), - Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, ) .unwrap(); d.add( "z".into(), - Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, ) .unwrap(); d.add( "f".into(), - Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + Node::File { + digest: DUMMY_DIGEST.clone(), + size: 1, + executable: true, + }, ) .unwrap(); d.add( "c".into(), - Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + Node::File { + digest: DUMMY_DIGEST.clone(), + size: 1, + executable: true, + }, ) .unwrap(); d.add( "g".into(), - Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)), + Node::File { + digest: DUMMY_DIGEST.clone(), + size: 1, + executable: true, + }, ) .unwrap(); d.add( "t".into(), - Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + Node::Symlink { + target: "a".try_into().unwrap(), + }, ) .unwrap(); d.add( "o".into(), - Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + Node::Symlink { + target: "a".try_into().unwrap(), + }, ) .unwrap(); d.add( "e".into(), - Node::Symlink(SymlinkNode::new("a".into()).unwrap()), + Node::Symlink { + target: "a".try_into().unwrap(), + }, ) .unwrap(); @@ -198,7 +198,10 @@ mod test { assert_eq!( d.add( "foo".into(), - Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), u64::MAX)) + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: u64::MAX + } ), Err(DirectoryError::SizeOverflow) ); @@ -210,7 +213,10 @@ mod test { d.add( "a".into(), - Node::Directory(DirectoryNode::new(DUMMY_DIGEST.clone(), 1)), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, ) .unwrap(); assert_eq!( @@ -218,7 +224,11 @@ mod test { "{}", d.add( "a".into(), - Node::File(FileNode::new(DUMMY_DIGEST.clone(), 1, true)) + Node::File { + digest: DUMMY_DIGEST.clone(), + size: 1, + executable: true + } ) .expect_err("adding duplicate dir entry must fail") ), @@ -233,7 +243,9 @@ mod test { assert!( dir.add( "".into(), // wrong! can not be added to directory - Node::Symlink(SymlinkNode::new("doesntmatter".into(),).unwrap()) + Node::Symlink { + target: "doesntmatter".try_into().unwrap(), + }, ) .is_err(), "invalid symlink entry be rejected" diff --git a/tvix/castore/src/nodes/directory_node.rs b/tvix/castore/src/nodes/directory_node.rs deleted file mode 100644 index e6e6312a7a0c..000000000000 --- a/tvix/castore/src/nodes/directory_node.rs +++ /dev/null @@ -1,35 +0,0 @@ -use crate::B3Digest; - -/// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest]. -/// It also records a`size`. -/// Such a node is either an element in the [Directory] it itself is contained in, -/// or a standalone root node./ -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct DirectoryNode { - /// The blake3 hash of a Directory message, serialized in protobuf canonical form. - digest: B3Digest, - /// Number of child elements in the Directory referred to by `digest`. - /// Calculated by summing up the numbers of nodes, and for each directory. - /// its size field. Can be used for inode allocation. - /// This field is precisely as verifiable as any other Merkle tree edge. - /// Resolve `digest`, and you can compute it incrementally. Resolve the entire - /// tree, and you can fully compute it from scratch. - /// A credulous implementation won't reject an excessive size, but this is - /// harmless: you'll have some ordinals without nodes. Undersizing is obvious - /// and easy to reject: you won't have an ordinal for some nodes. - size: u64, -} - -impl DirectoryNode { - pub fn new(digest: B3Digest, size: u64) -> Self { - Self { digest, size } - } - - pub fn digest(&self) -> &B3Digest { - &self.digest - } - - pub fn size(&self) -> u64 { - self.size - } -} diff --git a/tvix/castore/src/nodes/file_node.rs b/tvix/castore/src/nodes/file_node.rs deleted file mode 100644 index 73abe21e59b4..000000000000 --- a/tvix/castore/src/nodes/file_node.rs +++ /dev/null @@ -1,36 +0,0 @@ -use crate::B3Digest; - -/// A FileNode represents a regular or executable file in a Directory or at the root. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct FileNode { - /// The blake3 digest of the file contents - digest: B3Digest, - - /// The file content size - size: u64, - - /// Whether the file is executable - executable: bool, -} - -impl FileNode { - pub fn new(digest: B3Digest, size: u64, executable: bool) -> Self { - Self { - digest, - size, - executable, - } - } - - pub fn digest(&self) -> &B3Digest { - &self.digest - } - - pub fn size(&self) -> u64 { - self.size - } - - pub fn executable(&self) -> bool { - self.executable - } -} diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs index 47f39ae8f94a..0c7b89de917b 100644 --- a/tvix/castore/src/nodes/mod.rs +++ b/tvix/castore/src/nodes/mod.rs @@ -1,20 +1,48 @@ //! This holds types describing nodes in the tvix-castore model. mod directory; -mod directory_node; -mod file_node; -mod symlink_node; +mod symlink_target; +use crate::B3Digest; pub use directory::Directory; -pub use directory_node::DirectoryNode; -pub use file_node::FileNode; -pub use symlink_node::SymlinkNode; +use symlink_target::SymlinkTarget; /// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode]. /// Nodes themselves don't have names, what gives them names is either them /// being inside a [Directory], or a root node with its own name attached to it. #[derive(Debug, Clone, PartialEq, Eq)] pub enum Node { - Directory(DirectoryNode), - File(FileNode), - Symlink(SymlinkNode), + /// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest]. + /// It also records a`size`. + /// Such a node is either an element in the [Directory] it itself is contained in, + /// or a standalone root node. + Directory { + /// The blake3 hash of a Directory message, serialized in protobuf canonical form. + digest: B3Digest, + /// Number of child elements in the Directory referred to by `digest`. + /// Calculated by summing up the numbers of nodes, and for each directory, + /// its size field. Can be used for inode allocation. + /// This field is precisely as verifiable as any other Merkle tree edge. + /// Resolve `digest`, and you can compute it incrementally. Resolve the entire + /// tree, and you can fully compute it from scratch. + /// A credulous implementation won't reject an excessive size, but this is + /// harmless: you'll have some ordinals without nodes. Undersizing is obvious + /// and easy to reject: you won't have an ordinal for some nodes. + size: u64, + }, + /// A FileNode represents a regular or executable file in a Directory or at the root. + File { + /// The blake3 digest of the file contents + digest: B3Digest, + + /// The file content size + size: u64, + + /// Whether the file is executable + executable: bool, + }, + /// A SymlinkNode represents a symbolic link in a Directory or at the root. + Symlink { + /// The target of the symlink. + target: SymlinkTarget, + }, } diff --git a/tvix/castore/src/nodes/symlink_node.rs b/tvix/castore/src/nodes/symlink_node.rs deleted file mode 100644 index 6b9df96a5dd3..000000000000 --- a/tvix/castore/src/nodes/symlink_node.rs +++ /dev/null @@ -1,21 +0,0 @@ -use crate::ValidateNodeError; - -/// A SymlinkNode represents a symbolic link in a Directory or at the root. -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct SymlinkNode { - /// The target of the symlink. - target: bytes::Bytes, -} - -impl SymlinkNode { - pub fn new(target: bytes::Bytes) -> Result<Self, ValidateNodeError> { - if target.is_empty() || target.contains(&b'\0') { - return Err(ValidateNodeError::InvalidSymlinkTarget(target)); - } - Ok(Self { target }) - } - - pub fn target(&self) -> &bytes::Bytes { - &self.target - } -} diff --git a/tvix/castore/src/nodes/symlink_target.rs b/tvix/castore/src/nodes/symlink_target.rs new file mode 100644 index 000000000000..838cdaaeda5b --- /dev/null +++ b/tvix/castore/src/nodes/symlink_target.rs @@ -0,0 +1,82 @@ +// TODO: split out this error +use crate::ValidateNodeError; + +use bstr::ByteSlice; +use std::fmt::{self, Debug, Display}; + +/// A wrapper type for symlink targets. +/// Internally uses a [bytes::Bytes], but disallows empty targets and those +/// containing null bytes. +#[repr(transparent)] +#[derive(Clone, PartialEq, Eq)] +pub struct SymlinkTarget { + inner: bytes::Bytes, +} + +impl AsRef<[u8]> for SymlinkTarget { + fn as_ref(&self) -> &[u8] { + self.inner.as_ref() + } +} + +impl From<SymlinkTarget> for bytes::Bytes { + fn from(value: SymlinkTarget) -> Self { + value.inner + } +} + +impl TryFrom<bytes::Bytes> for SymlinkTarget { + type Error = ValidateNodeError; + + fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> { + if value.is_empty() || value.contains(&b'\0') { + return Err(ValidateNodeError::InvalidSymlinkTarget(value)); + } + + Ok(Self { inner: value }) + } +} + +impl TryFrom<&'static [u8]> for SymlinkTarget { + type Error = ValidateNodeError; + + fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> { + if value.is_empty() || value.contains(&b'\0') { + return Err(ValidateNodeError::InvalidSymlinkTarget( + bytes::Bytes::from_static(value), + )); + } + + Ok(Self { + inner: bytes::Bytes::from_static(value), + }) + } +} + +impl TryFrom<&str> for SymlinkTarget { + type Error = ValidateNodeError; + + fn try_from(value: &str) -> Result<Self, Self::Error> { + if value.is_empty() { + return Err(ValidateNodeError::InvalidSymlinkTarget( + bytes::Bytes::copy_from_slice(value.as_bytes()), + )); + } + + Ok(Self { + inner: bytes::Bytes::copy_from_slice(value.as_bytes()), + }) + } +} + +impl Debug for SymlinkTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(self.inner.as_bstr(), f) + } +} + +impl Display for SymlinkTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(self.inner.as_bstr(), f) + } +} diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index 7cb1cecd27fa..c12eb4ee3c91 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -157,21 +157,25 @@ impl From<&crate::Directory> for Directory { for (name, node) in directory.nodes() { match node { - crate::Node::File(n) => files.push(FileNode { + crate::Node::File { + digest, + size, + executable, + } => files.push(FileNode { name: name.clone(), - digest: n.digest().to_owned().into(), - size: n.size(), - executable: n.executable(), + digest: digest.to_owned().into(), + size: *size, + executable: *executable, }), - crate::Node::Directory(n) => directories.push(DirectoryNode { + crate::Node::Directory { digest, size } => directories.push(DirectoryNode { name: name.clone(), - digest: n.digest().to_owned().into(), - size: n.size(), + digest: digest.to_owned().into(), + size: *size, }), - crate::Node::Symlink(n) => { + crate::Node::Symlink { target } => { symlinks.push(SymlinkNode { name: name.clone(), - target: n.target().to_owned(), + target: target.to_owned().into(), }); } } @@ -192,7 +196,10 @@ impl Node { let digest = B3Digest::try_from(n.digest) .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; - let node = crate::Node::Directory(crate::DirectoryNode::new(digest, n.size)); + let node = crate::Node::Directory { + digest, + size: n.size, + }; Ok((n.name, node)) } @@ -200,16 +207,22 @@ impl Node { let digest = B3Digest::try_from(n.digest) .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; - let node = crate::Node::File(crate::FileNode::new(digest, n.size, n.executable)); + let node = crate::Node::File { + digest, + size: n.size, + executable: n.executable, + }; Ok((n.name, node)) } node::Node::Symlink(n) => { - let node = crate::Node::Symlink( - crate::SymlinkNode::new(n.target) + let node = crate::Node::Symlink { + target: n + .target + .try_into() .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e))?, - ); + }; Ok((n.name, node)) } @@ -218,27 +231,30 @@ impl Node { /// Construsts a [Node] from a name and [crate::Node]. pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self { - // TODO: make these pub(crate) so we can avoid cloning? match n { - crate::Node::Directory(directory_node) => Self { + crate::Node::Directory { digest, size } => Self { node: Some(node::Node::Directory(DirectoryNode { name, - digest: directory_node.digest().to_owned().into(), - size: directory_node.size(), + digest: digest.into(), + size, })), }, - crate::Node::File(file_node) => Self { + crate::Node::File { + digest, + size, + executable, + } => Self { node: Some(node::Node::File(FileNode { name, - digest: file_node.digest().to_owned().into(), - size: file_node.size(), - executable: file_node.executable(), + digest: digest.into(), + size, + executable, })), }, - crate::Node::Symlink(symlink_node) => Self { + crate::Node::Symlink { target } => Self { node: Some(node::Node::Symlink(SymlinkNode { name, - target: symlink_node.target().to_owned(), + target: target.into(), })), }, } diff --git a/tvix/castore/src/tests/import.rs b/tvix/castore/src/tests/import.rs index bbf28a2981bf..32c2c363689f 100644 --- a/tvix/castore/src/tests/import.rs +++ b/tvix/castore/src/tests/import.rs @@ -2,7 +2,7 @@ use crate::blobservice::{self, BlobService}; use crate::directoryservice; use crate::fixtures::*; use crate::import::fs::ingest_path; -use crate::{DirectoryNode, Node, SymlinkNode}; +use crate::Node; use tempfile::TempDir; @@ -30,7 +30,9 @@ async fn symlink() { .expect("must succeed"); assert_eq!( - Node::Symlink(SymlinkNode::new("/nix/store/somewhereelse".into(),).unwrap()), + Node::Symlink { + target: "/nix/store/somewhereelse".try_into().unwrap() + }, root_node, ) } @@ -53,11 +55,11 @@ async fn single_file() { .expect("must succeed"); assert_eq!( - Node::File(crate::FileNode::new( - HELLOWORLD_BLOB_DIGEST.clone(), - HELLOWORLD_BLOB_CONTENTS.len() as u64, - false, - )), + Node::File { + digest: HELLOWORLD_BLOB_DIGEST.clone(), + size: HELLOWORLD_BLOB_CONTENTS.len() as u64, + executable: false, + }, root_node, ); @@ -88,10 +90,10 @@ async fn complicated() { // ensure root_node matched expectations assert_eq!( - Node::Directory(DirectoryNode::new( - DIRECTORY_COMPLICATED.digest().clone(), - DIRECTORY_COMPLICATED.size(), - )), + Node::Directory { + digest: DIRECTORY_COMPLICATED.digest().clone(), + size: DIRECTORY_COMPLICATED.size(), + }, root_node, ); |