diff options
author | Florian Klink <flokli@flokli.de> | 2024-08-16T14·32+0300 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-08-17T15·59+0000 |
commit | 5ec93b57e6a263eef91ee583aba9f04581e4a66b (patch) | |
tree | 896407c00900d630a38ee82176ff12e0870f7a20 /tvix/castore/src | |
parent | 8ea7d2b60eb4052d934820078c31ff25786376a4 (diff) |
refactor(tvix/castore): add PathComponent type for checked components r/8506
This encodes a verified component on the type level. Internally, it contains a bytes::Bytes. The castore Path/PathBuf component() and file_name() methods now return this type, the old ones returning bytes were renamed to component_bytes() and component_file_name() respectively. We can drop the directory_reject_invalid_name test - it's not possible anymore to pass an invalid name to Directories::add. Invalid names in the Directory proto are still being tested to be rejected in the validate_invalid_names tests. Change-Id: Ide4d16415dfd50b7e2d7e0c36d42a3bbeeb9b6c5 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12217 Autosubmit: flokli <flokli@flokli.de> Reviewed-by: Connor Brewster <cbrewster@hey.com> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/castore/src')
-rw-r--r-- | tvix/castore/src/directoryservice/directory_graph.rs | 12 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/tests/mod.rs | 2 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/traverse.rs | 2 | ||||
-rw-r--r-- | tvix/castore/src/errors.rs | 10 | ||||
-rw-r--r-- | tvix/castore/src/fixtures.rs | 18 | ||||
-rw-r--r-- | tvix/castore/src/fs/fuse/tests.rs | 44 | ||||
-rw-r--r-- | tvix/castore/src/fs/inodes.rs | 6 | ||||
-rw-r--r-- | tvix/castore/src/fs/mod.rs | 41 | ||||
-rw-r--r-- | tvix/castore/src/fs/root_nodes.rs | 12 | ||||
-rw-r--r-- | tvix/castore/src/import/mod.rs | 5 | ||||
-rw-r--r-- | tvix/castore/src/lib.rs | 2 | ||||
-rw-r--r-- | tvix/castore/src/nodes/directory.rs | 64 | ||||
-rw-r--r-- | tvix/castore/src/nodes/mod.rs | 2 | ||||
-rw-r--r-- | tvix/castore/src/path/component.rs | 102 | ||||
-rw-r--r-- | tvix/castore/src/path/mod.rs (renamed from tvix/castore/src/path.rs) | 44 | ||||
-rw-r--r-- | tvix/castore/src/proto/mod.rs | 27 | ||||
-rw-r--r-- | tvix/castore/src/proto/tests/directory.rs | 2 |
17 files changed, 249 insertions, 146 deletions
diff --git a/tvix/castore/src/directoryservice/directory_graph.rs b/tvix/castore/src/directoryservice/directory_graph.rs index 2f932fe05d8a..3c2d54702973 100644 --- a/tvix/castore/src/directoryservice/directory_graph.rs +++ b/tvix/castore/src/directoryservice/directory_graph.rs @@ -1,7 +1,5 @@ use std::collections::HashMap; -use bstr::ByteSlice; - use petgraph::{ graph::{DiGraph, NodeIndex}, visit::{Bfs, DfsPostOrder, EdgeRef, IntoNodeIdentifiers, Walker}, @@ -10,7 +8,7 @@ use petgraph::{ use tracing::instrument; use super::order_validator::{LeavesToRootValidator, OrderValidator, RootToLeavesValidator}; -use crate::{B3Digest, Directory, Node}; +use crate::{path::PathComponent, B3Digest, Directory, Node}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -71,12 +69,12 @@ pub struct ValidatedDirectoryGraph { root: Option<NodeIndex>, } -fn check_edge(dir: &Edge, dir_name: &[u8], child: &Directory) -> Result<(), Error> { +fn check_edge(dir: &Edge, dir_name: &PathComponent, child: &Directory) -> Result<(), Error> { // Ensure the size specified in the child node matches our records. if dir.1 != child.size() { return Err(Error::ValidationError(format!( "'{}' has wrong size, specified {}, recorded {}", - dir_name.as_bstr(), + dir_name, dir.1, child.size(), ))); @@ -179,7 +177,7 @@ impl<O: OrderValidator> DirectoryGraph<O> { .expect("edge is already validated"); // TODO: where's the name here? - check_edge(&edge_weight, b"??", &directory)?; + check_edge(&edge_weight, &"??".try_into().unwrap(), &directory)?; } // finally, store the directory information in the node weight @@ -284,7 +282,7 @@ mod tests { pub static ref BROKEN_PARENT_DIRECTORY: Directory = { let mut dir = Directory::new(); dir.add( - "foo".into(), + "foo".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size() + 42, // wrong! diff --git a/tvix/castore/src/directoryservice/tests/mod.rs b/tvix/castore/src/directoryservice/tests/mod.rs index 2bb9f05bf8e9..01e42130456f 100644 --- a/tvix/castore/src/directoryservice/tests/mod.rs +++ b/tvix/castore/src/directoryservice/tests/mod.rs @@ -219,7 +219,7 @@ async fn upload_reject_wrong_size(directory_service: impl DirectoryService) { let wrong_parent_directory = { let mut dir = Directory::new(); dir.add( - "foo".into(), + "foo".try_into().unwrap(), Node::Directory { digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size() + 42, // wrong! diff --git a/tvix/castore/src/directoryservice/traverse.rs b/tvix/castore/src/directoryservice/traverse.rs index 86a90175ef10..63b99ba4ea5e 100644 --- a/tvix/castore/src/directoryservice/traverse.rs +++ b/tvix/castore/src/directoryservice/traverse.rs @@ -13,7 +13,7 @@ where DS: AsRef<dyn DirectoryService>, { let mut parent_node = root_node; - for component in path.as_ref().components() { + for component in path.as_ref().components_bytes() { match parent_node { Node::File { .. } | Node::Symlink { .. } => { // There's still some path left, but the parent node is no directory. diff --git a/tvix/castore/src/errors.rs b/tvix/castore/src/errors.rs index e03cfa5a5207..6c213cb06743 100644 --- a/tvix/castore/src/errors.rs +++ b/tvix/castore/src/errors.rs @@ -3,6 +3,8 @@ use thiserror::Error; use tokio::task::JoinError; use tonic::Status; +use crate::path::PathComponent; + /// Errors related to communication with the store. #[derive(Debug, Error, PartialEq)] pub enum Error { @@ -37,11 +39,11 @@ impl From<crate::digests::Error> for ValidateNodeError { #[derive(Debug, thiserror::Error, PartialEq)] pub enum DirectoryError { /// Multiple elements with the same name encountered - #[error("{:?} is a duplicate name", .0.as_bstr())] - DuplicateName(Vec<u8>), + #[error("{:?} is a duplicate name", .0)] + DuplicateName(PathComponent), /// Node failed validation - #[error("invalid node with name {:?}: {:?}", .0.as_bstr(), .1.to_string())] - InvalidNode(bytes::Bytes, ValidateNodeError), + #[error("invalid node with name {}: {:?}", .0, .1.to_string())] + InvalidNode(PathComponent, ValidateNodeError), #[error("Total size exceeds u32::MAX")] SizeOverflow, /// Invalid name encountered diff --git a/tvix/castore/src/fixtures.rs b/tvix/castore/src/fixtures.rs index ddeacaf1d5b9..00cf3682d194 100644 --- a/tvix/castore/src/fixtures.rs +++ b/tvix/castore/src/fixtures.rs @@ -34,7 +34,7 @@ lazy_static! { pub static ref DIRECTORY_WITH_KEEP: Directory = { let mut dir = Directory::new(); dir.add( - ".keep".into(), + ".keep".try_into().unwrap(), Node::File{ digest: EMPTY_BLOB_DIGEST.clone(), size: 0, @@ -46,20 +46,20 @@ lazy_static! { pub static ref DIRECTORY_COMPLICATED: Directory = { let mut dir = Directory::new(); dir.add( - "keep".into(), + "keep".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_WITH_KEEP.digest(), size: DIRECTORY_WITH_KEEP.size() }).unwrap(); dir.add( - ".keep".into(), + ".keep".try_into().unwrap(), Node::File{ digest: EMPTY_BLOB_DIGEST.clone(), size: 0, executable: false }).unwrap(); dir.add( - "aa".into(), + "aa".try_into().unwrap(), Node::Symlink{ target: "/nix/store/somewhereelse".try_into().unwrap() }).unwrap(); @@ -70,7 +70,7 @@ lazy_static! { pub static ref DIRECTORY_B: Directory = { let mut dir = Directory::new(); dir.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size(), @@ -81,13 +81,13 @@ lazy_static! { pub static ref DIRECTORY_C: Directory = { let mut dir = Directory::new(); dir.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size(), }).unwrap(); dir.add( - "a'".into(), + "a'".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size(), @@ -98,13 +98,13 @@ lazy_static! { pub static ref DIRECTORY_D: Directory = { let mut dir = Directory::new(); dir.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_A.digest(), size: DIRECTORY_A.size(), }).unwrap(); dir.add( - "b".into(), + "b".try_into().unwrap(), Node::Directory{ digest: DIRECTORY_B.digest(), size: DIRECTORY_B.size(), diff --git a/tvix/castore/src/fs/fuse/tests.rs b/tvix/castore/src/fs/fuse/tests.rs index e5e2e66c3b62..9e01204d5da7 100644 --- a/tvix/castore/src/fs/fuse/tests.rs +++ b/tvix/castore/src/fs/fuse/tests.rs @@ -1,5 +1,4 @@ use bstr::ByteSlice; -use bytes::Bytes; use std::{ collections::BTreeMap, ffi::{OsStr, OsString}, @@ -12,12 +11,15 @@ use tempfile::TempDir; use tokio_stream::{wrappers::ReadDirStream, StreamExt}; use super::FuseDaemon; -use crate::fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST}; use crate::{ blobservice::{BlobService, MemoryBlobService}, directoryservice::{DirectoryService, MemoryDirectoryService}, fixtures, Node, }; +use crate::{ + fs::{TvixStoreFs, XATTR_NAME_BLOB_DIGEST, XATTR_NAME_DIRECTORY_DIGEST}, + PathComponent, +}; const BLOB_A_NAME: &str = "00000000000000000000000000000000-test"; const BLOB_B_NAME: &str = "55555555555555555555555555555555-test"; @@ -37,7 +39,7 @@ fn gen_svcs() -> (Arc<dyn BlobService>, Arc<dyn DirectoryService>) { fn do_mount<P: AsRef<Path>, BS, DS>( blob_service: BS, directory_service: DS, - root_nodes: BTreeMap<bytes::Bytes, Node>, + root_nodes: BTreeMap<PathComponent, Node>, mountpoint: P, list_root: bool, show_xattr: bool, @@ -58,7 +60,7 @@ where async fn populate_blob_a( blob_service: &Arc<dyn BlobService>, - root_nodes: &mut BTreeMap<Bytes, Node>, + root_nodes: &mut BTreeMap<PathComponent, Node>, ) { let mut bw = blob_service.open_write().await; tokio::io::copy(&mut Cursor::new(fixtures::BLOB_A.to_vec()), &mut bw) @@ -67,7 +69,7 @@ async fn populate_blob_a( bw.close().await.expect("must succeed closing"); root_nodes.insert( - BLOB_A_NAME.into(), + BLOB_A_NAME.try_into().unwrap(), Node::File { digest: fixtures::BLOB_A_DIGEST.clone(), size: fixtures::BLOB_A.len() as u64, @@ -78,7 +80,7 @@ async fn populate_blob_a( async fn populate_blob_b( blob_service: &Arc<dyn BlobService>, - root_nodes: &mut BTreeMap<Bytes, Node>, + root_nodes: &mut BTreeMap<PathComponent, Node>, ) { let mut bw = blob_service.open_write().await; tokio::io::copy(&mut Cursor::new(fixtures::BLOB_B.to_vec()), &mut bw) @@ -87,7 +89,7 @@ async fn populate_blob_b( bw.close().await.expect("must succeed closing"); root_nodes.insert( - BLOB_B_NAME.into(), + BLOB_B_NAME.try_into().unwrap(), Node::File { digest: fixtures::BLOB_B_DIGEST.clone(), size: fixtures::BLOB_B.len() as u64, @@ -99,7 +101,7 @@ async fn populate_blob_b( /// adds a blob containing helloworld and marks it as executable async fn populate_blob_helloworld( blob_service: &Arc<dyn BlobService>, - root_nodes: &mut BTreeMap<Bytes, Node>, + root_nodes: &mut BTreeMap<PathComponent, Node>, ) { let mut bw = blob_service.open_write().await; tokio::io::copy( @@ -111,7 +113,7 @@ async fn populate_blob_helloworld( bw.close().await.expect("must succeed closing"); root_nodes.insert( - HELLOWORLD_BLOB_NAME.into(), + HELLOWORLD_BLOB_NAME.try_into().unwrap(), Node::File { digest: fixtures::HELLOWORLD_BLOB_DIGEST.clone(), size: fixtures::HELLOWORLD_BLOB_CONTENTS.len() as u64, @@ -120,9 +122,9 @@ async fn populate_blob_helloworld( ); } -async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) { +async fn populate_symlink(root_nodes: &mut BTreeMap<PathComponent, Node>) { root_nodes.insert( - SYMLINK_NAME.into(), + SYMLINK_NAME.try_into().unwrap(), Node::Symlink { target: BLOB_A_NAME.try_into().unwrap(), }, @@ -131,9 +133,9 @@ async fn populate_symlink(root_nodes: &mut BTreeMap<Bytes, Node>) { /// This writes a symlink pointing to /nix/store/somewhereelse, /// which is the same symlink target as "aa" inside DIRECTORY_COMPLICATED. -async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) { +async fn populate_symlink2(root_nodes: &mut BTreeMap<PathComponent, Node>) { root_nodes.insert( - SYMLINK_NAME2.into(), + SYMLINK_NAME2.try_into().unwrap(), Node::Symlink { target: "/nix/store/somewhereelse".try_into().unwrap(), }, @@ -143,7 +145,7 @@ async fn populate_symlink2(root_nodes: &mut BTreeMap<Bytes, Node>) { async fn populate_directory_with_keep( blob_service: &Arc<dyn BlobService>, directory_service: &Arc<dyn DirectoryService>, - root_nodes: &mut BTreeMap<Bytes, Node>, + root_nodes: &mut BTreeMap<PathComponent, Node>, ) { // upload empty blob let mut bw = blob_service.open_write().await; @@ -159,7 +161,7 @@ async fn populate_directory_with_keep( .expect("must succeed uploading"); root_nodes.insert( - DIRECTORY_WITH_KEEP_NAME.into(), + DIRECTORY_WITH_KEEP_NAME.try_into().unwrap(), Node::Directory { digest: fixtures::DIRECTORY_WITH_KEEP.digest(), size: fixtures::DIRECTORY_WITH_KEEP.size(), @@ -169,9 +171,9 @@ async fn populate_directory_with_keep( /// Create a root node for DIRECTORY_WITH_KEEP, but don't upload the Directory /// itself. -async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Bytes, Node>) { +async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<PathComponent, Node>) { root_nodes.insert( - DIRECTORY_WITH_KEEP_NAME.into(), + DIRECTORY_WITH_KEEP_NAME.try_into().unwrap(), Node::Directory { digest: fixtures::DIRECTORY_WITH_KEEP.digest(), size: fixtures::DIRECTORY_WITH_KEEP.size(), @@ -180,9 +182,9 @@ async fn populate_directorynode_without_directory(root_nodes: &mut BTreeMap<Byte } /// Insert BLOB_A, but don't provide the blob .keep is pointing to. -async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) { +async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<PathComponent, Node>) { root_nodes.insert( - BLOB_A_NAME.into(), + BLOB_A_NAME.try_into().unwrap(), Node::File { digest: fixtures::BLOB_A_DIGEST.clone(), size: fixtures::BLOB_A.len() as u64, @@ -194,7 +196,7 @@ async fn populate_filenode_without_blob(root_nodes: &mut BTreeMap<Bytes, Node>) async fn populate_directory_complicated( blob_service: &Arc<dyn BlobService>, directory_service: &Arc<dyn DirectoryService>, - root_nodes: &mut BTreeMap<Bytes, Node>, + root_nodes: &mut BTreeMap<PathComponent, Node>, ) { // upload empty blob let mut bw = blob_service.open_write().await; @@ -216,7 +218,7 @@ async fn populate_directory_complicated( .expect("must succeed uploading"); root_nodes.insert( - DIRECTORY_COMPLICATED_NAME.into(), + DIRECTORY_COMPLICATED_NAME.try_into().unwrap(), Node::Directory { digest: fixtures::DIRECTORY_COMPLICATED.digest(), size: fixtures::DIRECTORY_COMPLICATED.size(), diff --git a/tvix/castore/src/fs/inodes.rs b/tvix/castore/src/fs/inodes.rs index 782ffff716d2..2696fdede378 100644 --- a/tvix/castore/src/fs/inodes.rs +++ b/tvix/castore/src/fs/inodes.rs @@ -2,7 +2,7 @@ //! about inodes, which present tvix-castore nodes in a filesystem. use std::time::Duration; -use crate::{B3Digest, Node}; +use crate::{path::PathComponent, B3Digest, Node}; #[derive(Clone, Debug)] pub enum InodeData { @@ -17,8 +17,8 @@ pub enum InodeData { /// lookup and did fetch the data. #[derive(Clone, Debug)] pub enum DirectoryInodeData { - Sparse(B3Digest, u64), // digest, size - Populated(B3Digest, Vec<(u64, bytes::Bytes, Node)>), // [(child_inode, name, node)] + Sparse(B3Digest, u64), // digest, size + Populated(B3Digest, Vec<(u64, PathComponent, Node)>), // [(child_inode, name, node)] } impl InodeData { diff --git a/tvix/castore/src/fs/mod.rs b/tvix/castore/src/fs/mod.rs index f819adb549f5..d196266ab438 100644 --- a/tvix/castore/src/fs/mod.rs +++ b/tvix/castore/src/fs/mod.rs @@ -18,10 +18,10 @@ use self::{ use crate::{ blobservice::{BlobReader, BlobService}, directoryservice::DirectoryService, - {B3Digest, Node}, + path::PathComponent, + B3Digest, Node, }; use bstr::ByteVec; -use bytes::Bytes; use fuse_backend_rs::abi::fuse_abi::{stat64, OpenOptions}; use fuse_backend_rs::api::filesystem::{ Context, FileSystem, FsOptions, GetxattrReply, ListxattrReply, ROOT_ID, @@ -87,7 +87,7 @@ pub struct TvixStoreFs<BS, DS, RN> { show_xattr: bool, /// This maps a given basename in the root to the inode we allocated for the node. - root_nodes: RwLock<HashMap<Bytes, u64>>, + root_nodes: RwLock<HashMap<PathComponent, u64>>, /// This keeps track of inodes and data alongside them. inode_tracker: RwLock<InodeTracker>, @@ -103,7 +103,7 @@ pub struct TvixStoreFs<BS, DS, RN> { u64, ( Span, - Arc<Mutex<mpsc::Receiver<(usize, Result<(Bytes, Node), crate::Error>)>>>, + Arc<Mutex<mpsc::Receiver<(usize, Result<(PathComponent, Node), crate::Error>)>>>, ), >, >, @@ -154,7 +154,7 @@ where /// Retrieves the inode for a given root node basename, if present. /// This obtains a read lock on self.root_nodes. - fn get_inode_for_root_name(&self, name: &[u8]) -> Option<u64> { + fn get_inode_for_root_name(&self, name: &PathComponent) -> Option<u64> { self.root_nodes.read().get(name).cloned() } @@ -170,7 +170,7 @@ where fn get_directory_children( &self, ino: u64, - ) -> io::Result<(B3Digest, Vec<(u64, bytes::Bytes, Node)>)> { + ) -> io::Result<(B3Digest, Vec<(u64, PathComponent, Node)>)> { let data = self.inode_tracker.read().get(ino).unwrap(); match *data { // if it's populated already, return children. @@ -200,7 +200,7 @@ where let children = { let mut inode_tracker = self.inode_tracker.write(); - let children: Vec<(u64, Bytes, Node)> = directory + let children: Vec<(u64, PathComponent, Node)> = directory .nodes() .map(|(child_name, child_node)| { let inode_data = InodeData::from_node(child_node); @@ -240,12 +240,12 @@ where /// In the case the name can't be found, a libc::ENOENT is returned. fn name_in_root_to_ino_and_data( &self, - name: &std::ffi::CStr, + name: &PathComponent, ) -> io::Result<(u64, Arc<InodeData>)> { // Look up the inode for that root node. // If there's one, [self.inode_tracker] MUST also contain the data, // which we can then return. - if let Some(inode) = self.get_inode_for_root_name(name.to_bytes()) { + if let Some(inode) = self.get_inode_for_root_name(name) { return Ok(( inode, self.inode_tracker @@ -259,7 +259,8 @@ where // We don't have it yet, look it up in [self.root_nodes]. match self.tokio_handle.block_on({ let root_nodes_provider = self.root_nodes_provider.clone(); - async move { root_nodes_provider.get_by_basename(name.to_bytes()).await } + let name = name.clone(); + async move { root_nodes_provider.get_by_basename(&name).await } }) { // if there was an error looking up the root node, propagate up an IO error. Err(_e) => Err(io::Error::from_raw_os_error(libc::EIO)), @@ -269,7 +270,7 @@ where Ok(Some(root_node)) => { // Let's check if someone else beat us to updating the inode tracker and // root_nodes map. This avoids locking inode_tracker for writing. - if let Some(ino) = self.root_nodes.read().get(name.to_bytes()) { + if let Some(ino) = self.root_nodes.read().get(name) { return Ok(( *ino, self.inode_tracker.read().get(*ino).expect("must exist"), @@ -285,7 +286,7 @@ where // self.root_nodes. let inode_data = InodeData::from_node(&root_node); let ino = inode_tracker.put(inode_data.clone()); - root_nodes.insert(bytes::Bytes::copy_from_slice(name.to_bytes()), ino); + root_nodes.insert(name.to_owned(), ino); Ok((ino, Arc::new(inode_data))) } @@ -341,13 +342,17 @@ where ) -> io::Result<fuse_backend_rs::api::filesystem::Entry> { debug!("lookup"); + // convert the CStr to a PathComponent + // If it can't be converted, we definitely don't have anything here. + let name: PathComponent = name.try_into().map_err(|_| std::io::ErrorKind::NotFound)?; + // This goes from a parent inode to a node. // - If the parent is [ROOT_ID], we need to check // [self.root_nodes] (fetching from a [RootNode] provider if needed) // - Otherwise, lookup the parent in [self.inode_tracker] (which must be // a [InodeData::Directory]), and find the child with that name. if parent == ROOT_ID { - let (ino, inode_data) = self.name_in_root_to_ino_and_data(name)?; + let (ino, inode_data) = self.name_in_root_to_ino_and_data(&name)?; debug!(inode_data=?&inode_data, ino=ino, "Some"); return Ok(inode_data.as_fuse_entry(ino)); @@ -360,7 +365,7 @@ where // Search for that name in the list of children and return the FileAttrs. // in the children, find the one with the desired name. - if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == name.to_bytes()) { + if let Some((child_ino, _, _)) = children.iter().find(|(_, n, _)| n == &name) { // lookup the child [InodeData] in [self.inode_tracker]. // We know the inodes for children have already been allocated. let child_inode_data = self.inode_tracker.read().get(*child_ino).unwrap(); @@ -479,7 +484,7 @@ where ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &name, + name: name.as_ref(), })?; // If the buffer is full, add_entry will return `Ok(0)`. if written == 0 { @@ -503,7 +508,7 @@ where ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &child_name, + name: child_name.as_ref(), })?; // If the buffer is full, add_entry will return `Ok(0)`. if written == 0 { @@ -569,7 +574,7 @@ where ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &name, + name: name.as_ref(), }, inode_data.as_fuse_entry(ino), )?; @@ -594,7 +599,7 @@ where ino, offset: offset + (i as u64) + 1, type_: inode_data.as_fuse_type(), - name: &name, + name: name.as_ref(), }, inode_data.as_fuse_entry(ino), )?; diff --git a/tvix/castore/src/fs/root_nodes.rs b/tvix/castore/src/fs/root_nodes.rs index 62f0b62196a6..5ed1a4d8d6c0 100644 --- a/tvix/castore/src/fs/root_nodes.rs +++ b/tvix/castore/src/fs/root_nodes.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use crate::{Error, Node}; +use crate::{path::PathComponent, Error, Node}; use futures::stream::BoxStream; use tonic::async_trait; @@ -10,12 +10,12 @@ use tonic::async_trait; pub trait RootNodes: Send + Sync { /// Looks up a root CA node based on the basename of the node in the root /// directory of the filesystem. - async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error>; + async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error>; /// Lists all root CA nodes in the filesystem, as a tuple of (base)name /// and Node. /// An error can be returned in case listing is not allowed. - fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>>; + fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>>; } #[async_trait] @@ -23,13 +23,13 @@ pub trait RootNodes: Send + Sync { /// the key is the node name. impl<T> RootNodes for T where - T: AsRef<BTreeMap<bytes::Bytes, Node>> + Send + Sync, + T: AsRef<BTreeMap<PathComponent, Node>> + Send + Sync, { - async fn get_by_basename(&self, name: &[u8]) -> Result<Option<Node>, Error> { + async fn get_by_basename(&self, name: &PathComponent) -> Result<Option<Node>, Error> { Ok(self.as_ref().get(name).cloned()) } - fn list(&self) -> BoxStream<Result<(bytes::Bytes, Node), Error>> { + fn list(&self) -> BoxStream<Result<(PathComponent, Node), Error>> { Box::pin(tokio_stream::iter( self.as_ref() .iter() diff --git a/tvix/castore/src/import/mod.rs b/tvix/castore/src/import/mod.rs index 9fce22585f60..a7c459fdb56a 100644 --- a/tvix/castore/src/import/mod.rs +++ b/tvix/castore/src/import/mod.rs @@ -123,9 +123,8 @@ where .path() .file_name() // If this is the root node, it will have an empty name. - .unwrap_or_default() - .to_owned() - .into(); + .unwrap_or_else(|| "".try_into().unwrap()) + .to_owned(); // record node in parent directory, creating a new [Directory] if not there yet. directories diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs index f56ddf827722..6e1b36231b9d 100644 --- a/tvix/castore/src/lib.rs +++ b/tvix/castore/src/lib.rs @@ -14,7 +14,7 @@ mod nodes; pub use nodes::*; mod path; -pub use path::{Path, PathBuf}; +pub use path::{Path, PathBuf, PathComponent}; pub mod import; pub mod proto; diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs index a2c520358929..03db30b52408 100644 --- a/tvix/castore/src/nodes/directory.rs +++ b/tvix/castore/src/nodes/directory.rs @@ -1,6 +1,6 @@ use std::collections::BTreeMap; -use crate::{errors::DirectoryError, proto, B3Digest, Node}; +use crate::{errors::DirectoryError, path::PathComponent, proto, B3Digest, Node}; /// A Directory contains nodes, which can be Directory, File or Symlink nodes. /// It attached names to these nodes, which is the basename in that directory. @@ -10,7 +10,7 @@ use crate::{errors::DirectoryError, proto, B3Digest, Node}; /// - MUST be unique across all three lists #[derive(Default, Debug, Clone, PartialEq, Eq)] pub struct Directory { - nodes: BTreeMap<bytes::Bytes, Node>, + nodes: BTreeMap<PathComponent, Node>, } impl Directory { @@ -46,31 +46,17 @@ impl Directory { /// Allows iterating over all nodes (directories, files and symlinks) /// For each, it returns a tuple of its name and node. /// The elements are sorted by their names. - pub fn nodes(&self) -> impl Iterator<Item = (&bytes::Bytes, &Node)> + Send + Sync + '_ { + pub fn nodes(&self) -> impl Iterator<Item = (&PathComponent, &Node)> + Send + Sync + '_ { self.nodes.iter() } - /// Checks a Node name for validity as a directory entry - /// We disallow slashes, null bytes, '.', '..' and the empty string. - pub(crate) fn is_valid_name(name: &[u8]) -> bool { - !(name.is_empty() - || name == b".." - || name == b"." - || name.contains(&0x00) - || name.contains(&b'/')) - } - /// Adds the specified [Node] to the [Directory] with a given name. /// /// Inserting an element that already exists with the same name in the directory will yield an /// error. /// Inserting an element will validate that its name fulfills the /// requirements for directory entries and yield an error if it is not. - pub fn add(&mut self, name: bytes::Bytes, node: Node) -> Result<(), DirectoryError> { - if !Self::is_valid_name(&name) { - return Err(DirectoryError::InvalidName(name)); - } - + pub fn add(&mut self, name: PathComponent, node: Node) -> Result<(), DirectoryError> { // Check that the even after adding this new directory entry, the size calculation will not // overflow // FUTUREWORK: add some sort of batch add interface which only does this check once with @@ -91,7 +77,7 @@ impl Directory { Ok(()) } std::collections::btree_map::Entry::Occupied(occupied) => { - Err(DirectoryError::DuplicateName(occupied.key().to_vec())) + Err(DirectoryError::DuplicateName(occupied.key().to_owned())) } } } @@ -112,7 +98,7 @@ mod test { let mut d = Directory::new(); d.add( - "b".into(), + "b".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: 1, @@ -120,7 +106,7 @@ mod test { ) .unwrap(); d.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: 1, @@ -128,7 +114,7 @@ mod test { ) .unwrap(); d.add( - "z".into(), + "z".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: 1, @@ -137,7 +123,7 @@ mod test { .unwrap(); d.add( - "f".into(), + "f".try_into().unwrap(), Node::File { digest: DUMMY_DIGEST.clone(), size: 1, @@ -146,7 +132,7 @@ mod test { ) .unwrap(); d.add( - "c".into(), + "c".try_into().unwrap(), Node::File { digest: DUMMY_DIGEST.clone(), size: 1, @@ -155,7 +141,7 @@ mod test { ) .unwrap(); d.add( - "g".into(), + "g".try_into().unwrap(), Node::File { digest: DUMMY_DIGEST.clone(), size: 1, @@ -165,21 +151,21 @@ mod test { .unwrap(); d.add( - "t".into(), + "t".try_into().unwrap(), Node::Symlink { target: "a".try_into().unwrap(), }, ) .unwrap(); d.add( - "o".into(), + "o".try_into().unwrap(), Node::Symlink { target: "a".try_into().unwrap(), }, ) .unwrap(); d.add( - "e".into(), + "e".try_into().unwrap(), Node::Symlink { target: "a".try_into().unwrap(), }, @@ -197,7 +183,7 @@ mod test { assert_eq!( d.add( - "foo".into(), + "foo".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: u64::MAX @@ -212,7 +198,7 @@ mod test { let mut d = Directory::new(); d.add( - "a".into(), + "a".try_into().unwrap(), Node::Directory { digest: DUMMY_DIGEST.clone(), size: 1, @@ -223,7 +209,7 @@ mod test { format!( "{}", d.add( - "a".into(), + "a".try_into().unwrap(), Node::File { digest: DUMMY_DIGEST.clone(), size: 1, @@ -235,20 +221,4 @@ mod test { "\"a\" is a duplicate name" ); } - - /// Attempt to add a directory entry with a name which should be rejected. - #[tokio::test] - async fn directory_reject_invalid_name() { - let mut dir = Directory::new(); - assert!( - dir.add( - "".into(), // wrong! can not be added to directory - Node::Symlink { - target: "doesntmatter".try_into().unwrap(), - }, - ) - .is_err(), - "invalid symlink entry be rejected" - ); - } } diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs index 0c7b89de917b..684e65f89b25 100644 --- a/tvix/castore/src/nodes/mod.rs +++ b/tvix/castore/src/nodes/mod.rs @@ -4,7 +4,7 @@ mod symlink_target; use crate::B3Digest; pub use directory::Directory; -use symlink_target::SymlinkTarget; +pub use symlink_target::SymlinkTarget; /// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode]. /// Nodes themselves don't have names, what gives them names is either them diff --git a/tvix/castore/src/path/component.rs b/tvix/castore/src/path/component.rs new file mode 100644 index 000000000000..f755f06e62a8 --- /dev/null +++ b/tvix/castore/src/path/component.rs @@ -0,0 +1,102 @@ +// TODO: split out this error +use crate::DirectoryError; + +use bstr::ByteSlice; +use std::fmt::{self, Debug, Display}; + +/// A wrapper type for validated path components in the castore model. +/// Internally uses a [bytes::Bytes], but disallows +/// slashes, and null bytes to be present, as well as +/// '.', '..' and the empty string. +#[repr(transparent)] +#[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] +pub struct PathComponent { + pub(super) inner: bytes::Bytes, +} + +impl AsRef<[u8]> for PathComponent { + fn as_ref(&self) -> &[u8] { + self.inner.as_ref() + } +} + +impl From<PathComponent> for bytes::Bytes { + fn from(value: PathComponent) -> Self { + value.inner + } +} + +pub(super) fn is_valid_name<B: AsRef<[u8]>>(name: B) -> bool { + let v = name.as_ref(); + + !v.is_empty() && v != *b".." && v != *b"." && !v.contains(&0x00) && !v.contains(&b'/') +} + +impl TryFrom<bytes::Bytes> for PathComponent { + type Error = DirectoryError; + + fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> { + if !is_valid_name(&value) { + return Err(DirectoryError::InvalidName(value)); + } + + Ok(Self { inner: value }) + } +} + +impl TryFrom<&'static [u8]> for PathComponent { + type Error = DirectoryError; + + fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> { + if !is_valid_name(value) { + return Err(DirectoryError::InvalidName(bytes::Bytes::from_static( + value, + ))); + } + Ok(Self { + inner: bytes::Bytes::from_static(value), + }) + } +} + +impl TryFrom<&str> for PathComponent { + type Error = DirectoryError; + + fn try_from(value: &str) -> Result<Self, Self::Error> { + if !is_valid_name(value) { + return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice( + value.as_bytes(), + ))); + } + Ok(Self { + inner: bytes::Bytes::copy_from_slice(value.as_bytes()), + }) + } +} + +impl TryFrom<&std::ffi::CStr> for PathComponent { + type Error = DirectoryError; + + fn try_from(value: &std::ffi::CStr) -> Result<Self, Self::Error> { + if !is_valid_name(value.to_bytes()) { + return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice( + value.to_bytes(), + ))); + } + Ok(Self { + inner: bytes::Bytes::copy_from_slice(value.to_bytes()), + }) + } +} + +impl Debug for PathComponent { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(self.inner.as_bstr(), f) + } +} + +impl Display for PathComponent { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(self.inner.as_bstr(), f) + } +} diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path/mod.rs index 8a55e9f5a1d3..5a68f1add416 100644 --- a/tvix/castore/src/path.rs +++ b/tvix/castore/src/path/mod.rs @@ -1,5 +1,5 @@ //! Contains data structures to deal with Paths in the tvix-castore model. - +use bstr::ByteSlice; use std::{ borrow::Borrow, fmt::{self, Debug, Display}, @@ -8,9 +8,8 @@ use std::{ str::FromStr, }; -use bstr::ByteSlice; - -use crate::Directory; +mod component; +pub use component::PathComponent; /// Represents a Path in the castore model. /// These are always relative, and platform-independent, which distinguishes @@ -38,7 +37,7 @@ impl Path { if !bytes.is_empty() { // Ensure all components are valid castore node names. for component in bytes.split_str(b"/") { - if !Directory::is_valid_name(component) { + if !component::is_valid_name(component) { return None; } } @@ -83,10 +82,26 @@ impl Path { Ok(v) } + /// Provides an iterator over the components of the path, + /// which are invividual [PathComponent]. + /// In case the path is empty, an empty iterator is returned. + pub fn components(&self) -> impl Iterator<Item = PathComponent> + '_ { + let mut iter = self.inner.split_str(&b"/"); + + // We don't want to return an empty element, consume it if it's the only one. + if self.inner.is_empty() { + let _ = iter.next(); + } + + iter.map(|b| PathComponent { + inner: bytes::Bytes::copy_from_slice(b), + }) + } + /// Produces an iterator over the components of the path, which are /// individual byte slices. /// In case the path is empty, an empty iterator is returned. - pub fn components(&self) -> impl Iterator<Item = &[u8]> { + pub fn components_bytes(&self) -> impl Iterator<Item = &[u8]> { let mut iter = self.inner.split_str(&b"/"); // We don't want to return an empty element, consume it if it's the only one. @@ -97,11 +112,16 @@ impl Path { iter } - /// Returns the final component of the Path, if there is one. - pub fn file_name(&self) -> Option<&[u8]> { + /// Returns the final component of the Path, if there is one, in bytes. + pub fn file_name(&self) -> Option<PathComponent> { self.components().last() } + /// Returns the final component of the Path, if there is one, in bytes. + pub fn file_name_bytes(&self) -> Option<&[u8]> { + self.components_bytes().last() + } + pub fn as_bytes(&self) -> &[u8] { &self.inner } @@ -213,7 +233,7 @@ impl PathBuf { /// Adjoins `name` to self. pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { - if !Directory::is_valid_name(name) { + if !component::is_valid_name(name) { return Err(std::io::ErrorKind::InvalidData.into()); } @@ -333,7 +353,7 @@ mod test { assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch"); assert_eq!( num_components, - p.components().count(), + p.components_bytes().count(), "number of components mismatch" ); } @@ -400,10 +420,10 @@ mod test { #[case("a", vec!["a"])] #[case("a/b", vec!["a", "b"])] #[case("a/b/c", vec!["a","b", "c"])] - pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) { + pub fn components_bytes(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) { assert_eq!( exp_components, - p.components() + p.components_bytes() .map(|x| x.to_str().unwrap()) .collect::<Vec<_>>() ); diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index c12eb4ee3c91..f32f3a0687a7 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -5,7 +5,7 @@ use prost::Message; mod grpc_blobservice_wrapper; mod grpc_directoryservice_wrapper; -use crate::{B3Digest, DirectoryError}; +use crate::{path::PathComponent, B3Digest, DirectoryError}; pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper; pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper; @@ -162,19 +162,19 @@ impl From<&crate::Directory> for Directory { size, executable, } => files.push(FileNode { - name: name.clone(), + name: name.to_owned().into(), digest: digest.to_owned().into(), size: *size, executable: *executable, }), crate::Node::Directory { digest, size } => directories.push(DirectoryNode { - name: name.clone(), + name: name.to_owned().into(), digest: digest.to_owned().into(), size: *size, }), crate::Node::Symlink { target } => { symlinks.push(SymlinkNode { - name: name.clone(), + name: name.to_owned().into(), target: target.to_owned().into(), }); } @@ -190,22 +190,24 @@ impl From<&crate::Directory> for Directory { impl Node { /// Converts a proto [Node] to a [crate::Node], and splits off the name. - pub fn into_name_and_node(self) -> Result<(bytes::Bytes, crate::Node), DirectoryError> { + pub fn into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> { match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? { node::Node::Directory(n) => { + let name: PathComponent = n.name.try_into()?; let digest = B3Digest::try_from(n.digest) - .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; + .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?; let node = crate::Node::Directory { digest, size: n.size, }; - Ok((n.name, node)) + Ok((name, node)) } node::Node::File(n) => { + let name: PathComponent = n.name.try_into()?; let digest = B3Digest::try_from(n.digest) - .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e.into()))?; + .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?; let node = crate::Node::File { digest, @@ -213,23 +215,26 @@ impl Node { executable: n.executable, }; - Ok((n.name, node)) + Ok((name, node)) } node::Node::Symlink(n) => { + let name: PathComponent = n.name.try_into()?; let node = crate::Node::Symlink { target: n .target .try_into() - .map_err(|e| DirectoryError::InvalidNode(n.name.to_owned(), e))?, + .map_err(|e| DirectoryError::InvalidNode(name.clone(), e))?, }; - Ok((n.name, node)) + Ok((name, node)) } } } /// Construsts a [Node] from a name and [crate::Node]. + /// The name is a [bytes::Bytes], not a [PathComponent], as we have use an + /// empty name in some places. pub fn from_name_and_node(name: bytes::Bytes, n: crate::Node) -> Self { match n { crate::Node::Directory { digest, size } => Self { diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs index 215bce7275dd..1e7cdb89c118 100644 --- a/tvix/castore/src/proto/tests/directory.rs +++ b/tvix/castore/src/proto/tests/directory.rs @@ -301,7 +301,7 @@ fn validate_sorting() { }; match crate::Directory::try_from(d).expect_err("must fail") { DirectoryError::DuplicateName(s) => { - assert_eq!(s, b"a"); + assert_eq!(s.as_ref(), b"a"); } _ => panic!("unexpected error"), } |