From b25d98a84e9830c3f800ca8c84d9df09d6b1296d Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Mon, 29 May 2023 15:11:31 +0300 Subject: feat(tvix/store/fuse): initial implementation This is a first implementation of a FUSE filesystem, mounting tvix-store to a given location. This is mostly meant as one additional lens into a store, and could be used for builds. It's not meant to be used as a general-purpose thing. It still has some rough edges: - It doesn't implement open/close, so it doesn't use file handles. Which means, we need to open blobs for partial reads over and over again. - It doesn't implement seek, as BlobReader doesn't implement seek yet. - It doesn't track "lifetimes" of inodes by listening on forget, meaning it might hold more data in memory than necessary. - As we don't have store composition (and a caching layer) yet, operations might be slow. Change-Id: Ib1812ed761dfaf6aeb548443ae939c87530b7be8 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8667 Tested-by: BuildkiteCI Autosubmit: flokli Reviewed-by: tazjin --- tvix/store/src/fuse/file_attr.rs | 56 +++ tvix/store/src/fuse/inode_tracker.rs | 458 ++++++++++++++++++++ tvix/store/src/fuse/inodes.rs | 78 ++++ tvix/store/src/fuse/mod.rs | 417 +++++++++++++++++- tvix/store/src/fuse/tests.rs | 797 +++++++++++++++++++++++++++++++++++ 5 files changed, 1804 insertions(+), 2 deletions(-) create mode 100644 tvix/store/src/fuse/file_attr.rs create mode 100644 tvix/store/src/fuse/inode_tracker.rs create mode 100644 tvix/store/src/fuse/inodes.rs create mode 100644 tvix/store/src/fuse/tests.rs (limited to 'tvix/store') diff --git a/tvix/store/src/fuse/file_attr.rs b/tvix/store/src/fuse/file_attr.rs new file mode 100644 index 000000000000..b2b971d9e409 --- /dev/null +++ b/tvix/store/src/fuse/file_attr.rs @@ -0,0 +1,56 @@ +use std::time::SystemTime; + +use super::inodes::{DirectoryInodeData, InodeData}; +use fuser::FileAttr; + +/// The [FileAttr] describing the root +pub const ROOT_FILE_ATTR: FileAttr = FileAttr { + 
ino: fuser::FUSE_ROOT_ID, + size: 0, + blksize: 1024, + blocks: 0, + atime: SystemTime::UNIX_EPOCH, + mtime: SystemTime::UNIX_EPOCH, + ctime: SystemTime::UNIX_EPOCH, + crtime: SystemTime::UNIX_EPOCH, + kind: fuser::FileType::Directory, + perm: 0o555, + nlink: 0, + uid: 0, + gid: 0, + rdev: 0, + flags: 0, +}; + +/// For a given [InodeData] and inode number, construct a [FileAttr] +pub fn gen_file_attr(inode_data: &InodeData, inode: u64) -> FileAttr { + FileAttr { + ino: inode, + size: match inode_data { + InodeData::Regular(_, size, _) => *size as u64, + InodeData::Symlink(target) => target.len() as u64, + InodeData::Directory(DirectoryInodeData::Sparse(_, size)) => *size as u64, + InodeData::Directory(DirectoryInodeData::Populated(_, ref children)) => { + children.len() as u64 + } + }, + // FUTUREWORK: play with these numbers, as it affects read sizes for client applications. + blksize: 1024, + blocks: 0, + atime: SystemTime::UNIX_EPOCH, + mtime: SystemTime::UNIX_EPOCH, + ctime: SystemTime::UNIX_EPOCH, + crtime: SystemTime::UNIX_EPOCH, + kind: inode_data.into(), + perm: match inode_data { + InodeData::Regular(..) => 0o444, + InodeData::Symlink(_) => 0o444, + InodeData::Directory(..) => 0o555, + }, + nlink: 0, + uid: 0, + gid: 0, + rdev: 0, + flags: 0, + } +} diff --git a/tvix/store/src/fuse/inode_tracker.rs b/tvix/store/src/fuse/inode_tracker.rs new file mode 100644 index 000000000000..06434d25b37d --- /dev/null +++ b/tvix/store/src/fuse/inode_tracker.rs @@ -0,0 +1,458 @@ +use std::{collections::HashMap, sync::Arc}; + +use crate::{proto, B3Digest}; + +use super::inodes::{DirectoryInodeData, InodeData}; + +/// InodeTracker keeps track of inodes, stores data behind these inodes and deals +/// with inode allocation. +pub struct InodeTracker { + data: HashMap>, + + // lookup table for blobs by their B3Digest + blob_digest_to_inode: HashMap, + + // lookup table for symlinks by their target + symlink_target_to_inode: HashMap, + + // lookup table for directories by their B3Digest. 
+ // Note the corresponding directory may not be present in data yet. + directory_digest_to_inode: HashMap, + + // the next inode to allocate + next_inode: u64, +} + +impl Default for InodeTracker { + fn default() -> Self { + Self { + data: Default::default(), + + blob_digest_to_inode: Default::default(), + symlink_target_to_inode: Default::default(), + directory_digest_to_inode: Default::default(), + + next_inode: 2, + } + } +} + +impl InodeTracker { + // Retrieves data for a given inode, if it exists. + pub fn get(&self, ino: u64) -> Option> { + self.data.get(&ino).cloned() + } + + // Stores data and returns the inode for it. + // In case an inode has already been allocated for the same data, that inode + // is returned, otherwise a new one is allocated. + // In case data is a [InodeData::Directory], inodes for all items are looked + // up + pub fn put(&mut self, data: InodeData) -> u64 { + match data { + InodeData::Regular(ref digest, _, _) => { + match self.blob_digest_to_inode.get(digest) { + Some(found_ino) => { + // We already have it, return the inode. + *found_ino + } + None => self.insert_and_increment(data), + } + } + InodeData::Symlink(ref target) => { + match self.symlink_target_to_inode.get(target) { + Some(found_ino) => { + // We already have it, return the inode. + *found_ino + } + None => self.insert_and_increment(data), + } + } + InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _size)) => { + // check the lookup table if the B3Digest is known. + match self.directory_digest_to_inode.get(digest) { + Some(found_ino) => { + // We already have it, return the inode. + *found_ino + } + None => { + // insert and return the inode + self.insert_and_increment(data) + } + } + } + // Inserting [DirectoryInodeData::Populated] usually replaces an + // existing [DirectoryInodeData::Sparse] one. 
+ InodeData::Directory(DirectoryInodeData::Populated(ref digest, ref children)) => { + let dir_ino = self.directory_digest_to_inode.get(digest); + if let Some(dir_ino) = dir_ino { + let dir_ino = *dir_ino; + + // We know the data must exist, as we found it in [directory_digest_to_inode]. + let needs_update = match **self.data.get(&dir_ino).unwrap() { + InodeData::Regular(..) | InodeData::Symlink(_) => { + panic!("unexpected type at inode {}", dir_ino); + } + // already populated, nothing to do + InodeData::Directory(DirectoryInodeData::Populated(..)) => false, + // in case the actual data is sparse, replace it with the populated one. + // this allocates inodes for new children in the process. + InodeData::Directory(DirectoryInodeData::Sparse( + ref old_digest, + ref _old_size, + )) => { + // sanity checking to ensure we update the right node + debug_assert_eq!(old_digest, digest); + + true + } + }; + + if needs_update { + // populate inode fields in children + let children = self.allocate_inodes_for_children(children.to_vec()); + + // update sparse data with populated data + self.data.insert( + dir_ino, + Arc::new(InodeData::Directory(DirectoryInodeData::Populated( + digest.clone(), + children, + ))), + ); + } + + dir_ino + } else { + // populate inode fields in children + let children = self.allocate_inodes_for_children(children.to_vec()); + // insert and return InodeData + self.insert_and_increment(InodeData::Directory(DirectoryInodeData::Populated( + digest.clone(), + children, + ))) + } + } + } + } + + // Consume a list of children with zeroed inodes, and allocate (or fetch existing) inodes. 
+ fn allocate_inodes_for_children( + &mut self, + children: Vec<(u64, proto::node::Node)>, + ) -> Vec<(u64, proto::node::Node)> { + // allocate new inodes for all children + let mut children_new: Vec<(u64, proto::node::Node)> = Vec::new(); + + for (child_ino, ref child_node) in children { + debug_assert_eq!(0, child_ino, "expected child inode to be 0"); + let child_ino = match child_node { + proto::node::Node::Directory(directory_node) => { + // Try putting the sparse data in. If we already have a + // populated version, it'll not update it. + self.put(directory_node.into()) + } + proto::node::Node::File(file_node) => self.put(file_node.into()), + proto::node::Node::Symlink(symlink_node) => self.put(symlink_node.into()), + }; + + children_new.push((child_ino, child_node.clone())) + } + children_new + } + + // Inserts the data and returns the inode it was stored at, while + // incrementing next_inode. + fn insert_and_increment(&mut self, data: InodeData) -> u64 { + let ino = self.next_inode; + // insert into lookup tables + match data { + InodeData::Regular(ref digest, _, _) => { + self.blob_digest_to_inode.insert(digest.clone(), ino); + } + InodeData::Symlink(ref target) => { + self.symlink_target_to_inode.insert(target.to_string(), ino); + } + InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _size)) => { + self.directory_digest_to_inode.insert(digest.clone(), ino); + } + // This is currently not used outside test fixtures. + // Usually a [DirectoryInodeData::Sparse] is inserted and later + // "upgraded" with more data. + // However, as a future optimization, a lookup for a PathInfo could trigger a + // [DirectoryService::get_recursive()] request that "forks into + // background" and prepopulates all Directories in a closure. 
+ InodeData::Directory(DirectoryInodeData::Populated(ref digest, _)) => { + self.directory_digest_to_inode.insert(digest.clone(), ino); + } + } + // Insert data + self.data.insert(ino, Arc::new(data)); + + // increment inode counter and return old inode. + self.next_inode += 1; + ino + } +} + +#[cfg(test)] +mod tests { + use crate::fuse::inodes::DirectoryInodeData; + use crate::proto; + use crate::tests::fixtures; + + use super::InodeData; + use super::InodeTracker; + + /// Getting something non-existent should be none + #[test] + fn get_nonexistent() { + let inode_tracker = InodeTracker::default(); + assert!(inode_tracker.get(1).is_none()); + } + + /// Put of a regular file should allocate an inode, which should be the same when inserting again. + #[test] + fn put_regular() { + let mut inode_tracker = InodeTracker::default(); + let f = InodeData::Regular( + fixtures::BLOB_A_DIGEST.clone(), + fixtures::BLOB_A.len() as u32, + false, + ); + + // put it in + let ino = inode_tracker.put(f.clone()); + + // a get should return the right data + let data = inode_tracker.get(ino).expect("must be some"); + match *data { + InodeData::Regular(ref digest, _, _) => { + assert_eq!(&fixtures::BLOB_A_DIGEST.clone(), digest); + } + InodeData::Symlink(_) | InodeData::Directory(..) 
+ => panic!("wrong type"), + } + + // another put should return the same ino + assert_eq!(ino, inode_tracker.put(f)); + + // inserting another file should return a different ino + assert_ne!( + ino, + inode_tracker.put(InodeData::Regular( + fixtures::BLOB_B_DIGEST.clone(), + fixtures::BLOB_B.len() as u32, + false, + )) + ); + } + + // Put of a symlink should allocate an inode, which should be the same when inserting again + #[test] + fn put_symlink() { + let mut inode_tracker = InodeTracker::default(); + let f = InodeData::Symlink("target".to_string()); + + // put it in + let ino = inode_tracker.put(f.clone()); + + // a get should return the right data + let data = inode_tracker.get(ino).expect("must be some"); + match *data { + InodeData::Symlink(ref target) => { + assert_eq!("target", target); + } + InodeData::Regular(..) | InodeData::Directory(..) => panic!("wrong type"), + } + + // another put should return the same ino + assert_eq!(ino, inode_tracker.put(f)); + + // inserting another symlink (with a different target) should return a different ino + assert_ne!( + ino, + inode_tracker.put(InodeData::Symlink("target2".to_string())) + ); + } + + // TODO: put sparse directory + + /// Put a directory into the inode tracker, which refers to a file not seen yet. + #[test] + fn put_directory_leaf() { + let mut inode_tracker = InodeTracker::default(); + + // this is a directory with a single item, a ".keep" file pointing to a 0 bytes blob. 
+ let dir: InodeData = fixtures::DIRECTORY_WITH_KEEP.clone().into(); + + // put it in + let dir_ino = inode_tracker.put(dir.clone()); + + // a get should return the right data + let data = inode_tracker.get(dir_ino).expect("must be some"); + match *data { + InodeData::Directory(super::DirectoryInodeData::Sparse(..)) => { + panic!("wrong type"); + } + InodeData::Directory(super::DirectoryInodeData::Populated( + ref directory_digest, + ref children, + )) => { + // ensure the directory digest matches + assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest); + + // ensure the child is populated, with a different inode than + // the parent, and the data matches expectations. + assert_eq!(1, children.len()); + let (child_ino, child_node) = children.first().unwrap(); + assert_ne!(dir_ino, *child_ino); + assert_eq!( + &proto::node::Node::File( + fixtures::DIRECTORY_WITH_KEEP.files.first().unwrap().clone() + ), + child_node + ); + + // ensure looking up that inode directly returns the data + let child_data = inode_tracker.get(*child_ino).expect("must exist"); + match *child_data { + InodeData::Regular(ref digest, size, executable) => { + assert_eq!(&fixtures::EMPTY_BLOB_DIGEST.clone(), digest); + assert_eq!(0, size); + assert_eq!(false, executable); + } + InodeData::Symlink(_) | InodeData::Directory(..) => panic!("wrong type"), + } + } + InodeData::Symlink(_) | InodeData::Regular(..) => panic!("wrong type"), + } + } + + /// Put a directory into the inode tracker, referring to files, directories + /// and symlinks not seen yet. + #[test] + fn put_directory_complicated() { + let mut inode_tracker = InodeTracker::default(); + + // this is a directory with three items: a ".keep" file, an "aa" symlink and a "keep" directory. 
+ let dir_complicated: InodeData = fixtures::DIRECTORY_COMPLICATED.clone().into(); + + // put it in + let dir_complicated_ino = inode_tracker.put(dir_complicated.clone()); + + // a get should return the right data + let dir_data = inode_tracker + .get(dir_complicated_ino) + .expect("must be some"); + + let child_dir_ino = match *dir_data { + InodeData::Directory(DirectoryInodeData::Sparse(..)) => { + panic!("wrong type"); + } + InodeData::Directory(DirectoryInodeData::Populated( + ref directory_digest, + ref children, + )) => { + // assert the directory digest matches + assert_eq!(&fixtures::DIRECTORY_COMPLICATED.digest(), directory_digest); + + // ensure there's three children, all with different inodes + assert_eq!(3, children.len()); + let mut seen_inodes = Vec::from([dir_complicated_ino]); + + // check the first child (.keep) + { + let (child_ino, child_node) = &children[0]; + assert!(!seen_inodes.contains(&child_ino)); + assert_eq!( + &proto::node::Node::File(fixtures::DIRECTORY_COMPLICATED.files[0].clone()), + child_node + ); + seen_inodes.push(*child_ino); + } + + // check the second child (aa) + { + let (child_ino, child_node) = &children[1]; + assert!(!seen_inodes.contains(&child_ino)); + assert_eq!( + &proto::node::Node::Symlink( + fixtures::DIRECTORY_COMPLICATED.symlinks[0].clone() + ), + child_node + ); + seen_inodes.push(*child_ino); + } + + // check the third child (keep) + { + let (child_ino, child_node) = &children[2]; + assert!(!seen_inodes.contains(&child_ino)); + assert_eq!( + &proto::node::Node::Directory( + fixtures::DIRECTORY_COMPLICATED.directories[0].clone() + ), + child_node + ); + seen_inodes.push(*child_ino); + + // return the child_ino + *child_ino + } + } + InodeData::Regular(..) | InodeData::Symlink(_) => panic!("wrong type"), + }; + + // get of the inode for child_ino + let child_dir_data = inode_tracker.get(child_dir_ino).expect("must be some"); + // it should be a sparse InodeData::Directory with the right digest. 
+ match *child_dir_data { + InodeData::Directory(DirectoryInodeData::Sparse( + ref child_dir_digest, + child_dir_size, + )) => { + assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), child_dir_digest); + assert_eq!(fixtures::DIRECTORY_WITH_KEEP.size(), child_dir_size); + } + InodeData::Directory(DirectoryInodeData::Populated(..)) + | InodeData::Regular(..) + | InodeData::Symlink(_) => { + panic!("wrong type") + } + } + + // put DIRECTORY_WITH_KEEP, which should return the same ino as [child_dir_ino], + // but update the sparse object to a populated one at the same time. + let child_dir_ino2 = inode_tracker.put(fixtures::DIRECTORY_WITH_KEEP.clone().into()); + assert_eq!(child_dir_ino, child_dir_ino2); + + // get the data + match *inode_tracker.get(child_dir_ino).expect("must be some") { + // it should be a populated InodeData::Directory with the right digest! + InodeData::Directory(DirectoryInodeData::Populated( + ref directory_digest, + ref children, + )) => { + // ensure the directory digest matches + assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest); + + // ensure the child is populated, with a different inode than + // the parent, and the data matches expectations. + assert_eq!(1, children.len()); + let (child_node_inode, child_node) = children.first().unwrap(); + assert_ne!(dir_complicated_ino, *child_node_inode); + assert_eq!( + &proto::node::Node::File( + fixtures::DIRECTORY_WITH_KEEP.files.first().unwrap().clone() + ), + child_node + ); + } + InodeData::Directory(DirectoryInodeData::Sparse(..)) + | InodeData::Regular(..) + | InodeData::Symlink(_) => panic!("wrong type"), + } + } +} + +// TODO: add test inserting a populated one first, then ensure an update doesn't degrade it back to sparse. diff --git a/tvix/store/src/fuse/inodes.rs b/tvix/store/src/fuse/inodes.rs new file mode 100644 index 000000000000..c16509f424dd --- /dev/null +++ b/tvix/store/src/fuse/inodes.rs @@ -0,0 +1,78 @@ +///! 
This module contains all the data structures used to track information +///! about inodes, which present tvix-store nodes in a filesystem. +use crate::{proto, B3Digest}; + +#[derive(Clone, Debug)] +pub enum InodeData { + Regular(B3Digest, u32, bool), // digest, size, executable + Symlink(String), // target + Directory(DirectoryInodeData), // either [DirectoryInodeData:Sparse] or [DirectoryInodeData:Populated] +} + +/// This encodes the two different states of [InodeData::Directory]. +/// Either the data still is sparse (we only saw a [proto::DirectoryNode], but +/// didn't fetch the [proto::Directory] struct yet, +/// or we processed a lookup and did fetch the data. +#[derive(Clone, Debug)] +pub enum DirectoryInodeData { + Sparse(B3Digest, u32), // digest, size + Populated(B3Digest, Vec<(u64, proto::node::Node)>), // [(child_inode, node)] +} + +impl From<&proto::node::Node> for InodeData { + fn from(value: &proto::node::Node) -> Self { + match value { + proto::node::Node::Directory(directory_node) => directory_node.into(), + proto::node::Node::File(file_node) => file_node.into(), + proto::node::Node::Symlink(symlink_node) => symlink_node.into(), + } + } +} + +impl From<&proto::SymlinkNode> for InodeData { + fn from(value: &proto::SymlinkNode) -> Self { + InodeData::Symlink(value.target.clone()) + } +} + +impl From<&proto::FileNode> for InodeData { + fn from(value: &proto::FileNode) -> Self { + InodeData::Regular( + B3Digest::from_vec(value.digest.clone()).unwrap(), + value.size, + value.executable, + ) + } +} + +/// Converts a DirectoryNode to a sparsely populated InodeData::Directory. +impl From<&proto::DirectoryNode> for InodeData { + fn from(value: &proto::DirectoryNode) -> Self { + InodeData::Directory(DirectoryInodeData::Sparse( + B3Digest::from_vec(value.digest.clone()).unwrap(), + value.size, + )) + } +} + +/// converts a proto::Directory to a InodeData::Directory(DirectoryInodeData::Populated(..)). 
+/// The inodes for each child are 0, because it's up to the InodeTracker to allocate them. +impl From for InodeData { + fn from(value: proto::Directory) -> Self { + let digest = value.digest(); + + let children: Vec<(u64, proto::node::Node)> = value.nodes().map(|node| (0, node)).collect(); + + InodeData::Directory(DirectoryInodeData::Populated(digest, children)) + } +} + +impl From<&InodeData> for fuser::FileType { + fn from(val: &InodeData) -> Self { + match val { + InodeData::Regular(..) => fuser::FileType::RegularFile, + InodeData::Symlink(_) => fuser::FileType::Symlink, + InodeData::Directory(..) => fuser::FileType::Directory, + } + } +} diff --git a/tvix/store/src/fuse/mod.rs b/tvix/store/src/fuse/mod.rs index 7206cf3076d0..ac0bf29dab09 100644 --- a/tvix/store/src/fuse/mod.rs +++ b/tvix/store/src/fuse/mod.rs @@ -1,12 +1,74 @@ +mod file_attr; +mod inode_tracker; +mod inodes; + +#[cfg(test)] +mod tests; + use crate::{ - blobservice::BlobService, directoryservice::DirectoryService, pathinfoservice::PathInfoService, + blobservice::BlobService, + directoryservice::DirectoryService, + fuse::{ + file_attr::gen_file_attr, + inodes::{DirectoryInodeData, InodeData}, + }, + pathinfoservice::PathInfoService, + proto::{node::Node, NamedNode}, + B3Digest, Error, }; +use fuser::{FileAttr, ReplyAttr, Request}; +use nix_compat::store_path::StorePath; +use std::io::Read; use std::sync::Arc; +use std::{collections::HashMap, time::Duration}; +use tracing::{debug, info_span, warn}; +use self::inode_tracker::InodeTracker; + +/// This implements a read-only FUSE filesystem for a tvix-store +/// with the passed [BlobService], [DirectoryService] and [PathInfoService]. +/// +/// We don't allow listing on the root mountpoint (inode [fuser::FUSE_ROOT_ID]). +/// In the future, this might be made configurable once a listing method is +/// added to [self.path_info_service], and then show all store paths in that +/// store. +/// +/// Linux uses inodes in filesystems. 
When implementing FUSE, most calls are +/// *for* a given inode. +/// +/// This means, we need to have a stable mapping of inode numbers to the +/// corresponding store nodes. +/// +/// We internally delegate all inode allocation and state keeping to a +/// [InodeTracker], and store the currently "explored" store paths together with +/// root inode of the root. +/// +/// There's some places where inodes are allocated / data inserted into +/// [self.inode_tracker], if not allocated before already: +/// - Processing a `lookup` request, either in the mount root, or somewhere +/// deeper +/// - Processing a `readdir` request +/// +/// Things pointing to the same contents get the same inodes, irrespective of +/// their own location. +/// This means: +/// - Symlinks with the same target will get the same inode. +/// - Regular/executable files with the same contents will get the same inode +/// - Directories with the same contents will get the same inode. +/// +/// Due to the above being valid across the whole store, and considering the +/// merkle structure is a DAG, not a tree, this also means we can't do "bucketed +/// allocation", aka reserve Directory.size inodes for each PathInfo. pub struct FUSE { blob_service: Arc, directory_service: Arc, path_info_service: Arc, + + /// This maps a given StorePath to the inode we allocated for the root inode. + store_paths: HashMap, + + /// This keeps track of inodes and data alongside them. + inode_tracker: InodeTracker, } impl FUSE { @@ -19,8 +81,359 @@ impl FUSE { blob_service, directory_service, path_info_service, + + store_paths: HashMap::default(), + inode_tracker: Default::default(), + } + } + + /// This will turn a lookup request for [std::ffi::OsStr] in the root to + /// a ino and [InodeData]. + /// It will peek in [self.store_paths], and then either look it up from + /// [self.inode_tracker], + /// or otherwise fetch from [self.path_info_service], and then insert into + /// [self.inode_tracker]. 
+ fn name_in_root_to_ino_and_data( + &mut self, + name: &std::ffi::OsStr, + ) -> Result)>, Error> { + // parse the name into a [StorePath]. + let store_path = if let Some(name) = name.to_str() { + match StorePath::from_string(name) { + Ok(store_path) => store_path, + Err(e) => { + debug!(e=?e, "unable to parse as store path"); + // This is not an error, but a "ENOENT", as someone can stat + // a file inside the root that's no valid store path + return Ok(None); + } + } + } else { + debug!("{name:?} is no string"); + // same here. + return Ok(None); + }; + + if let Some(ino) = self.store_paths.get(&store_path) { + // If we already have that store path, lookup the inode from + // self.store_paths and then get the data from [self.inode_tracker], + // which in the case of a [InodeData::Directory] will be fully + // populated. + Ok(Some(( + *ino, + self.inode_tracker.get(*ino).expect("must exist"), + ))) + } else { + // If we don't have it, look it up in PathInfoService. + match self.path_info_service.get(store_path.digest)? { + // the pathinfo doesn't exist, so the file doesn't exist. + None => Ok(None), + Some(path_info) => { + // The pathinfo does exist, so there must be a root node + let root_node = path_info.node.unwrap().node.unwrap(); + + // The name must match what's passed in the lookup, otherwise we return nothing. + if root_node.get_name() != store_path.to_string() { + return Ok(None); + } + + // insert the (sparse) inode data and register in + // self.store_paths. + // FUTUREWORK: change put to return the data after + // inserting, so we don't need to lookup a second + // time? + let ino = self.inode_tracker.put((&root_node).into()); + self.store_paths.insert(store_path, ino); + + Ok(Some((ino, self.inode_tracker.get(ino).unwrap()))) + } + } + } + } + + /// This will lookup a directory by digest, and will turn it into a + /// [InodeData::Directory(DirectoryInodeData::Populated(..))]. 
+ /// This is both used to initially insert the root node of a store path, + /// as well as when looking up an intermediate DirectoryNode. + fn fetch_directory_inode_data(&self, directory_digest: &B3Digest) -> Result { + match self.directory_service.get(directory_digest) { + Err(e) => { + warn!(e = e.to_string(), directory.digest=%directory_digest, "failed to get directory"); + Err(e) + } + // If the Directory can't be found, this is a hole, bail out. + Ok(None) => { + tracing::error!(directory.digest=%directory_digest, "directory not found in directory service"); + Err(Error::StorageError(format!( + "directory {} not found", + directory_digest + ))) + } + Ok(Some(directory)) => Ok(directory.into()), } } } -impl fuser::Filesystem for FUSE {} +impl fuser::Filesystem for FUSE { + #[tracing::instrument(skip_all, fields(rq.inode = ino))] + fn getattr(&mut self, _req: &Request, ino: u64, reply: ReplyAttr) { + debug!("getattr"); + + if ino == fuser::FUSE_ROOT_ID { + reply.attr(&Duration::MAX, &file_attr::ROOT_FILE_ATTR); + return; + } + + match self.inode_tracker.get(ino) { + None => reply.error(libc::ENOENT), + Some(node) => { + debug!(node = ?node, "found node"); + reply.attr(&Duration::MAX, &file_attr::gen_file_attr(&node, ino)); + } + } + } + + #[tracing::instrument(skip_all, fields(rq.parent_inode = parent_ino, rq.name = ?name))] + fn lookup( + &mut self, + _req: &Request, + parent_ino: u64, + name: &std::ffi::OsStr, + reply: fuser::ReplyEntry, + ) { + debug!("lookup"); + + // This goes from a parent inode to a node. + // - If the parent is [fuser::FUSE_ROOT_ID], we need to check + // [self.store_paths] (fetching from PathInfoService if needed) + // - Otherwise, lookup the parent in [self.inode_tracker] (which must be + // a [InodeData::Directory]), and find the child with that name. 
+ if parent_ino == fuser::FUSE_ROOT_ID { + match self.name_in_root_to_ino_and_data(name) { + Err(e) => { + warn!("{}", e); + reply.error(libc::EIO); + } + Ok(None) => { + reply.error(libc::ENOENT); + } + Ok(Some((ino, inode_data))) => { + warn!(inode_data=?&inode_data, ino=ino, "Some"); + reply_with_entry(reply, &gen_file_attr(&inode_data, ino)); + } + } + } else { + // This is the "lookup for "a" inside inode 42. + // We already know that inode 42 must be a directory. + // It might not be populated yet, so if it isn't, we do (by + // fetching from [self.directory_service]), and save the result in + // [self.inode_tracker]. + // Now it for sure is populated, so we search for that name in the + // list of children and return the FileAttrs. + + let parent_data = self.inode_tracker.get(parent_ino).unwrap(); + let parent_data = match *parent_data { + InodeData::Regular(..) | InodeData::Symlink(_) => { + // if the parent inode was not a directory, this doesn't make sense + reply.error(libc::ENOTDIR); + return; + } + InodeData::Directory(DirectoryInodeData::Sparse(ref parent_digest, _)) => { + match self.fetch_directory_inode_data(parent_digest) { + Ok(new_data) => { + // update data in [self.inode_tracker] with populated variant. + // FUTUREWORK: change put to return the data after + // inserting, so we don't need to lookup a second + // time? + let ino = self.inode_tracker.put(new_data); + self.inode_tracker.get(ino).unwrap() + } + Err(_e) => { + reply.error(libc::EIO); + return; + } + } + } + InodeData::Directory(DirectoryInodeData::Populated(..)) => parent_data, + }; + + // now parent_data can only be a [InodeData::Directory(DirectoryInodeData::Populated(..))]. 
+ let (parent_digest, children) = if let InodeData::Directory( + DirectoryInodeData::Populated(ref parent_digest, ref children), + ) = *parent_data + { + (parent_digest, children) + } else { + panic!("unexpected type") + }; + let span = info_span!("lookup", directory.digest = %parent_digest); + let _enter = span.enter(); + + // in the children, find the one with the desired name. + if let Some((child_ino, _)) = children.iter().find(|e| e.1.get_name() == name) { + // lookup the child [InodeData] in [self.inode_tracker]. + // We know the inodes for children have already been allocated. + let child_inode_data = self.inode_tracker.get(*child_ino).unwrap(); + + // Reply with the file attributes for the child. + // For child directories, we still have all data we need to reply. + reply_with_entry(reply, &gen_file_attr(&child_inode_data, *child_ino)); + } else { + // Child not found, return ENOENT. + reply.error(libc::ENOENT); + } + } + } + + // TODO: readdirplus? + + #[tracing::instrument(skip_all, fields(rq.inode = ino, rq.offset = offset))] + fn readdir( + &mut self, + _req: &Request<'_>, + ino: u64, + _fh: u64, + offset: i64, + mut reply: fuser::ReplyDirectory, + ) { + debug!("readdir"); + + if ino == fuser::FUSE_ROOT_ID { + reply.error(libc::EPERM); // same error code as ipfs/kubo + return; + } + + // lookup the inode data. + let dir_inode_data = self.inode_tracker.get(ino).unwrap(); + let dir_inode_data = match *dir_inode_data { + InodeData::Regular(..) | InodeData::Symlink(..) => { + warn!("Not a directory"); + reply.error(libc::ENOTDIR); + return; + } + InodeData::Directory(DirectoryInodeData::Sparse(ref directory_digest, _)) => { + match self.fetch_directory_inode_data(directory_digest) { + Ok(new_data) => { + // update data in [self.inode_tracker] with populated variant. + // FUTUREWORK: change put to return the data after + // inserting, so we don't need to lookup a second + // time? 
+ let ino = self.inode_tracker.put(new_data); + self.inode_tracker.get(ino).unwrap() + } + Err(_e) => { + reply.error(libc::EIO); + return; + } + } + } + InodeData::Directory(DirectoryInodeData::Populated(..)) => dir_inode_data, + }; + + // now parent_data can only be InodeData::Directory(DirectoryInodeData::Populated(..)) + if let InodeData::Directory(DirectoryInodeData::Populated(ref _digest, ref children)) = + *dir_inode_data + { + for (i, (ino, child_node)) in children.iter().skip(offset as usize).enumerate() { + // the second parameter will become the "offset" parameter on the next call. + let full = reply.add( + *ino, + offset + i as i64 + 1_i64, + match child_node { + Node::Directory(_) => fuser::FileType::Directory, + Node::File(_) => fuser::FileType::RegularFile, + Node::Symlink(_) => fuser::FileType::Symlink, + }, + child_node.get_name(), + ); + if full { + break; + } + } + reply.ok(); + } else { + panic!("unexpected type") + } + } + + /// TODO: implement open + close? + + #[tracing::instrument(skip_all, fields(rq.inode = ino, rq.offset = offset, rq.size = size))] + fn read( + &mut self, + _req: &Request<'_>, + ino: u64, + _fh: u64, + offset: i64, + size: u32, + _flags: i32, + _lock_owner: Option, + reply: fuser::ReplyData, + ) { + debug!("read"); + + if ino == fuser::FUSE_ROOT_ID { + reply.error(libc::ENOSYS); + return; + } + // lookup the inode + match *self.inode_tracker.get(ino).unwrap() { + // read is invalid on non-files. + InodeData::Directory(..) 
| InodeData::Symlink(_) => { + warn!("is directory"); + reply.error(libc::EISDIR); + } + InodeData::Regular(ref blob_digest, _blob_size, _) => { + let span = info_span!("read", blob.digest = %blob_digest); + let _enter = span.enter(); + + match self.blob_service.open_read(blob_digest) { + Ok(None) => { + warn!("blob not found"); + reply.error(libc::EIO); + } + Err(e) => { + warn!(e=?e, "error opening blob"); + reply.error(libc::EIO); + } + Ok(Some(blob_reader)) => { + let data: std::io::Result> = blob_reader + .bytes() + // TODO: this is obviously terrible. blobreader should implement seek. + .skip(offset.try_into().unwrap()) + .take(size.try_into().unwrap()) + .collect(); + + match data { + Ok(data) => { + // respond with the requested data + reply.data(&data); + } + Err(e) => reply.error(e.raw_os_error().unwrap()), + } + } + } + } + } + } + + #[tracing::instrument(skip_all, fields(rq.inode = ino))] + fn readlink(&mut self, _req: &Request<'_>, ino: u64, reply: fuser::ReplyData) { + if ino == fuser::FUSE_ROOT_ID { + reply.error(libc::ENOSYS); + return; + } + + // lookup the inode + match *self.inode_tracker.get(ino).unwrap() { + InodeData::Directory(..) | InodeData::Regular(..) 
=> { + reply.error(libc::EINVAL); + } + InodeData::Symlink(ref target) => reply.data(target.as_bytes()), + } + } +} + +fn reply_with_entry(reply: fuser::ReplyEntry, file_attr: &FileAttr) { + reply.entry(&Duration::MAX, file_attr, 1 /* TODO: generation */); +} diff --git a/tvix/store/src/fuse/tests.rs b/tvix/store/src/fuse/tests.rs new file mode 100644 index 000000000000..6350c8fba482 --- /dev/null +++ b/tvix/store/src/fuse/tests.rs @@ -0,0 +1,797 @@ +use std::fs; +use std::io::Cursor; +use std::os::unix::prelude::MetadataExt; +use std::path::Path; +use std::sync::Arc; + +use tempfile::TempDir; + +use crate::blobservice::BlobService; +use crate::directoryservice::DirectoryService; +use crate::pathinfoservice::PathInfoService; +use crate::proto::{DirectoryNode, FileNode, PathInfo}; +use crate::tests::fixtures; +use crate::tests::utils::{gen_blob_service, gen_directory_service, gen_pathinfo_service}; +use crate::{proto, FUSE}; + +const BLOB_A_NAME: &str = "00000000000000000000000000000000-test"; +const SYMLINK_NAME: &str = "11111111111111111111111111111111-test"; +const SYMLINK_NAME2: &str = "44444444444444444444444444444444-test"; +const DIRECTORY_WITH_KEEP_NAME: &str = "22222222222222222222222222222222-test"; +const DIRECTORY_COMPLICATED_NAME: &str = "33333333333333333333333333333333-test"; + +fn setup_and_mount, F>( + mountpoint: P, + setup_fn: F, +) -> Result +where + F: Fn(Arc, Arc, Arc), +{ + let blob_service = gen_blob_service(); + let directory_service = gen_directory_service(); + let path_info_service = gen_pathinfo_service(blob_service.clone(), directory_service.clone()); + + setup_fn( + blob_service.clone(), + directory_service.clone(), + path_info_service.clone(), + ); + + let fs = FUSE::new(blob_service, directory_service, path_info_service); + fuser::spawn_mount2(fs, mountpoint, &[]) +} + +fn populate_blob_a( + blob_service: Arc, + _directory_service: Arc, + path_info_service: Arc, +) { + // Upload BLOB_A + let mut bw = blob_service.open_write(); + 
std::io::copy(&mut Cursor::new(fixtures::BLOB_A.to_vec()), &mut bw) + .expect("must succeed uploading"); + bw.close().expect("must succeed closing"); + + // Create a PathInfo for it + let path_info = PathInfo { + node: Some(proto::Node { + node: Some(proto::node::Node::File(FileNode { + name: BLOB_A_NAME.to_string(), + digest: fixtures::BLOB_A_DIGEST.to_vec(), + size: fixtures::BLOB_A.len() as u32, + executable: false, + })), + }), + ..Default::default() + }; + path_info_service.put(path_info).expect("must succeed"); +} + +fn populate_symlink( + _blob_service: Arc, + _directory_service: Arc, + path_info_service: Arc, +) { + // Create a PathInfo for it + let path_info = PathInfo { + node: Some(proto::Node { + node: Some(proto::node::Node::Symlink(proto::SymlinkNode { + name: SYMLINK_NAME.to_string(), + target: BLOB_A_NAME.to_string(), + })), + }), + ..Default::default() + }; + path_info_service.put(path_info).expect("must succeed"); +} + +/// This writes a symlink pointing to /nix/store/somewhereelse, +/// which is the same symlink target as "aa" inside DIRECTORY_COMPLICATED. 
+fn populate_symlink2( + _blob_service: Arc, + _directory_service: Arc, + path_info_service: Arc, +) { + // Create a PathInfo for it + let path_info = PathInfo { + node: Some(proto::Node { + node: Some(proto::node::Node::Symlink(proto::SymlinkNode { + name: SYMLINK_NAME2.to_string(), + target: "/nix/store/somewhereelse".to_string(), + })), + }), + ..Default::default() + }; + path_info_service.put(path_info).expect("must succeed"); +} + +fn populate_directory_with_keep( + blob_service: Arc, + directory_service: Arc, + path_info_service: Arc, +) { + // upload empty blob + let mut bw = blob_service.open_write(); + assert_eq!( + fixtures::EMPTY_BLOB_DIGEST.to_vec(), + bw.close().expect("must succeed closing").to_vec(), + ); + + // upload directory + directory_service + .put(fixtures::DIRECTORY_WITH_KEEP.clone()) + .expect("must succeed uploading"); + + // upload pathinfo + let path_info = PathInfo { + node: Some(proto::Node { + node: Some(proto::node::Node::Directory(DirectoryNode { + name: DIRECTORY_WITH_KEEP_NAME.to_string(), + digest: fixtures::DIRECTORY_WITH_KEEP.digest().to_vec(), + size: fixtures::DIRECTORY_WITH_KEEP.size(), + })), + }), + ..Default::default() + }; + path_info_service.put(path_info).expect("must succeed"); +} + +/// Insert [PathInfo] for DIRECTORY_WITH_KEEP, but don't provide the Directory +/// itself. 
+fn populate_pathinfo_without_directory( + _: Arc, + _: Arc, + path_info_service: Arc, +) { + // upload pathinfo + let path_info = PathInfo { + node: Some(proto::Node { + node: Some(proto::node::Node::Directory(DirectoryNode { + name: DIRECTORY_WITH_KEEP_NAME.to_string(), + digest: fixtures::DIRECTORY_WITH_KEEP.digest().to_vec(), + size: fixtures::DIRECTORY_WITH_KEEP.size(), + })), + }), + ..Default::default() + }; + path_info_service.put(path_info).expect("must succeed"); +} + +/// Insert , but don't provide the blob .keep is pointing to +fn populate_blob_a_without_blob( + _: Arc, + _: Arc, + path_info_service: Arc, +) { + // Create a PathInfo for blob A + let path_info = PathInfo { + node: Some(proto::Node { + node: Some(proto::node::Node::File(FileNode { + name: BLOB_A_NAME.to_string(), + digest: fixtures::BLOB_A_DIGEST.to_vec(), + size: fixtures::BLOB_A.len() as u32, + executable: false, + })), + }), + ..Default::default() + }; + path_info_service.put(path_info).expect("must succeed"); +} + +fn populate_directory_complicated( + blob_service: Arc, + directory_service: Arc, + path_info_service: Arc, +) { + // upload empty blob + let mut bw = blob_service.open_write(); + assert_eq!( + fixtures::EMPTY_BLOB_DIGEST.to_vec(), + bw.close().expect("must succeed closing").to_vec(), + ); + + // upload inner directory + directory_service + .put(fixtures::DIRECTORY_WITH_KEEP.clone()) + .expect("must succeed uploading"); + + // uplodad parent directory + directory_service + .put(fixtures::DIRECTORY_COMPLICATED.clone()) + .expect("must succeed uploading"); + + // upload pathinfo + let path_info = PathInfo { + node: Some(proto::Node { + node: Some(proto::node::Node::Directory(DirectoryNode { + name: DIRECTORY_COMPLICATED_NAME.to_string(), + digest: fixtures::DIRECTORY_COMPLICATED.digest().to_vec(), + size: fixtures::DIRECTORY_COMPLICATED.size(), + })), + }), + ..Default::default() + }; + path_info_service.put(path_info).expect("must succeed"); +} + +/// Ensure mounting itself 
doesn't fail +#[test] +fn mount() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), |_, _, _| {}).expect("must succeed"); + + fuser_session.join() +} + +/// Ensure listing the root isn't allowed +#[test] +fn root() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), |_, _, _| {}).expect("must succeed"); + + { + // read_dir succeeds, but getting the first element will fail. + let mut it = fs::read_dir(tmpdir).expect("must succeed"); + + let err = it.next().expect("must be some").expect_err("must be err"); + assert_eq!(std::io::ErrorKind::PermissionDenied, err.kind()); + } + + fuser_session.join() +} + +/// Ensure we can stat a file at the root +#[test] +fn stat_file_at_root() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), populate_blob_a).expect("must succeed"); + + let p = tmpdir.path().join(BLOB_A_NAME); + + // peek at the file metadata + let metadata = fs::metadata(p).expect("must succeed"); + + assert!(metadata.is_file()); + assert!(metadata.permissions().readonly()); + assert_eq!(fixtures::BLOB_A.len() as u64, metadata.len()); + + fuser_session.join() +} + +/// Ensure we can read a file at the root +#[test] +fn read_file_at_root() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = 
TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), populate_blob_a).expect("must succeed"); + + let p = tmpdir.path().join(BLOB_A_NAME); + + // read the file contents + let data = fs::read(p).expect("must succeed"); + + // ensure size and contents match + assert_eq!(fixtures::BLOB_A.len(), data.len()); + assert_eq!(fixtures::BLOB_A.to_vec(), data); + + fuser_session.join() +} + +/// Read the target of a symlink +#[test] +fn symlink_readlink() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), populate_symlink).expect("must succeed"); + let p = tmpdir.path().join(SYMLINK_NAME); + + let target = fs::read_link(&p).expect("must succeed"); + assert_eq!(BLOB_A_NAME, target.to_str().unwrap()); + + // peek at the file metadata, which follows symlinks. + // this must fail, as we didn't populate the target. + let e = fs::metadata(&p).expect_err("must fail"); + assert_eq!(std::io::ErrorKind::NotFound, e.kind()); + + // peeking at the file metadata without following symlinks will succeed. + let metadata = fs::symlink_metadata(&p).expect("must succeed"); + assert!(metadata.is_symlink()); + + // reading from the symlink (which follows) will fail, because the target doesn't exist. + let e = fs::read(p).expect_err("must fail"); + assert_eq!(std::io::ErrorKind::NotFound, e.kind()); + + fuser_session.join() +} + +/// Read and stat a regular file through a symlink pointing to it. 
+#[test] +fn read_stat_through_symlink() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), |bs: Arc<_>, ds: Arc<_>, ps: Arc<_>| { + populate_blob_a(bs.clone(), ds.clone(), ps.clone()); + populate_symlink(bs, ds, ps); + }) + .expect("must succeed"); + + let p_symlink = tmpdir.path().join(SYMLINK_NAME); + let p_blob = tmpdir.path().join(SYMLINK_NAME); + + // peek at the file metadata, which follows symlinks. + // this must now return the same metadata as when statting at the target directly. + let metadata_symlink = fs::metadata(&p_symlink).expect("must succeed"); + let metadata_blob = fs::metadata(&p_blob).expect("must succeed"); + assert_eq!(metadata_blob.file_type(), metadata_symlink.file_type()); + assert_eq!(metadata_blob.len(), metadata_symlink.len()); + + // reading from the symlink (which follows) will return the same data as if + // we were reading from the file directly. + assert_eq!( + std::fs::read(p_blob).expect("must succeed"), + std::fs::read(p_symlink).expect("must succeed"), + ); + + fuser_session.join() +} + +/// Read a directory in the root, and validate some attributes. +#[test] +fn read_stat_directory() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = + setup_and_mount(tmpdir.path(), populate_directory_with_keep).expect("must succeed"); + + let p = tmpdir.path().join(DIRECTORY_WITH_KEEP_NAME); + + // peek at the metadata of the directory + let metadata = fs::metadata(&p).expect("must succeed"); + assert!(metadata.is_dir()); + assert!(metadata.permissions().readonly()); + + fuser_session.join() +} + +#[test] +/// Read a blob inside a directory. 
This ensures we successfully populate directory data. +fn read_blob_inside_dir() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = + setup_and_mount(tmpdir.path(), populate_directory_with_keep).expect("must succeed"); + + let p = tmpdir.path().join(DIRECTORY_WITH_KEEP_NAME).join(".keep"); + + // peek at metadata. + let metadata = fs::metadata(&p).expect("must succeed"); + assert!(metadata.is_file()); + assert!(metadata.permissions().readonly()); + + // read from it + let data = fs::read(&p).expect("must succeed"); + assert_eq!(fixtures::EMPTY_BLOB_CONTENTS.to_vec(), data); + + fuser_session.join() +} + +#[test] +/// Read a blob inside a directory inside a directory. This ensures we properly +/// populate directories as we traverse down the structure. +fn read_blob_deep_inside_dir() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = + setup_and_mount(tmpdir.path(), populate_directory_complicated).expect("must succeed"); + + let p = tmpdir + .path() + .join(DIRECTORY_COMPLICATED_NAME) + .join("keep") + .join(".keep"); + + // peek at metadata. + let metadata = fs::metadata(&p).expect("must succeed"); + assert!(metadata.is_file()); + assert!(metadata.permissions().readonly()); + + // read from it + let data = fs::read(&p).expect("must succeed"); + assert_eq!(fixtures::EMPTY_BLOB_CONTENTS.to_vec(), data); + + fuser_session.join() +} + +/// Ensure readdir works. 
+#[test] +fn readdir() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = + setup_and_mount(tmpdir.path(), populate_directory_complicated).expect("must succeed"); + + let p = tmpdir.path().join(DIRECTORY_COMPLICATED_NAME); + + { + // read_dir should succeed. Collect all elements + let elements: Vec<_> = fs::read_dir(p) + .expect("must succeed") + .map(|e| e.expect("must not be err")) + .collect(); + + assert_eq!(3, elements.len(), "number of elements should be 3"); // rust skips . and .. + + // We explicitly look at specific positions here, because we always emit + // them ordered. + + // ".keep", 0 byte file. + let e = &elements[0]; + assert_eq!(".keep", e.file_name()); + assert!(e.file_type().expect("must succeed").is_file()); + assert_eq!(0, e.metadata().expect("must succeed").len()); + + // "aa", symlink. + let e = &elements[1]; + assert_eq!("aa", e.file_name()); + assert!(e.file_type().expect("must succeed").is_symlink()); + + // "keep", directory + let e = &elements[2]; + assert_eq!("keep", e.file_name()); + assert!(e.file_type().expect("must succeed").is_dir()); + } + + fuser_session.join() +} + +#[test] +/// Do a readdir deeper inside a directory, without doing readdir or stat in the parent directory. +fn readdir_deep() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = + setup_and_mount(tmpdir.path(), populate_directory_complicated).expect("must succeed"); + + let p = tmpdir.path().join(DIRECTORY_COMPLICATED_NAME).join("keep"); + + { + // read_dir should succeed. 
Collect all elements + let elements: Vec<_> = fs::read_dir(p) + .expect("must succeed") + .map(|e| e.expect("must not be err")) + .collect(); + + assert_eq!(1, elements.len(), "number of elements should be 1"); // rust skips . and .. + + // ".keep", 0 byte file. + let e = &elements[0]; + assert_eq!(".keep", e.file_name()); + assert!(e.file_type().expect("must succeed").is_file()); + assert_eq!(0, e.metadata().expect("must succeed").len()); + } + + fuser_session.join() +} + +/// Check attributes match how they show up in /nix/store normally. +#[test] +fn check_attributes() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), |bs: Arc<_>, ds: Arc<_>, ps: Arc<_>| { + populate_blob_a(bs.clone(), ds.clone(), ps.clone()); + populate_directory_with_keep(bs.clone(), ds.clone(), ps.clone()); + populate_symlink(bs, ds, ps); + }) + .expect("must succeed"); + + let p_file = tmpdir.path().join(BLOB_A_NAME); + let p_directory = tmpdir.path().join(DIRECTORY_WITH_KEEP_NAME); + let p_symlink = tmpdir.path().join(SYMLINK_NAME); + + // peek at metadata. We use symlink_metadata to ensure we don't traverse a symlink by accident. + let metadata_file = fs::symlink_metadata(&p_file).expect("must succeed"); + let metadata_directory = fs::symlink_metadata(&p_directory).expect("must succeed"); + let metadata_symlink = fs::symlink_metadata(&p_symlink).expect("must succeed"); + + // modes should match. We & with 0o777 to remove any higher bits. 
+ assert_eq!(0o444, metadata_file.mode() & 0o777); + assert_eq!(0o555, metadata_directory.mode() & 0o777); + assert_eq!(0o444, metadata_symlink.mode() & 0o777); + + // files should have the correct filesize + assert_eq!(fixtures::BLOB_A.len() as u64, metadata_file.len()); + // directories should have their "size" as filesize + assert_eq!( + fixtures::DIRECTORY_WITH_KEEP.size() as u64, + metadata_directory.size() + ); + + for metadata in &[&metadata_file, &metadata_directory, &metadata_symlink] { + // uid and gid should be 0. + assert_eq!(0, metadata.uid()); + assert_eq!(0, metadata.gid()); + + // all times should be set to the unix epoch. + assert_eq!(0, metadata.atime()); + assert_eq!(0, metadata.mtime()); + assert_eq!(0, metadata.ctime()); + // crtime seems MacOS only + } + + fuser_session.join() +} + +#[test] +/// Ensure we allocate the same inodes for the same directory contents. +/// $DIRECTORY_COMPLICATED_NAME/keep contains the same data as $DIRECTORY_WITH_KEEP. +fn compare_inodes_directories() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), |bs: Arc<_>, ds: Arc<_>, ps: Arc<_>| { + populate_directory_with_keep(bs.clone(), ds.clone(), ps.clone()); + populate_directory_complicated(bs, ds, ps); + }) + .expect("must succeed"); + + let p_dir_with_keep = tmpdir.path().join(DIRECTORY_WITH_KEEP_NAME); + let p_sibling_dir = tmpdir.path().join(DIRECTORY_COMPLICATED_NAME).join("keep"); + + // peek at metadata. + assert_eq!( + fs::metadata(&p_dir_with_keep).expect("must succeed").ino(), + fs::metadata(&p_sibling_dir).expect("must succeed").ino() + ); + + fuser_session.join() +} + +/// Ensure we allocate the same inodes for the same directory contents. 
+/// $DIRECTORY_COMPLICATED_NAME/keep/,keep contains the same data as $DIRECTORY_COMPLICATED_NAME/.keep +#[test] +fn compare_inodes_files() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = + setup_and_mount(tmpdir.path(), populate_directory_complicated).expect("must succeed"); + + let p_keep1 = tmpdir.path().join(DIRECTORY_COMPLICATED_NAME).join(".keep"); + let p_keep2 = tmpdir + .path() + .join(DIRECTORY_COMPLICATED_NAME) + .join("keep") + .join(".keep"); + + // peek at metadata. + assert_eq!( + fs::metadata(&p_keep1).expect("must succeed").ino(), + fs::metadata(&p_keep2).expect("must succeed").ino() + ); + + fuser_session.join() +} + +/// Ensure we allocate the same inode for symlinks pointing to the same targets. +/// $DIRECTORY_COMPLICATED_NAME/aa points to the same target as SYMLINK_NAME2. +#[test] +fn compare_inodes_symlinks() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), |bs: Arc<_>, ds: Arc<_>, ps: Arc<_>| { + populate_directory_complicated(bs.clone(), ds.clone(), ps.clone()); + populate_symlink2(bs, ds, ps); + }) + .expect("must succeed"); + + let p1 = tmpdir.path().join(DIRECTORY_COMPLICATED_NAME).join("aa"); + let p2 = tmpdir.path().join(SYMLINK_NAME2); + + // peek at metadata. + assert_eq!( + fs::symlink_metadata(&p1).expect("must succeed").ino(), + fs::symlink_metadata(&p2).expect("must succeed").ino() + ); + + fuser_session.join() +} + +/// Check we match paths exactly. 
+#[test] +fn read_wrong_paths_in_root() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), populate_blob_a).expect("must succeed"); + + // wrong name + assert!(!tmpdir + .path() + .join("00000000000000000000000000000000-tes") + .exists()); + + // invalid hash + assert!(!tmpdir + .path() + .join("0000000000000000000000000000000-test") + .exists()); + + // right name, must exist + assert!(tmpdir + .path() + .join("00000000000000000000000000000000-test") + .exists()); + + // now wrong name with right hash still may not exist + assert!(!tmpdir + .path() + .join("00000000000000000000000000000000-tes") + .exists()); + + fuser_session.join() +} + +/// Make sure writes are not allowed +#[test] +fn disallow_writes() { + // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = setup_and_mount(tmpdir.path(), |_, _, _| {}).expect("must succeed"); + + let p = tmpdir.path().join(BLOB_A_NAME); + let e = std::fs::File::create(&p).expect_err("must fail"); + + assert_eq!(std::io::ErrorKind::Unsupported, e.kind()); + + fuser_session.join() +} + +#[test] +/// Ensure we get an IO error if the directory service does not have the Directory object. +fn missing_directory() { + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = + setup_and_mount(tmpdir.path(), populate_pathinfo_without_directory).expect("must succeed"); + + let p = tmpdir.path().join(DIRECTORY_WITH_KEEP_NAME); + + { + // `stat` on the path should succeed, because it doesn't trigger the directory request. 
+ fs::metadata(&p).expect("must succeed"); + + // However, calling either `readdir` or `stat` on a child should fail with an IO error. + // It fails when trying to pull the first entry, because we don't implement opendir separately + fs::read_dir(&p) + .unwrap() + .into_iter() + .next() + .expect("must be some") + .expect_err("must be err"); + + // rust currently sets e.kind() to Uncategorized, which isn't very + // helpful, so we don't look at the error more closely than that.. + fs::metadata(p.join(".keep")).expect_err("must fail"); + } + + fuser_session.join() +} + +#[test] +/// Ensure we get an IO error if the blob service does not have the blob +fn missing_blob() { + if !std::path::Path::new("/dev/fuse").exists() { + eprintln!("skipping test"); + return; + } + let tmpdir = TempDir::new().unwrap(); + + let fuser_session = + setup_and_mount(tmpdir.path(), populate_blob_a_without_blob).expect("must succeed"); + + let p = tmpdir.path().join(BLOB_A_NAME); + + { + // `stat` on the blob should succeed, because it doesn't trigger a request to the blob service. + fs::metadata(&p).expect("must succeed"); + + // However, calling read on the blob should fail. + // rust currently sets e.kind() to Uncategorized, which isn't very + // helpful, so we don't look at the error more closely than that.. + fs::read(p).expect_err("must fail"); + } + + fuser_session.join() +} -- cgit 1.4.1