diff options
author | Connor Brewster <cbrewster@hey.com> | 2023-09-16T18·54-0500 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2023-09-20T14·27+0000 |
commit | 7e737fde34260daa477794d63b0b3344b4a1d81b (patch) | |
tree | 51f7d4b3c7f18aa78e584b06840fcdd645474895 /tvix/store/src/fuse/inode_tracker.rs | |
parent | 6b7c936bc50934b45df132f659292e2c45256dea (diff) |
refactor(tvix/store/fs): Separate FUSE and filesystem code r/6622
In prepration for adding virtiofs support, I thought it would make sense to split out the filesystem implementation from FUSE itself. The `fs` module holds the tvix-store filesystem implemetation and the `fuse` module holds the code to spawn a FUSE daemon backed by multiple threads. Change-Id: I8c58447b8c3aa016a613068f8e7ec166554e237c Reviewed-on: https://cl.tvl.fyi/c/depot/+/9343 Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI Autosubmit: Connor Brewster <cbrewster@hey.com>
Diffstat (limited to 'tvix/store/src/fuse/inode_tracker.rs')
-rw-r--r-- | tvix/store/src/fuse/inode_tracker.rs | 455 |
1 files changed, 0 insertions, 455 deletions
diff --git a/tvix/store/src/fuse/inode_tracker.rs b/tvix/store/src/fuse/inode_tracker.rs deleted file mode 100644 index 97a9744c31d7..000000000000 --- a/tvix/store/src/fuse/inode_tracker.rs +++ /dev/null @@ -1,455 +0,0 @@ -use std::{collections::HashMap, sync::Arc}; - -use crate::{proto, B3Digest}; - -use super::inodes::{DirectoryInodeData, InodeData}; - -/// InodeTracker keeps track of inodes, stores data being these inodes and deals -/// with inode allocation. -pub struct InodeTracker { - data: HashMap<u64, Arc<InodeData>>, - - // lookup table for blobs by their B3Digest - blob_digest_to_inode: HashMap<B3Digest, u64>, - - // lookup table for symlinks by their target - symlink_target_to_inode: HashMap<bytes::Bytes, u64>, - - // lookup table for directories by their B3Digest. - // Note the corresponding directory may not be present in data yet. - directory_digest_to_inode: HashMap<B3Digest, u64>, - - // the next inode to allocate - next_inode: u64, -} - -impl Default for InodeTracker { - fn default() -> Self { - Self { - data: Default::default(), - - blob_digest_to_inode: Default::default(), - symlink_target_to_inode: Default::default(), - directory_digest_to_inode: Default::default(), - - next_inode: 2, - } - } -} - -impl InodeTracker { - // Retrieves data for a given inode, if it exists. - pub fn get(&self, ino: u64) -> Option<Arc<InodeData>> { - self.data.get(&ino).cloned() - } - - // Stores data and returns the inode for it. - // In case an inode has already been allocated for the same data, that inode - // is returned, otherwise a new one is allocated. - // In case data is a [InodeData::Directory], inodes for all items are looked - // up - pub fn put(&mut self, data: InodeData) -> u64 { - match data { - InodeData::Regular(ref digest, _, _) => { - match self.blob_digest_to_inode.get(digest) { - Some(found_ino) => { - // We already have it, return the inode. - *found_ino - } - None => self.insert_and_increment(data), - } - } - InodeData::Symlink(ref target) => { - match self.symlink_target_to_inode.get(target) { - Some(found_ino) => { - // We already have it, return the inode. - *found_ino - } - None => self.insert_and_increment(data), - } - } - InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _size)) => { - // check the lookup table if the B3Digest is known. - match self.directory_digest_to_inode.get(digest) { - Some(found_ino) => { - // We already have it, return the inode. - *found_ino - } - None => { - // insert and return the inode - self.insert_and_increment(data) - } - } - } - // Inserting [DirectoryInodeData::Populated] usually replaces an - // existing [DirectoryInodeData::Sparse] one. - InodeData::Directory(DirectoryInodeData::Populated(ref digest, ref children)) => { - let dir_ino = self.directory_digest_to_inode.get(digest); - if let Some(dir_ino) = dir_ino { - let dir_ino = *dir_ino; - - // We know the data must exist, as we found it in [directory_digest_to_inode]. - let needs_update = match **self.data.get(&dir_ino).unwrap() { - InodeData::Regular(..) | InodeData::Symlink(_) => { - panic!("unexpected type at inode {}", dir_ino); - } - // already populated, nothing to do - InodeData::Directory(DirectoryInodeData::Populated(..)) => false, - // in case the actual data is sparse, replace it with the populated one. - // this allocates inodes for new children in the process. - InodeData::Directory(DirectoryInodeData::Sparse( - ref old_digest, - ref _old_size, - )) => { - // sanity checking to ensure we update the right node - debug_assert_eq!(old_digest, digest); - - true - } - }; - - if needs_update { - // populate inode fields in children - let children = self.allocate_inodes_for_children(children.to_vec()); - - // update sparse data with populated data - self.data.insert( - dir_ino, - Arc::new(InodeData::Directory(DirectoryInodeData::Populated( - digest.clone(), - children, - ))), - ); - } - - dir_ino - } else { - // populate inode fields in children - let children = self.allocate_inodes_for_children(children.to_vec()); - // insert and return InodeData - self.insert_and_increment(InodeData::Directory(DirectoryInodeData::Populated( - digest.clone(), - children, - ))) - } - } - } - } - - // Consume a list of children with zeroed inodes, and allocate (or fetch existing) inodes. - fn allocate_inodes_for_children( - &mut self, - children: Vec<(u64, proto::node::Node)>, - ) -> Vec<(u64, proto::node::Node)> { - // allocate new inodes for all children - let mut children_new: Vec<(u64, proto::node::Node)> = Vec::new(); - - for (child_ino, ref child_node) in children { - debug_assert_eq!(0, child_ino, "expected child inode to be 0"); - let child_ino = match child_node { - proto::node::Node::Directory(directory_node) => { - // Try putting the sparse data in. If we already have a - // populated version, it'll not update it. - self.put(directory_node.into()) - } - proto::node::Node::File(file_node) => self.put(file_node.into()), - proto::node::Node::Symlink(symlink_node) => self.put(symlink_node.into()), - }; - - children_new.push((child_ino, child_node.clone())) - } - children_new - } - - // Inserts the data and returns the inode it was stored at, while - // incrementing next_inode. - fn insert_and_increment(&mut self, data: InodeData) -> u64 { - let ino = self.next_inode; - // insert into lookup tables - match data { - InodeData::Regular(ref digest, _, _) => { - self.blob_digest_to_inode.insert(digest.clone(), ino); - } - InodeData::Symlink(ref target) => { - self.symlink_target_to_inode.insert(target.clone(), ino); - } - InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _size)) => { - self.directory_digest_to_inode.insert(digest.clone(), ino); - } - // This is currently not used outside test fixtures. - // Usually a [DirectoryInodeData::Sparse] is inserted and later - // "upgraded" with more data. - // However, as a future optimization, a lookup for a PathInfo could trigger a - // [DirectoryService::get_recursive()] request that "forks into - // background" and prepopulates all Directories in a closure. - InodeData::Directory(DirectoryInodeData::Populated(ref digest, _)) => { - self.directory_digest_to_inode.insert(digest.clone(), ino); - } - } - // Insert data - self.data.insert(ino, Arc::new(data)); - - // increment inode counter and return old inode. - self.next_inode += 1; - ino - } -} - -#[cfg(test)] -mod tests { - use crate::fuse::inodes::DirectoryInodeData; - use crate::proto; - use crate::tests::fixtures; - - use super::InodeData; - use super::InodeTracker; - - /// Getting something non-existent should be none - #[test] - fn get_nonexistent() { - let inode_tracker = InodeTracker::default(); - assert!(inode_tracker.get(1).is_none()); - } - - /// Put of a regular file should allocate a uid, which should be the same when inserting again. - #[test] - fn put_regular() { - let mut inode_tracker = InodeTracker::default(); - let f = InodeData::Regular( - fixtures::BLOB_A_DIGEST.clone(), - fixtures::BLOB_A.len() as u32, - false, - ); - - // put it in - let ino = inode_tracker.put(f.clone()); - - // a get should return the right data - let data = inode_tracker.get(ino).expect("must be some"); - match *data { - InodeData::Regular(ref digest, _, _) => { - assert_eq!(&fixtures::BLOB_A_DIGEST.clone(), digest); - } - InodeData::Symlink(_) | InodeData::Directory(..) => panic!("wrong type"), - } - - // another put should return the same ino - assert_eq!(ino, inode_tracker.put(f)); - - // inserting another file should return a different ino - assert_ne!( - ino, - inode_tracker.put(InodeData::Regular( - fixtures::BLOB_B_DIGEST.clone(), - fixtures::BLOB_B.len() as u32, - false, - )) - ); - } - - // Put of a symlink should allocate a uid, which should be the same when inserting again - #[test] - fn put_symlink() { - let mut inode_tracker = InodeTracker::default(); - let f = InodeData::Symlink("target".into()); - - // put it in - let ino = inode_tracker.put(f.clone()); - - // a get should return the right data - let data = inode_tracker.get(ino).expect("must be some"); - match *data { - InodeData::Symlink(ref target) => { - assert_eq!(b"target".to_vec(), *target); - } - InodeData::Regular(..) | InodeData::Directory(..) => panic!("wrong type"), - } - - // another put should return the same ino - assert_eq!(ino, inode_tracker.put(f)); - - // inserting another file should return a different ino - assert_ne!(ino, inode_tracker.put(InodeData::Symlink("target2".into()))); - } - - // TODO: put sparse directory - - /// Put a directory into the inode tracker, which refers to a file not seen yet. - #[test] - fn put_directory_leaf() { - let mut inode_tracker = InodeTracker::default(); - - // this is a directory with a single item, a ".keep" file pointing to a 0 bytes blob. - let dir: InodeData = fixtures::DIRECTORY_WITH_KEEP.clone().into(); - - // put it in - let dir_ino = inode_tracker.put(dir); - - // a get should return the right data - let data = inode_tracker.get(dir_ino).expect("must be some"); - match *data { - InodeData::Directory(super::DirectoryInodeData::Sparse(..)) => { - panic!("wrong type"); - } - InodeData::Directory(super::DirectoryInodeData::Populated( - ref directory_digest, - ref children, - )) => { - // ensure the directory digest matches - assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest); - - // ensure the child is populated, with a different inode than - // the parent, and the data matches expectations. - assert_eq!(1, children.len()); - let (child_ino, child_node) = children.first().unwrap(); - assert_ne!(dir_ino, *child_ino); - assert_eq!( - &proto::node::Node::File( - fixtures::DIRECTORY_WITH_KEEP.files.first().unwrap().clone() - ), - child_node - ); - - // ensure looking up that inode directly returns the data - let child_data = inode_tracker.get(*child_ino).expect("must exist"); - match *child_data { - InodeData::Regular(ref digest, size, executable) => { - assert_eq!(&fixtures::EMPTY_BLOB_DIGEST.clone(), digest); - assert_eq!(0, size); - assert!(!executable); - } - InodeData::Symlink(_) | InodeData::Directory(..) => panic!("wrong type"), - } - } - InodeData::Symlink(_) | InodeData::Regular(..) => panic!("wrong type"), - } - } - - /// Put a directory into the inode tracker, referring to files, directories - /// and symlinks not seen yet. - #[test] - fn put_directory_complicated() { - let mut inode_tracker = InodeTracker::default(); - - // this is a directory with a single item, a ".keep" file pointing to a 0 bytes blob. - let dir_complicated: InodeData = fixtures::DIRECTORY_COMPLICATED.clone().into(); - - // put it in - let dir_complicated_ino = inode_tracker.put(dir_complicated); - - // a get should return the right data - let dir_data = inode_tracker - .get(dir_complicated_ino) - .expect("must be some"); - - let child_dir_ino = match *dir_data { - InodeData::Directory(DirectoryInodeData::Sparse(..)) => { - panic!("wrong type"); - } - InodeData::Directory(DirectoryInodeData::Populated( - ref directory_digest, - ref children, - )) => { - // assert the directory digest matches - assert_eq!(&fixtures::DIRECTORY_COMPLICATED.digest(), directory_digest); - - // ensure there's three children, all with different inodes - assert_eq!(3, children.len()); - let mut seen_inodes = Vec::from([dir_complicated_ino]); - - // check the first child (.keep) - { - let (child_ino, child_node) = &children[0]; - assert!(!seen_inodes.contains(child_ino)); - assert_eq!( - &proto::node::Node::File(fixtures::DIRECTORY_COMPLICATED.files[0].clone()), - child_node - ); - seen_inodes.push(*child_ino); - } - - // check the second child (aa) - { - let (child_ino, child_node) = &children[1]; - assert!(!seen_inodes.contains(child_ino)); - assert_eq!( - &proto::node::Node::Symlink( - fixtures::DIRECTORY_COMPLICATED.symlinks[0].clone() - ), - child_node - ); - seen_inodes.push(*child_ino); - } - - // check the third child (keep) - { - let (child_ino, child_node) = &children[2]; - assert!(!seen_inodes.contains(child_ino)); - assert_eq!( - &proto::node::Node::Directory( - fixtures::DIRECTORY_COMPLICATED.directories[0].clone() - ), - child_node - ); - seen_inodes.push(*child_ino); - - // return the child_ino - *child_ino - } - } - InodeData::Regular(..) | InodeData::Symlink(_) => panic!("wrong type"), - }; - - // get of the inode for child_ino - let child_dir_data = inode_tracker.get(child_dir_ino).expect("must be some"); - // it should be a sparse InodeData::Directory with the right digest. - match *child_dir_data { - InodeData::Directory(DirectoryInodeData::Sparse( - ref child_dir_digest, - child_dir_size, - )) => { - assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), child_dir_digest); - assert_eq!(fixtures::DIRECTORY_WITH_KEEP.size(), child_dir_size); - } - InodeData::Directory(DirectoryInodeData::Populated(..)) - | InodeData::Regular(..) - | InodeData::Symlink(_) => { - panic!("wrong type") - } - } - - // put DIRECTORY_WITH_KEEP, which should return the same ino as [child_dir_ino], - // but update the sparse object to a populated one at the same time. - let child_dir_ino2 = inode_tracker.put(fixtures::DIRECTORY_WITH_KEEP.clone().into()); - assert_eq!(child_dir_ino, child_dir_ino2); - - // get the data - match *inode_tracker.get(child_dir_ino).expect("must be some") { - // it should be a populated InodeData::Directory with the right digest! - InodeData::Directory(DirectoryInodeData::Populated( - ref directory_digest, - ref children, - )) => { - // ensure the directory digest matches - assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest); - - // ensure the child is populated, with a different inode than - // the parent, and the data matches expectations. - assert_eq!(1, children.len()); - let (child_node_inode, child_node) = children.first().unwrap(); - assert_ne!(dir_complicated_ino, *child_node_inode); - assert_eq!( - &proto::node::Node::File( - fixtures::DIRECTORY_WITH_KEEP.files.first().unwrap().clone() - ), - child_node - ); - } - InodeData::Directory(DirectoryInodeData::Sparse(..)) - | InodeData::Regular(..) - | InodeData::Symlink(_) => panic!("wrong type"), - } - } -} - -// TODO: add test inserting a populated one first, then ensure an update doesn't degrade it back to sparse. |