about summary refs log blame commit diff
path: root/tvix/store/src/fs/inode_tracker.rs
blob: daf6b4ee79c233d76e8fc0eea221d8a1c61cec3f (plain) (tree)
1
2
3
4
5

                                           
                                                   

                                     









                                                                                
                                                        




























































































































                                                                                                  

                                                    
                                               
                                                                             



                                                                           
                                                                     



                                                                           

                                                                                              
















                                                                      
                                                                         
























                                                                                         
                                              
                               
                                         


















































                                                                                                   
                                                    







                                                                 
                                                        







                                                                                      
                                                                                 












                                                                                             
                                             



















                                                                                      
                                                 










                                                                                    
                                             

















                                                                                             
                                                                     























                                                                                        
                                                              
                               


                                                                            







                                                               
                                                              
                               
                                                        









                                                                               
                                                              
                               
                                                          



















































                                                                                             
                                                 












                                                                                                           
use std::{collections::HashMap, sync::Arc};

use super::inodes::{DirectoryInodeData, InodeData};
use tvix_castore::proto as castorepb;
use tvix_castore::B3Digest;

/// InodeTracker keeps track of inodes, stores the data behind these inodes and
/// deals with inode allocation.
///
/// Besides the primary inode -> data map, it maintains one reverse lookup
/// table per kind of content, so storing the same content twice yields the
/// same inode.
pub struct InodeTracker {
    /// The data stored for each allocated inode, keyed by inode number.
    data: HashMap<u64, Arc<InodeData>>,

    /// Lookup table for blobs by their B3Digest.
    blob_digest_to_inode: HashMap<B3Digest, u64>,

    /// Lookup table for symlinks by their target.
    symlink_target_to_inode: HashMap<bytes::Bytes, u64>,

    /// Lookup table for directories by their B3Digest.
    /// Note the corresponding directory may not be present in data yet.
    directory_digest_to_inode: HashMap<B3Digest, u64>,

    /// The next inode to allocate.
    next_inode: u64,
}

impl Default for InodeTracker {
    /// Creates an empty tracker: no inode data, empty lookup tables, and the
    /// allocation counter positioned at its first value.
    fn default() -> Self {
        // Allocation starts at 2, so inode 1 is never handed out by this tracker.
        // NOTE(review): presumably 1 is reserved for the filesystem root —
        // confirm against the code consuming this tracker.
        let first_inode: u64 = 2;

        Self {
            data: HashMap::new(),
            blob_digest_to_inode: HashMap::new(),
            symlink_target_to_inode: HashMap::new(),
            directory_digest_to_inode: HashMap::new(),
            next_inode: first_inode,
        }
    }
}

impl InodeTracker {
    /// Retrieves data for a given inode, if it exists.
    ///
    /// The returned [Arc] shares ownership of the stored data; nothing is
    /// copied.
    pub fn get(&self, ino: u64) -> Option<Arc<InodeData>> {
        self.data.get(&ino).cloned()
    }

    /// Stores data and returns the inode for it.
    ///
    /// In case an inode has already been allocated for the same data (same
    /// blob digest, same symlink target or same directory digest), that inode
    /// is returned, otherwise a new one is allocated.
    ///
    /// In case data is a populated [InodeData::Directory], inodes for all of
    /// its children are looked up (or allocated) as well, and the children
    /// are stored alongside their inodes. If the directory was previously
    /// known only in its sparse form, it is replaced by the populated form
    /// while keeping its inode. A populated directory is never replaced by a
    /// sparse one.
    ///
    /// # Panics
    ///
    /// Panics if the directory lookup table points to an inode that has no
    /// data or whose data is not a directory. Both are internal invariant
    /// violations.
    pub fn put(&mut self, data: InodeData) -> u64 {
        match data {
            InodeData::Regular(ref digest, _, _) => {
                match self.blob_digest_to_inode.get(digest).copied() {
                    // We already have it, return the inode.
                    Some(found_ino) => found_ino,
                    None => self.insert_and_increment(data),
                }
            }
            InodeData::Symlink(ref target) => {
                match self.symlink_target_to_inode.get(target).copied() {
                    // We already have it, return the inode.
                    Some(found_ino) => found_ino,
                    None => self.insert_and_increment(data),
                }
            }
            InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _)) => {
                // Check the lookup table if the B3Digest is known. An existing
                // entry (sparse or populated) is left untouched.
                match self.directory_digest_to_inode.get(digest).copied() {
                    Some(found_ino) => found_ino,
                    None => self.insert_and_increment(data),
                }
            }
            // Inserting [DirectoryInodeData::Populated] usually replaces an
            // existing [DirectoryInodeData::Sparse] one.
            InodeData::Directory(DirectoryInodeData::Populated(ref digest, ref children)) => {
                match self.directory_digest_to_inode.get(digest).copied() {
                    Some(dir_ino) => {
                        let existing = self.data.get(&dir_ino).expect(
                            "inode found in directory_digest_to_inode must be present in data",
                        );

                        let needs_update = match **existing {
                            InodeData::Regular(..) | InodeData::Symlink(_) => {
                                panic!("unexpected type at inode {}", dir_ino);
                            }
                            // already populated, nothing to do
                            InodeData::Directory(DirectoryInodeData::Populated(..)) => false,
                            // in case the actual data is sparse, replace it
                            // with the populated one.
                            InodeData::Directory(DirectoryInodeData::Sparse(ref old_digest, _)) => {
                                // sanity checking to ensure we update the right node
                                debug_assert_eq!(old_digest, digest);

                                true
                            }
                        };

                        if needs_update {
                            // populate inode fields in children; this
                            // allocates inodes for new children in the process.
                            let children = self.allocate_inodes_for_children(children);

                            // update sparse data with populated data
                            self.data.insert(
                                dir_ino,
                                Arc::new(InodeData::Directory(DirectoryInodeData::Populated(
                                    digest.clone(),
                                    children,
                                ))),
                            );
                        }

                        dir_ino
                    }
                    None => {
                        // populate inode fields in children
                        let children = self.allocate_inodes_for_children(children);
                        // insert and return the inode
                        self.insert_and_increment(InodeData::Directory(
                            DirectoryInodeData::Populated(digest.clone(), children),
                        ))
                    }
                }
            }
        }
    }

    /// Takes a list of children with zeroed inodes, and returns a copy of it
    /// in which every child carries its allocated (or already existing) inode.
    ///
    /// Children are processed in order, so inode allocation order follows the
    /// order of the list. Each node is cloned exactly once.
    fn allocate_inodes_for_children(
        &mut self,
        children: &[(u64, castorepb::node::Node)],
    ) -> Vec<(u64, castorepb::node::Node)> {
        children
            .iter()
            .map(|(placeholder_ino, child_node)| {
                debug_assert_eq!(0, *placeholder_ino, "expected child inode to be 0");

                let child_ino = match child_node {
                    castorepb::node::Node::Directory(directory_node) => {
                        // Try putting the sparse data in. If we already have a
                        // populated version, it'll not update it.
                        self.put(directory_node.into())
                    }
                    castorepb::node::Node::File(file_node) => self.put(file_node.into()),
                    castorepb::node::Node::Symlink(symlink_node) => self.put(symlink_node.into()),
                };

                (child_ino, child_node.clone())
            })
            .collect()
    }

    /// Inserts the data and returns the inode it was stored at, while
    /// incrementing next_inode.
    ///
    /// The matching lookup table is updated too, so later puts of the same
    /// content resolve to the returned inode.
    fn insert_and_increment(&mut self, data: InodeData) -> u64 {
        let ino = self.next_inode;

        // insert into lookup tables
        match &data {
            InodeData::Regular(digest, _, _) => {
                self.blob_digest_to_inode.insert(digest.clone(), ino);
            }
            InodeData::Symlink(target) => {
                self.symlink_target_to_inode.insert(target.clone(), ino);
            }
            // Inserting a [DirectoryInodeData::Populated] directly is currently
            // not used outside test fixtures.
            // Usually a [DirectoryInodeData::Sparse] is inserted and later
            // "upgraded" with more data.
            // However, as a future optimization, a lookup for a PathInfo could trigger a
            // [DirectoryService::get_recursive()] request that "forks into
            // background" and prepopulates all Directories in a closure.
            InodeData::Directory(DirectoryInodeData::Sparse(digest, _))
            | InodeData::Directory(DirectoryInodeData::Populated(digest, _)) => {
                self.directory_digest_to_inode.insert(digest.clone(), ino);
            }
        }

        // Insert data
        self.data.insert(ino, Arc::new(data));

        // increment inode counter and return old inode.
        self.next_inode += 1;
        ino
    }
}

#[cfg(test)]
mod tests {
    use crate::fs::inodes::DirectoryInodeData;
    use crate::tests::fixtures;
    use tvix_castore::proto as castorepb;

    use super::InodeData;
    use super::InodeTracker;

    /// Getting something non-existent should be none
    #[test]
    fn get_nonexistent() {
        let inode_tracker = InodeTracker::default();
        assert!(inode_tracker.get(1).is_none());
    }

    /// Put of a regular file should allocate an inode, which should be the
    /// same when inserting again.
    #[test]
    fn put_regular() {
        let mut inode_tracker = InodeTracker::default();
        let f = InodeData::Regular(
            fixtures::BLOB_A_DIGEST.clone(),
            fixtures::BLOB_A.len() as u32,
            false,
        );

        // put it in
        let ino = inode_tracker.put(f.clone());

        // a get should return the right data
        let data = inode_tracker.get(ino).expect("must be some");
        match *data {
            InodeData::Regular(ref digest, _, _) => {
                assert_eq!(&fixtures::BLOB_A_DIGEST.clone(), digest);
            }
            InodeData::Symlink(_) | InodeData::Directory(..) => panic!("wrong type"),
        }

        // another put should return the same ino
        assert_eq!(ino, inode_tracker.put(f));

        // inserting another file should return a different ino
        assert_ne!(
            ino,
            inode_tracker.put(InodeData::Regular(
                fixtures::BLOB_B_DIGEST.clone(),
                fixtures::BLOB_B.len() as u32,
                false,
            ))
        );
    }

    /// Put of a symlink should allocate an inode, which should be the same
    /// when inserting again.
    #[test]
    fn put_symlink() {
        let mut inode_tracker = InodeTracker::default();
        let f = InodeData::Symlink("target".into());

        // put it in
        let ino = inode_tracker.put(f.clone());

        // a get should return the right data
        let data = inode_tracker.get(ino).expect("must be some");
        match *data {
            InodeData::Symlink(ref target) => {
                assert_eq!(b"target".to_vec(), *target);
            }
            InodeData::Regular(..) | InodeData::Directory(..) => panic!("wrong type"),
        }

        // another put should return the same ino
        assert_eq!(ino, inode_tracker.put(f));

        // inserting another symlink should return a different ino
        assert_ne!(ino, inode_tracker.put(InodeData::Symlink("target2".into())));
    }

    /// Put of a sparse directory should allocate an inode, which should be the
    /// same when inserting again. The stored data should stay sparse.
    #[test]
    fn put_directory_sparse() {
        let mut inode_tracker = InodeTracker::default();
        let d = InodeData::Directory(DirectoryInodeData::Sparse(
            fixtures::DIRECTORY_WITH_KEEP.digest(),
            fixtures::DIRECTORY_WITH_KEEP.size(),
        ));

        // put it in
        let ino = inode_tracker.put(d.clone());

        // a get should return the right (still sparse) data
        let data = inode_tracker.get(ino).expect("must be some");
        match *data {
            InodeData::Directory(DirectoryInodeData::Sparse(ref digest, size)) => {
                assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), digest);
                assert_eq!(fixtures::DIRECTORY_WITH_KEEP.size(), size);
            }
            InodeData::Directory(DirectoryInodeData::Populated(..))
            | InodeData::Regular(..)
            | InodeData::Symlink(_) => panic!("wrong type"),
        }

        // another put should return the same ino
        assert_eq!(ino, inode_tracker.put(d));

        // inserting another sparse directory should return a different ino
        assert_ne!(
            ino,
            inode_tracker.put(InodeData::Directory(DirectoryInodeData::Sparse(
                fixtures::DIRECTORY_COMPLICATED.digest(),
                fixtures::DIRECTORY_COMPLICATED.size(),
            )))
        );
    }

    /// Put a directory into the inode tracker, which refers to a file not seen yet.
    #[test]
    fn put_directory_leaf() {
        let mut inode_tracker = InodeTracker::default();

        // this is a directory with a single item, a ".keep" file pointing to a 0 bytes blob.
        let dir: InodeData = fixtures::DIRECTORY_WITH_KEEP.clone().into();

        // put it in
        let dir_ino = inode_tracker.put(dir);

        // a get should return the right data
        let data = inode_tracker.get(dir_ino).expect("must be some");
        match *data {
            InodeData::Directory(DirectoryInodeData::Sparse(..)) => {
                panic!("wrong type");
            }
            InodeData::Directory(DirectoryInodeData::Populated(
                ref directory_digest,
                ref children,
            )) => {
                // ensure the directory digest matches
                assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest);

                // ensure the child is populated, with a different inode than
                // the parent, and the data matches expectations.
                assert_eq!(1, children.len());
                let (child_ino, child_node) = children.first().unwrap();
                assert_ne!(dir_ino, *child_ino);
                assert_eq!(
                    &castorepb::node::Node::File(
                        fixtures::DIRECTORY_WITH_KEEP.files.first().unwrap().clone()
                    ),
                    child_node
                );

                // ensure looking up that inode directly returns the data
                let child_data = inode_tracker.get(*child_ino).expect("must exist");
                match *child_data {
                    InodeData::Regular(ref digest, size, executable) => {
                        assert_eq!(&fixtures::EMPTY_BLOB_DIGEST.clone(), digest);
                        assert_eq!(0, size);
                        assert!(!executable);
                    }
                    InodeData::Symlink(_) | InodeData::Directory(..) => panic!("wrong type"),
                }
            }
            InodeData::Symlink(_) | InodeData::Regular(..) => panic!("wrong type"),
        }
    }

    /// Put a directory into the inode tracker, referring to files, directories
    /// and symlinks not seen yet.
    #[test]
    fn put_directory_complicated() {
        let mut inode_tracker = InodeTracker::default();

        // this is a directory with three children: a file (.keep), a symlink
        // (aa) and a directory (keep).
        let dir_complicated: InodeData = fixtures::DIRECTORY_COMPLICATED.clone().into();

        // put it in
        let dir_complicated_ino = inode_tracker.put(dir_complicated);

        // a get should return the right data
        let dir_data = inode_tracker
            .get(dir_complicated_ino)
            .expect("must be some");

        let child_dir_ino = match *dir_data {
            InodeData::Directory(DirectoryInodeData::Sparse(..)) => {
                panic!("wrong type");
            }
            InodeData::Directory(DirectoryInodeData::Populated(
                ref directory_digest,
                ref children,
            )) => {
                // assert the directory digest matches
                assert_eq!(&fixtures::DIRECTORY_COMPLICATED.digest(), directory_digest);

                // ensure there's three children, all with different inodes
                assert_eq!(3, children.len());
                let mut seen_inodes = Vec::from([dir_complicated_ino]);

                // check the first child (.keep)
                {
                    let (child_ino, child_node) = &children[0];
                    assert!(!seen_inodes.contains(child_ino));
                    assert_eq!(
                        &castorepb::node::Node::File(
                            fixtures::DIRECTORY_COMPLICATED.files[0].clone()
                        ),
                        child_node
                    );
                    seen_inodes.push(*child_ino);
                }

                // check the second child (aa)
                {
                    let (child_ino, child_node) = &children[1];
                    assert!(!seen_inodes.contains(child_ino));
                    assert_eq!(
                        &castorepb::node::Node::Symlink(
                            fixtures::DIRECTORY_COMPLICATED.symlinks[0].clone()
                        ),
                        child_node
                    );
                    seen_inodes.push(*child_ino);
                }

                // check the third child (keep)
                {
                    let (child_ino, child_node) = &children[2];
                    assert!(!seen_inodes.contains(child_ino));
                    assert_eq!(
                        &castorepb::node::Node::Directory(
                            fixtures::DIRECTORY_COMPLICATED.directories[0].clone()
                        ),
                        child_node
                    );
                    seen_inodes.push(*child_ino);

                    // return the child_ino
                    *child_ino
                }
            }
            InodeData::Regular(..) | InodeData::Symlink(_) => panic!("wrong type"),
        };

        // get of the inode for child_ino
        let child_dir_data = inode_tracker.get(child_dir_ino).expect("must be some");
        // it should be a sparse InodeData::Directory with the right digest.
        match *child_dir_data {
            InodeData::Directory(DirectoryInodeData::Sparse(
                ref child_dir_digest,
                child_dir_size,
            )) => {
                assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), child_dir_digest);
                assert_eq!(fixtures::DIRECTORY_WITH_KEEP.size(), child_dir_size);
            }
            InodeData::Directory(DirectoryInodeData::Populated(..))
            | InodeData::Regular(..)
            | InodeData::Symlink(_) => {
                panic!("wrong type")
            }
        }

        // put DIRECTORY_WITH_KEEP, which should return the same ino as [child_dir_ino],
        // but update the sparse object to a populated one at the same time.
        let child_dir_ino2 = inode_tracker.put(fixtures::DIRECTORY_WITH_KEEP.clone().into());
        assert_eq!(child_dir_ino, child_dir_ino2);

        // get the data
        match *inode_tracker.get(child_dir_ino).expect("must be some") {
            // it should be a populated InodeData::Directory with the right digest!
            InodeData::Directory(DirectoryInodeData::Populated(
                ref directory_digest,
                ref children,
            )) => {
                // ensure the directory digest matches
                assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest);

                // ensure the child is populated, with a different inode than
                // the parent, and the data matches expectations.
                assert_eq!(1, children.len());
                let (child_node_inode, child_node) = children.first().unwrap();
                assert_ne!(dir_complicated_ino, *child_node_inode);
                assert_eq!(
                    &castorepb::node::Node::File(
                        fixtures::DIRECTORY_WITH_KEEP.files.first().unwrap().clone()
                    ),
                    child_node
                );
            }
            InodeData::Directory(DirectoryInodeData::Sparse(..))
            | InodeData::Regular(..)
            | InodeData::Symlink(_) => panic!("wrong type"),
        }
    }

    /// Put a populated directory first, then put the sparse form of the same
    /// directory. The inode must stay the same, and the stored data must not
    /// be degraded back to sparse.
    #[test]
    fn put_directory_populated_then_sparse() {
        let mut inode_tracker = InodeTracker::default();

        // put the populated directory in
        let dir_ino = inode_tracker.put(fixtures::DIRECTORY_WITH_KEEP.clone().into());

        // put the sparse form of the same directory, it should resolve to the same ino
        let sparse_ino = inode_tracker.put(InodeData::Directory(DirectoryInodeData::Sparse(
            fixtures::DIRECTORY_WITH_KEEP.digest(),
            fixtures::DIRECTORY_WITH_KEEP.size(),
        )));
        assert_eq!(dir_ino, sparse_ino);

        // the data must still be the populated one
        match *inode_tracker.get(dir_ino).expect("must be some") {
            InodeData::Directory(DirectoryInodeData::Populated(
                ref directory_digest,
                ref children,
            )) => {
                assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest);
                assert_eq!(1, children.len());
            }
            InodeData::Directory(DirectoryInodeData::Sparse(..))
            | InodeData::Regular(..)
            | InodeData::Symlink(_) => panic!("wrong type"),
        }
    }
}