about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-05-29T12·11+0300
committerflokli <flokli@flokli.de>2023-06-20T10·14+0000
commitb25d98a84e9830c3f800ca8c84d9df09d6b1296d (patch)
tree9f1240fdf3a349e735ad2c62d4e7e230ba12a97d
parent4516cd09c51b7a19707de0a5ba171c9592241a18 (diff)
feat(tvix/store/fuse): initial implementation r/6337
This is a first implementation of a FUSE filesystem, mounting tvix-store
to a given location.

This is mostly meant as one additional lens into a store, and could be
used for builds. It's not meant to be used as a general-purpose thing.

It still has some rough edges:

 - It doesn't implement open/close, so it doesn't use file handles.
   Which means, we need to open blobs for partial reads over and over
   again.
 - It doesn't implement seek, as BlobReader doesn't implement seek yet.
 - It doesn't track "lifetimes" of inodes by listening on forget,
   meaning it might hold more data in memory than necessary.
 - As we don't have store composition (and a caching layer) yet,
   operations might be slow.

Change-Id: Ib1812ed761dfaf6aeb548443ae939c87530b7be8
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8667
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: tazjin <tazjin@tvl.su>
-rw-r--r--tvix/store/src/fuse/file_attr.rs56
-rw-r--r--tvix/store/src/fuse/inode_tracker.rs458
-rw-r--r--tvix/store/src/fuse/inodes.rs78
-rw-r--r--tvix/store/src/fuse/mod.rs417
-rw-r--r--tvix/store/src/fuse/tests.rs797
5 files changed, 1804 insertions, 2 deletions
diff --git a/tvix/store/src/fuse/file_attr.rs b/tvix/store/src/fuse/file_attr.rs
new file mode 100644
index 0000000000..b2b971d9e4
--- /dev/null
+++ b/tvix/store/src/fuse/file_attr.rs
@@ -0,0 +1,56 @@
+use std::time::SystemTime;
+
+use super::inodes::{DirectoryInodeData, InodeData};
+use fuser::FileAttr;
+
+/// The [FileAttr] describing the root
+pub const ROOT_FILE_ATTR: FileAttr = FileAttr {
+    ino: fuser::FUSE_ROOT_ID,
+    size: 0,
+    blksize: 1024,
+    blocks: 0,
+    atime: SystemTime::UNIX_EPOCH,
+    mtime: SystemTime::UNIX_EPOCH,
+    ctime: SystemTime::UNIX_EPOCH,
+    crtime: SystemTime::UNIX_EPOCH,
+    kind: fuser::FileType::Directory,
+    perm: 0o555,
+    nlink: 0,
+    uid: 0,
+    gid: 0,
+    rdev: 0,
+    flags: 0,
+};
+
+/// For a given [InodeData] and inode number, construct a [FileAttr].
+pub fn gen_file_attr(inode_data: &InodeData, inode: u64) -> FileAttr {
+    FileAttr {
+        ino: inode,
+        size: match inode_data {
+            InodeData::Regular(_, size, _) => *size as u64,
+            InodeData::Symlink(target) => target.len() as u64,
+            InodeData::Directory(DirectoryInodeData::Sparse(_, size)) => *size as u64,
+            InodeData::Directory(DirectoryInodeData::Populated(_, ref children)) => {
+                children.len() as u64
+            }
+        },
+        // FUTUREWORK: play with this number, as it affects read sizes for client applications.
+        blksize: 1024,
+        blocks: 0,
+        atime: SystemTime::UNIX_EPOCH,
+        mtime: SystemTime::UNIX_EPOCH,
+        ctime: SystemTime::UNIX_EPOCH,
+        crtime: SystemTime::UNIX_EPOCH,
+        kind: inode_data.into(),
+        perm: match inode_data {
+            InodeData::Regular(..) => 0o444,
+            InodeData::Symlink(_) => 0o444,
+            InodeData::Directory(..) => 0o555,
+        },
+        nlink: 0,
+        uid: 0,
+        gid: 0,
+        rdev: 0,
+        flags: 0,
+    }
+}
diff --git a/tvix/store/src/fuse/inode_tracker.rs b/tvix/store/src/fuse/inode_tracker.rs
new file mode 100644
index 0000000000..06434d25b3
--- /dev/null
+++ b/tvix/store/src/fuse/inode_tracker.rs
@@ -0,0 +1,458 @@
+use std::{collections::HashMap, sync::Arc};
+
+use crate::{proto, B3Digest};
+
+use super::inodes::{DirectoryInodeData, InodeData};
+
+/// InodeTracker keeps track of inodes, stores the data behind these inodes and deals
+/// with inode allocation.
+pub struct InodeTracker {
+    data: HashMap<u64, Arc<InodeData>>,
+
+    // lookup table for blobs by their B3Digest
+    blob_digest_to_inode: HashMap<B3Digest, u64>,
+
+    // lookup table for symlinks by their target
+    symlink_target_to_inode: HashMap<String, u64>,
+
+    // lookup table for directories by their B3Digest.
+    // Note the corresponding directory may not be present in data yet.
+    directory_digest_to_inode: HashMap<B3Digest, u64>,
+
+    // the next inode to allocate
+    next_inode: u64,
+}
+
+impl Default for InodeTracker {
+    fn default() -> Self {
+        Self {
+            data: Default::default(),
+
+            blob_digest_to_inode: Default::default(),
+            symlink_target_to_inode: Default::default(),
+            directory_digest_to_inode: Default::default(),
+
+            next_inode: 2,
+        }
+    }
+}
+
+impl InodeTracker {
+    // Retrieves data for a given inode, if it exists.
+    pub fn get(&self, ino: u64) -> Option<Arc<InodeData>> {
+        self.data.get(&ino).cloned()
+    }
+
+    // Stores data and returns the inode for it.
+    // In case an inode has already been allocated for the same data, that inode
+    // is returned, otherwise a new one is allocated.
+    // In case data is a [InodeData::Directory], inodes for all items are looked
+    // up
+    pub fn put(&mut self, data: InodeData) -> u64 {
+        match data {
+            InodeData::Regular(ref digest, _, _) => {
+                match self.blob_digest_to_inode.get(digest) {
+                    Some(found_ino) => {
+                        // We already have it, return the inode.
+                        *found_ino
+                    }
+                    None => self.insert_and_increment(data),
+                }
+            }
+            InodeData::Symlink(ref target) => {
+                match self.symlink_target_to_inode.get(target) {
+                    Some(found_ino) => {
+                        // We already have it, return the inode.
+                        *found_ino
+                    }
+                    None => self.insert_and_increment(data),
+                }
+            }
+            InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _size)) => {
+                // check the lookup table if the B3Digest is known.
+                match self.directory_digest_to_inode.get(digest) {
+                    Some(found_ino) => {
+                        // We already have it, return the inode.
+                        *found_ino
+                    }
+                    None => {
+                        // insert and return the inode
+                        self.insert_and_increment(data)
+                    }
+                }
+            }
+            // Inserting [DirectoryInodeData::Populated] usually replaces an
+            // existing [DirectoryInodeData::Sparse] one.
+            InodeData::Directory(DirectoryInodeData::Populated(ref digest, ref children)) => {
+                let dir_ino = self.directory_digest_to_inode.get(digest);
+                if let Some(dir_ino) = dir_ino {
+                    let dir_ino = *dir_ino;
+
+                    // We know the data must exist, as we found it in [directory_digest_to_inode].
+                    let needs_update = match **self.data.get(&dir_ino).unwrap() {
+                        InodeData::Regular(..) | InodeData::Symlink(_) => {
+                            panic!("unexpected type at inode {}", dir_ino);
+                        }
+                        // already populated, nothing to do
+                        InodeData::Directory(DirectoryInodeData::Populated(..)) => false,
+                        // in case the actual data is sparse, replace it with the populated one.
+                        // this allocates inodes for new children in the process.
+                        InodeData::Directory(DirectoryInodeData::Sparse(
+                            ref old_digest,
+                            ref _old_size,
+                        )) => {
+                            // sanity checking to ensure we update the right node
+                            debug_assert_eq!(old_digest, digest);
+
+                            true
+                        }
+                    };
+
+                    if needs_update {
+                        // populate inode fields in children
+                        let children = self.allocate_inodes_for_children(children.to_vec());
+
+                        // update sparse data with populated data
+                        self.data.insert(
+                            dir_ino,
+                            Arc::new(InodeData::Directory(DirectoryInodeData::Populated(
+                                digest.clone(),
+                                children,
+                            ))),
+                        );
+                    }
+
+                    dir_ino
+                } else {
+                    // populate inode fields in children
+                    let children = self.allocate_inodes_for_children(children.to_vec());
+                    // insert and return InodeData
+                    self.insert_and_increment(InodeData::Directory(DirectoryInodeData::Populated(
+                        digest.clone(),
+                        children,
+                    )))
+                }
+            }
+        }
+    }
+
+    // Consume a list of children with zeroed inodes, and allocate (or fetch existing) inodes.
+    fn allocate_inodes_for_children(
+        &mut self,
+        children: Vec<(u64, proto::node::Node)>,
+    ) -> Vec<(u64, proto::node::Node)> {
+        // allocate new inodes for all children
+        let mut children_new: Vec<(u64, proto::node::Node)> = Vec::new();
+
+        for (child_ino, ref child_node) in children {
+            debug_assert_eq!(0, child_ino, "expected child inode to be 0");
+            let child_ino = match child_node {
+                proto::node::Node::Directory(directory_node) => {
+                    // Try putting the sparse data in. If we already have a
+                    // populated version, it'll not update it.
+                    self.put(directory_node.into())
+                }
+                proto::node::Node::File(file_node) => self.put(file_node.into()),
+                proto::node::Node::Symlink(symlink_node) => self.put(symlink_node.into()),
+            };
+
+            children_new.push((child_ino, child_node.clone()))
+        }
+        children_new
+    }
+
+    // Inserts the data and returns the inode it was stored at, while
+    // incrementing next_inode.
+    fn insert_and_increment(&mut self, data: InodeData) -> u64 {
+        let ino = self.next_inode;
+        // insert into lookup tables
+        match data {
+            InodeData::Regular(ref digest, _, _) => {
+                self.blob_digest_to_inode.insert(digest.clone(), ino);
+            }
+            InodeData::Symlink(ref target) => {
+                self.symlink_target_to_inode.insert(target.to_string(), ino);
+            }
+            InodeData::Directory(DirectoryInodeData::Sparse(ref digest, _size)) => {
+                self.directory_digest_to_inode.insert(digest.clone(), ino);
+            }
+            // This is currently not used outside test fixtures.
+            // Usually a [DirectoryInodeData::Sparse] is inserted and later
+            // "upgraded" with more data.
+            // However, as a future optimization, a lookup for a PathInfo could trigger a
+            // [DirectoryService::get_recursive()] request that "forks into
+            // background" and prepopulates all Directories in a closure.
+            InodeData::Directory(DirectoryInodeData::Populated(ref digest, _)) => {
+                self.directory_digest_to_inode.insert(digest.clone(), ino);
+            }
+        }
+        // Insert data
+        self.data.insert(ino, Arc::new(data));
+
+        // increment inode counter and return old inode.
+        self.next_inode += 1;
+        ino
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::fuse::inodes::DirectoryInodeData;
+    use crate::proto;
+    use crate::tests::fixtures;
+
+    use super::InodeData;
+    use super::InodeTracker;
+
+    /// Getting something non-existent should be none
+    #[test]
+    fn get_nonexistent() {
+        let inode_tracker = InodeTracker::default();
+        assert!(inode_tracker.get(1).is_none());
+    }
+
+    /// Put of a regular file should allocate an inode, which should be the same when inserting again.
+    #[test]
+    fn put_regular() {
+        let mut inode_tracker = InodeTracker::default();
+        let f = InodeData::Regular(
+            fixtures::BLOB_A_DIGEST.clone(),
+            fixtures::BLOB_A.len() as u32,
+            false,
+        );
+
+        // put it in
+        let ino = inode_tracker.put(f.clone());
+
+        // a get should return the right data
+        let data = inode_tracker.get(ino).expect("must be some");
+        match *data {
+            InodeData::Regular(ref digest, _, _) => {
+                assert_eq!(&fixtures::BLOB_A_DIGEST.clone(), digest);
+            }
+            InodeData::Symlink(_) | InodeData::Directory(..) => panic!("wrong type"),
+        }
+
+        // another put should return the same ino
+        assert_eq!(ino, inode_tracker.put(f));
+
+        // inserting another file should return a different ino
+        assert_ne!(
+            ino,
+            inode_tracker.put(InodeData::Regular(
+                fixtures::BLOB_B_DIGEST.clone(),
+                fixtures::BLOB_B.len() as u32,
+                false,
+            ))
+        );
+    }
+
+    // Put of a symlink should allocate an inode, which should be the same when inserting again
+    #[test]
+    fn put_symlink() {
+        let mut inode_tracker = InodeTracker::default();
+        let f = InodeData::Symlink("target".to_string());
+
+        // put it in
+        let ino = inode_tracker.put(f.clone());
+
+        // a get should return the right data
+        let data = inode_tracker.get(ino).expect("must be some");
+        match *data {
+            InodeData::Symlink(ref target) => {
+                assert_eq!("target", target);
+            }
+            InodeData::Regular(..) | InodeData::Directory(..) => panic!("wrong type"),
+        }
+
+        // another put should return the same ino
+        assert_eq!(ino, inode_tracker.put(f));
+
+        // inserting another symlink should return a different ino
+        assert_ne!(
+            ino,
+            inode_tracker.put(InodeData::Symlink("target2".to_string()))
+        );
+    }
+
+    // TODO: put sparse directory
+
+    /// Put a directory into the inode tracker, which refers to a file not seen yet.
+    #[test]
+    fn put_directory_leaf() {
+        let mut inode_tracker = InodeTracker::default();
+
+        // this is a directory with a single item, a ".keep" file pointing to a 0 bytes blob.
+        let dir: InodeData = fixtures::DIRECTORY_WITH_KEEP.clone().into();
+
+        // put it in
+        let dir_ino = inode_tracker.put(dir.clone());
+
+        // a get should return the right data
+        let data = inode_tracker.get(dir_ino).expect("must be some");
+        match *data {
+            InodeData::Directory(super::DirectoryInodeData::Sparse(..)) => {
+                panic!("wrong type");
+            }
+            InodeData::Directory(super::DirectoryInodeData::Populated(
+                ref directory_digest,
+                ref children,
+            )) => {
+                // ensure the directory digest matches
+                assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest);
+
+                // ensure the child is populated, with a different inode than
+                // the parent, and the data matches expectations.
+                assert_eq!(1, children.len());
+                let (child_ino, child_node) = children.first().unwrap();
+                assert_ne!(dir_ino, *child_ino);
+                assert_eq!(
+                    &proto::node::Node::File(
+                        fixtures::DIRECTORY_WITH_KEEP.files.first().unwrap().clone()
+                    ),
+                    child_node
+                );
+
+                // ensure looking up that inode directly returns the data
+                let child_data = inode_tracker.get(*child_ino).expect("must exist");
+                match *child_data {
+                    InodeData::Regular(ref digest, size, executable) => {
+                        assert_eq!(&fixtures::EMPTY_BLOB_DIGEST.clone(), digest);
+                        assert_eq!(0, size);
+                        assert_eq!(false, executable);
+                    }
+                    InodeData::Symlink(_) | InodeData::Directory(..) => panic!("wrong type"),
+                }
+            }
+            InodeData::Symlink(_) | InodeData::Regular(..) => panic!("wrong type"),
+        }
+    }
+
+    /// Put a directory into the inode tracker, referring to files, directories
+    /// and symlinks not seen yet.
+    #[test]
+    fn put_directory_complicated() {
+        let mut inode_tracker = InodeTracker::default();
+
+        // this is a directory with three items: a ".keep" file, an "aa" symlink and a "keep" directory.
+        let dir_complicated: InodeData = fixtures::DIRECTORY_COMPLICATED.clone().into();
+
+        // put it in
+        let dir_complicated_ino = inode_tracker.put(dir_complicated.clone());
+
+        // a get should return the right data
+        let dir_data = inode_tracker
+            .get(dir_complicated_ino)
+            .expect("must be some");
+
+        let child_dir_ino = match *dir_data {
+            InodeData::Directory(DirectoryInodeData::Sparse(..)) => {
+                panic!("wrong type");
+            }
+            InodeData::Directory(DirectoryInodeData::Populated(
+                ref directory_digest,
+                ref children,
+            )) => {
+                // assert the directory digest matches
+                assert_eq!(&fixtures::DIRECTORY_COMPLICATED.digest(), directory_digest);
+
+                // ensure there's three children, all with different inodes
+                assert_eq!(3, children.len());
+                let mut seen_inodes = Vec::from([dir_complicated_ino]);
+
+                // check the first child (.keep)
+                {
+                    let (child_ino, child_node) = &children[0];
+                    assert!(!seen_inodes.contains(&child_ino));
+                    assert_eq!(
+                        &proto::node::Node::File(fixtures::DIRECTORY_COMPLICATED.files[0].clone()),
+                        child_node
+                    );
+                    seen_inodes.push(*child_ino);
+                }
+
+                // check the second child (aa)
+                {
+                    let (child_ino, child_node) = &children[1];
+                    assert!(!seen_inodes.contains(&child_ino));
+                    assert_eq!(
+                        &proto::node::Node::Symlink(
+                            fixtures::DIRECTORY_COMPLICATED.symlinks[0].clone()
+                        ),
+                        child_node
+                    );
+                    seen_inodes.push(*child_ino);
+                }
+
+                // check the third child (keep)
+                {
+                    let (child_ino, child_node) = &children[2];
+                    assert!(!seen_inodes.contains(&child_ino));
+                    assert_eq!(
+                        &proto::node::Node::Directory(
+                            fixtures::DIRECTORY_COMPLICATED.directories[0].clone()
+                        ),
+                        child_node
+                    );
+                    seen_inodes.push(*child_ino);
+
+                    // return the child_ino
+                    *child_ino
+                }
+            }
+            InodeData::Regular(..) | InodeData::Symlink(_) => panic!("wrong type"),
+        };
+
+        // get of the inode for child_ino
+        let child_dir_data = inode_tracker.get(child_dir_ino).expect("must be some");
+        // it should be a sparse InodeData::Directory with the right digest.
+        match *child_dir_data {
+            InodeData::Directory(DirectoryInodeData::Sparse(
+                ref child_dir_digest,
+                child_dir_size,
+            )) => {
+                assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), child_dir_digest);
+                assert_eq!(fixtures::DIRECTORY_WITH_KEEP.size(), child_dir_size);
+            }
+            InodeData::Directory(DirectoryInodeData::Populated(..))
+            | InodeData::Regular(..)
+            | InodeData::Symlink(_) => {
+                panic!("wrong type")
+            }
+        }
+
+        // put DIRECTORY_WITH_KEEP, which should return the same ino as [child_dir_ino],
+        // but update the sparse object to a populated one at the same time.
+        let child_dir_ino2 = inode_tracker.put(fixtures::DIRECTORY_WITH_KEEP.clone().into());
+        assert_eq!(child_dir_ino, child_dir_ino2);
+
+        // get the data
+        match *inode_tracker.get(child_dir_ino).expect("must be some") {
+            // it should be a populated InodeData::Directory with the right digest!
+            InodeData::Directory(DirectoryInodeData::Populated(
+                ref directory_digest,
+                ref children,
+            )) => {
+                // ensure the directory digest matches
+                assert_eq!(&fixtures::DIRECTORY_WITH_KEEP.digest(), directory_digest);
+
+                // ensure the child is populated, with a different inode than
+                // the parent, and the data matches expectations.
+                assert_eq!(1, children.len());
+                let (child_node_inode, child_node) = children.first().unwrap();
+                assert_ne!(dir_complicated_ino, *child_node_inode);
+                assert_eq!(
+                    &proto::node::Node::File(
+                        fixtures::DIRECTORY_WITH_KEEP.files.first().unwrap().clone()
+                    ),
+                    child_node
+                );
+            }
+            InodeData::Directory(DirectoryInodeData::Sparse(..))
+            | InodeData::Regular(..)
+            | InodeData::Symlink(_) => panic!("wrong type"),
+        }
+    }
+}
+
+// TODO: add test inserting a populated one first, then ensure an update doesn't degrade it back to sparse.
diff --git a/tvix/store/src/fuse/inodes.rs b/tvix/store/src/fuse/inodes.rs
new file mode 100644
index 0000000000..c16509f424
--- /dev/null
+++ b/tvix/store/src/fuse/inodes.rs
@@ -0,0 +1,78 @@
+//! This module contains all the data structures used to track information
+//! about inodes, which present tvix-store nodes in a filesystem.
+use crate::{proto, B3Digest};
+
+#[derive(Clone, Debug)]
+pub enum InodeData {
+    Regular(B3Digest, u32, bool),  // digest, size, executable
+    Symlink(String),               // target
+    Directory(DirectoryInodeData), // either [DirectoryInodeData::Sparse] or [DirectoryInodeData::Populated]
+}
+
+/// This encodes the two different states of [InodeData::Directory].
+/// Either the data still is sparse (we only saw a [proto::DirectoryNode], but
+/// didn't fetch the [proto::Directory] struct yet),
+/// or we processed a lookup and did fetch the data.
+#[derive(Clone, Debug)]
+pub enum DirectoryInodeData {
+    Sparse(B3Digest, u32),                              // digest, size
+    Populated(B3Digest, Vec<(u64, proto::node::Node)>), // [(child_inode, node)]
+}
+
+impl From<&proto::node::Node> for InodeData {
+    fn from(value: &proto::node::Node) -> Self {
+        match value {
+            proto::node::Node::Directory(directory_node) => directory_node.into(),
+            proto::node::Node::File(file_node) => file_node.into(),
+            proto::node::Node::Symlink(symlink_node) => symlink_node.into(),
+        }
+    }
+}
+
+impl From<&proto::SymlinkNode> for InodeData {
+    fn from(value: &proto::SymlinkNode) -> Self {
+        InodeData::Symlink(value.target.clone())
+    }
+}
+
+impl From<&proto::FileNode> for InodeData {
+    fn from(value: &proto::FileNode) -> Self {
+        InodeData::Regular(
+            B3Digest::from_vec(value.digest.clone()).unwrap(),
+            value.size,
+            value.executable,
+        )
+    }
+}
+
+/// Converts a DirectoryNode to a sparsely populated InodeData::Directory.
+impl From<&proto::DirectoryNode> for InodeData {
+    fn from(value: &proto::DirectoryNode) -> Self {
+        InodeData::Directory(DirectoryInodeData::Sparse(
+            B3Digest::from_vec(value.digest.clone()).unwrap(),
+            value.size,
+        ))
+    }
+}
+
+/// converts a proto::Directory to a InodeData::Directory(DirectoryInodeData::Populated(..)).
+/// The inodes for each child are 0, because it's up to the InodeTracker to allocate them.
+impl From<proto::Directory> for InodeData {
+    fn from(value: proto::Directory) -> Self {
+        let digest = value.digest();
+
+        let children: Vec<(u64, proto::node::Node)> = value.nodes().map(|node| (0, node)).collect();
+
+        InodeData::Directory(DirectoryInodeData::Populated(digest, children))
+    }
+}
+
+impl From<&InodeData> for fuser::FileType {
+    fn from(val: &InodeData) -> Self {
+        match val {
+            InodeData::Regular(..) => fuser::FileType::RegularFile,
+            InodeData::Symlink(_) => fuser::FileType::Symlink,
+            InodeData::Directory(..) => fuser::FileType::Directory,
+        }
+    }
+}
diff --git a/tvix/store/src/fuse/mod.rs b/tvix/store/src/fuse/mod.rs
index 7206cf3076..ac0bf29dab 100644
--- a/tvix/store/src/fuse/mod.rs
+++ b/tvix/store/src/fuse/mod.rs
@@ -1,12 +1,74 @@
+mod file_attr;
+mod inode_tracker;
+mod inodes;
+
+#[cfg(test)]
+mod tests;
+
 use crate::{
-    blobservice::BlobService, directoryservice::DirectoryService, pathinfoservice::PathInfoService,
+    blobservice::BlobService,
+    directoryservice::DirectoryService,
+    fuse::{
+        file_attr::gen_file_attr,
+        inodes::{DirectoryInodeData, InodeData},
+    },
+    pathinfoservice::PathInfoService,
+    proto::{node::Node, NamedNode},
+    B3Digest, Error,
 };
+use fuser::{FileAttr, ReplyAttr, Request};
+use nix_compat::store_path::StorePath;
+use std::io::Read;
 use std::sync::Arc;
+use std::{collections::HashMap, time::Duration};
+use tracing::{debug, info_span, warn};
 
+use self::inode_tracker::InodeTracker;
+
+/// This implements a read-only FUSE filesystem for a tvix-store
+/// with the passed [BlobService], [DirectoryService] and [PathInfoService].
+///
+/// We don't allow listing on the root mountpoint (inode 1, [fuser::FUSE_ROOT_ID]).
+/// In the future, this might be made configurable once a listing method is
+/// added to [self.path_info_service], and then show all store paths in that
+/// store.
+///
+/// Linux uses inodes in filesystems. When implementing FUSE, most calls are
+/// *for* a given inode.
+///
+/// This means, we need to have a stable mapping of inode numbers to the
+/// corresponding store nodes.
+///
+/// We internally delegate all inode allocation and state keeping to a
+/// [InodeTracker], and store the currently "explored" store paths together with
+/// the inode allocated for their root node.
+///
+/// There are some places where inodes are allocated / data inserted into
+/// [self.inode_tracker], if not allocated before already:
+///  - Processing a `lookup` request, either in the mount root, or somewhere
+///    deeper
+///  - Processing a `readdir` request
+///
+///  Things pointing to the same contents get the same inodes, irrespective of
+///  their own location.
+///  This means:
+///  - Symlinks with the same target will get the same inode.
+///  - Regular/executable files with the same contents will get the same inode
+///  - Directories with the same contents will get the same inode.
+///
+/// Due to the above being valid across the whole store, and considering the
+/// merkle structure is a DAG, not a tree, this also means we can't do "bucketed
+/// allocation", aka reserve Directory.size inodes for each PathInfo.
 pub struct FUSE {
     blob_service: Arc<dyn BlobService>,
     directory_service: Arc<dyn DirectoryService>,
     path_info_service: Arc<dyn PathInfoService>,
+
+    /// This maps a given StorePath to the inode we allocated for its root node.
+    store_paths: HashMap<StorePath, u64>,
+
+    /// This keeps track of inodes and data alongside them.
+    inode_tracker: InodeTracker,
 }
 
 impl FUSE {
@@ -19,8 +81,359 @@ impl FUSE {
             blob_service,
             directory_service,
             path_info_service,
+
+            store_paths: HashMap::default(),
+            inode_tracker: Default::default(),
+        }
+    }
+
+    /// This will turn a lookup request for [std::ffi::OsStr] in the root to
+    /// an ino and [InodeData], or `Ok(None)` if there's no such store path.
+    /// It will peek in [self.store_paths], and then either look it up from
+    /// [self.inode_tracker], or otherwise fetch from [self.path_info_service]
+    /// and insert into [self.inode_tracker]. Errors from the service, and a
+    /// PathInfo lacking a root node, are returned as [Error].
+    fn name_in_root_to_ino_and_data(
+        &mut self,
+        name: &std::ffi::OsStr,
+    ) -> Result<Option<(u64, Arc<InodeData>)>, Error> {
+        // parse the name into a [StorePath].
+        let store_path = if let Some(name) = name.to_str() {
+            match StorePath::from_string(name) {
+                Ok(store_path) => store_path,
+                Err(e) => {
+                    debug!(e=?e, "unable to parse as store path");
+                    // This is not an error, but a "ENOENT", as someone can stat
+                    // a file inside the root that's not a valid store path
+                    return Ok(None);
+                }
+            }
+        } else {
+            debug!("{name:?} is no string");
+            // same here.
+            return Ok(None);
+        };
+
+        if let Some(ino) = self.store_paths.get(&store_path) {
+            // If we already have that store path, lookup the inode from
+            // self.store_paths and then get the data from [self.inode_tracker],
+            // which in the case of a [InodeData::Directory] will be fully
+            // populated.
+            Ok(Some((
+                *ino,
+                self.inode_tracker.get(*ino).expect("must exist"),
+            )))
+        } else {
+            // If we don't have it, look it up in PathInfoService.
+            match self.path_info_service.get(store_path.digest)? {
+                // the pathinfo doesn't exist, so the file doesn't exist.
+                None => Ok(None),
+                Some(path_info) => {
+                    // A PathInfo without a root node is malformed; report it
+                    // as an error rather than panicking the FUSE thread.
+                    let root_node = path_info.node.and_then(|n| n.node).ok_or_else(|| {
+                        Error::StorageError("PathInfo has no root node".to_string())
+                    })?;
+
+                    // The name must match what's passed in the lookup, otherwise we return nothing.
+                    if root_node.get_name() != store_path.to_string() {
+                        return Ok(None);
+                    }
+
+                    // insert the (sparse) inode data and register in self.store_paths.
+                    // FUTUREWORK: have put return the data, saving the second lookup?
+                    let ino = self.inode_tracker.put((&root_node).into());
+                    self.store_paths.insert(store_path, ino);
+
+                    Ok(Some((ino, self.inode_tracker.get(ino).unwrap())))
+                }
+            }
+        }
+    }
+
+    /// This will lookup a directory by digest, and will turn it into a
+    /// [InodeData::Directory(DirectoryInodeData::Populated(..))].
+    /// This is both used to initially insert the root node of a store path,
+    /// as well as when looking up an intermediate DirectoryNode.
+    fn fetch_directory_inode_data(&self, directory_digest: &B3Digest) -> Result<InodeData, Error> {
+        match self.directory_service.get(directory_digest) {
+            Err(e) => {
+                warn!(e = e.to_string(), directory.digest=%directory_digest, "failed to get directory");
+                Err(e)
+            }
+            // If the Directory can't be found, this is a hole, bail out.
+            Ok(None) => {
+                tracing::error!(directory.digest=%directory_digest, "directory not found in directory service");
+                Err(Error::StorageError(format!(
+                    "directory {} not found",
+                    directory_digest
+                )))
+            }
+            Ok(Some(directory)) => Ok(directory.into()),
         }
     }
 }
 
-impl fuser::Filesystem for FUSE {}
+impl fuser::Filesystem for FUSE {
+    #[tracing::instrument(skip_all, fields(rq.inode = ino))]
+    fn getattr(&mut self, _req: &Request, ino: u64, reply: ReplyAttr) {
+        debug!("getattr");
+
+        if ino == fuser::FUSE_ROOT_ID {
+            reply.attr(&Duration::MAX, &file_attr::ROOT_FILE_ATTR);
+            return;
+        }
+
+        match self.inode_tracker.get(ino) {
+            None => reply.error(libc::ENOENT),
+            Some(node) => {
+                debug!(node = ?node, "found node");
+                reply.attr(&Duration::MAX, &file_attr::gen_file_attr(&node, ino));
+            }
+        }
+    }
+
+    #[tracing::instrument(skip_all, fields(rq.parent_inode = parent_ino, rq.name = ?name))]
+    fn lookup(
+        &mut self,
+        _req: &Request,
+        parent_ino: u64,
+        name: &std::ffi::OsStr,
+        reply: fuser::ReplyEntry,
+    ) {
+        debug!("lookup");
+
+        // This goes from a parent inode to a node.
+        // - If the parent is [fuser::FUSE_ROOT_ID], we need to check
+        //   [self.store_paths] (fetching from PathInfoService if needed)
+        // - Otherwise, lookup the parent in [self.inode_tracker] (which must be
+        //   a [InodeData::Directory]), and find the child with that name.
+        if parent_ino == fuser::FUSE_ROOT_ID {
+            match self.name_in_root_to_ino_and_data(name) {
+                Err(e) => {
+                    warn!("{}", e);
+                    reply.error(libc::EIO);
+                }
+                Ok(None) => {
+                    reply.error(libc::ENOENT);
+                }
+                Ok(Some((ino, inode_data))) => {
+                    debug!(inode_data=?&inode_data, ino=ino, "found node in root");
+                    reply_with_entry(reply, &gen_file_attr(&inode_data, ino));
+                }
+            }
+        } else {
+            // This is the "lookup for 'a' inside inode 42" case.
+            // We already know that inode 42 must be a directory.
+            // It might not be populated yet, so if it isn't, we do (by
+            // fetching from [self.directory_service]), and save the result in
+            // [self.inode_tracker].
+            // Now it for sure is populated, so we search for that name in the
+            // list of children and return the FileAttrs.
+
+            let parent_data = self.inode_tracker.get(parent_ino).unwrap();
+            let parent_data = match *parent_data {
+                InodeData::Regular(..) | InodeData::Symlink(_) => {
+                    // if the parent inode was not a directory, this doesn't make sense
+                    reply.error(libc::ENOTDIR);
+                    return;
+                }
+                InodeData::Directory(DirectoryInodeData::Sparse(ref parent_digest, _)) => {
+                    match self.fetch_directory_inode_data(parent_digest) {
+                        Ok(new_data) => {
+                            // update data in [self.inode_tracker] with populated variant.
+                            // FUTUREWORK: change put to return the data after
+                            // inserting, so we don't need to lookup a second
+                            // time?
+                            let ino = self.inode_tracker.put(new_data);
+                            self.inode_tracker.get(ino).unwrap()
+                        }
+                        Err(_e) => {
+                            reply.error(libc::EIO);
+                            return;
+                        }
+                    }
+                }
+                InodeData::Directory(DirectoryInodeData::Populated(..)) => parent_data,
+            };
+
+            // now parent_data can only be a [InodeData::Directory(DirectoryInodeData::Populated(..))].
+            let (parent_digest, children) = if let InodeData::Directory(
+                DirectoryInodeData::Populated(ref parent_digest, ref children),
+            ) = *parent_data
+            {
+                (parent_digest, children)
+            } else {
+                panic!("unexpected type")
+            };
+            let span = info_span!("lookup", directory.digest = %parent_digest);
+            let _enter = span.enter();
+
+            // in the children, find the one with the desired name.
+            if let Some((child_ino, _)) = children.iter().find(|e| e.1.get_name() == name) {
+                // lookup the child [InodeData] in [self.inode_tracker].
+                // We know the inodes for children have already been allocated.
+                let child_inode_data = self.inode_tracker.get(*child_ino).unwrap();
+
+                // Reply with the file attributes for the child.
+                // For child directories, we still have all data we need to reply.
+                reply_with_entry(reply, &gen_file_attr(&child_inode_data, *child_ino));
+            } else {
+                // Child not found, return ENOENT.
+                reply.error(libc::ENOENT);
+            }
+        }
+    }
+
+    // TODO: readdirplus?
+
+    #[tracing::instrument(skip_all, fields(rq.inode = ino, rq.offset = offset))]
+    fn readdir(
+        &mut self,
+        _req: &Request<'_>,
+        ino: u64,
+        _fh: u64,
+        offset: i64,
+        mut reply: fuser::ReplyDirectory,
+    ) {
+        debug!("readdir");
+
+        if ino == fuser::FUSE_ROOT_ID {
+            reply.error(libc::EPERM); // same error code as ipfs/kubo
+            return;
+        }
+
+        // lookup the inode data.
+        let dir_inode_data = self.inode_tracker.get(ino).unwrap();
+        let dir_inode_data = match *dir_inode_data {
+            InodeData::Regular(..) | InodeData::Symlink(..) => {
+                warn!("Not a directory");
+                reply.error(libc::ENOTDIR);
+                return;
+            }
+            InodeData::Directory(DirectoryInodeData::Sparse(ref directory_digest, _)) => {
+                match self.fetch_directory_inode_data(directory_digest) {
+                    Ok(new_data) => {
+                        // update data in [self.inode_tracker] with populated variant.
+                        // FUTUREWORK: change put to return the data after
+                        // inserting, so we don't need to lookup a second
+                        // time?
+                        let ino = self.inode_tracker.put(new_data);
+                        self.inode_tracker.get(ino).unwrap()
+                    }
+                    Err(_e) => {
+                        reply.error(libc::EIO);
+                        return;
+                    }
+                }
+            }
+            InodeData::Directory(DirectoryInodeData::Populated(..)) => dir_inode_data,
+        };
+
+        // now dir_inode_data can only be InodeData::Directory(DirectoryInodeData::Populated(..))
+        if let InodeData::Directory(DirectoryInodeData::Populated(ref _digest, ref children)) =
+            *dir_inode_data
+        {
+            for (i, (ino, child_node)) in children.iter().skip(offset as usize).enumerate() {
+                // the second parameter will become the "offset" parameter on the next call.
+                let full = reply.add(
+                    *ino,
+                    offset + i as i64 + 1_i64,
+                    match child_node {
+                        Node::Directory(_) => fuser::FileType::Directory,
+                        Node::File(_) => fuser::FileType::RegularFile,
+                        Node::Symlink(_) => fuser::FileType::Symlink,
+                    },
+                    child_node.get_name(),
+                );
+                if full {
+                    break;
+                }
+            }
+            reply.ok();
+        } else {
+            panic!("unexpected type")
+        }
+    }
+
+    // TODO: implement open + close?
+    /// Replies with up to `size` bytes, starting at `offset`, of the blob behind inode `ino`.
+    #[tracing::instrument(skip_all, fields(rq.inode = ino, rq.offset = offset, rq.size = size))]
+    fn read(
+        &mut self,
+        _req: &Request<'_>,
+        ino: u64,
+        _fh: u64,
+        offset: i64,
+        size: u32,
+        _flags: i32,
+        _lock_owner: Option<u64>,
+        reply: fuser::ReplyData,
+    ) {
+        debug!("read");
+
+        if ino == fuser::FUSE_ROOT_ID {
+            reply.error(libc::ENOSYS);
+            return;
+        }
+        // lookup the inode
+        match *self.inode_tracker.get(ino).unwrap() {
+            // read is invalid on non-files.
+            InodeData::Directory(..) | InodeData::Symlink(_) => {
+                warn!("is directory");
+                reply.error(libc::EISDIR);
+            }
+            InodeData::Regular(ref blob_digest, _blob_size, _) => {
+                let span = info_span!("read", blob.digest = %blob_digest);
+                let _enter = span.enter();
+
+                match self.blob_service.open_read(blob_digest) {
+                    Ok(None) => {
+                        warn!("blob not found");
+                        reply.error(libc::EIO);
+                    }
+                    Err(e) => {
+                        warn!(e=?e, "error opening blob");
+                        reply.error(libc::EIO);
+                    }
+                    Ok(Some(blob_reader)) => {
+                        let data: std::io::Result<Vec<u8>> = blob_reader
+                            .bytes()
+                            // TODO: this is obviously terrible. blobreader should implement seek.
+                            .skip(offset.try_into().unwrap())
+                            .take(size.try_into().unwrap())
+                            .collect();
+
+                        match data {
+                            // respond with the requested data
+                            Ok(data) => reply.data(&data),
+                            // not every io::Error carries an OS error code;
+                            // fall back to EIO instead of panicking.
+                            Err(e) => reply.error(e.raw_os_error().unwrap_or(libc::EIO)),
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    #[tracing::instrument(skip_all, fields(rq.inode = ino))]
+    fn readlink(&mut self, _req: &Request<'_>, ino: u64, reply: fuser::ReplyData) {
+        if ino == fuser::FUSE_ROOT_ID {
+            reply.error(libc::ENOSYS);
+            return;
+        }
+
+        // lookup the inode
+        match *self.inode_tracker.get(ino).unwrap() {
+            InodeData::Directory(..) | InodeData::Regular(..) => {
+                reply.error(libc::EINVAL);
+            }
+            InodeData::Symlink(ref target) => reply.data(target.as_bytes()),
+        }
+    }
+}
+
+fn reply_with_entry(reply: fuser::ReplyEntry, file_attr: &FileAttr) {
+    reply.entry(&Duration::MAX, file_attr, 1 /* TODO: generation */);
+}
diff --git a/tvix/store/src/fuse/tests.rs b/tvix/store/src/fuse/tests.rs
new file mode 100644
index 0000000000..6350c8fba4
--- /dev/null
+++ b/tvix/store/src/fuse/tests.rs
@@ -0,0 +1,797 @@
+use std::fs;
+use std::io::Cursor;
+use std::os::unix::prelude::MetadataExt;
+use std::path::Path;
+use std::sync::Arc;
+
+use tempfile::TempDir;
+
+use crate::blobservice::BlobService;
+use crate::directoryservice::DirectoryService;
+use crate::pathinfoservice::PathInfoService;
+use crate::proto::{DirectoryNode, FileNode, PathInfo};
+use crate::tests::fixtures;
+use crate::tests::utils::{gen_blob_service, gen_directory_service, gen_pathinfo_service};
+use crate::{proto, FUSE};
+
+const BLOB_A_NAME: &str = "00000000000000000000000000000000-test";
+const SYMLINK_NAME: &str = "11111111111111111111111111111111-test";
+const SYMLINK_NAME2: &str = "44444444444444444444444444444444-test";
+const DIRECTORY_WITH_KEEP_NAME: &str = "22222222222222222222222222222222-test";
+const DIRECTORY_COMPLICATED_NAME: &str = "33333333333333333333333333333333-test";
+
+fn setup_and_mount<P: AsRef<Path>, F>(
+    mountpoint: P,
+    setup_fn: F,
+) -> Result<fuser::BackgroundSession, std::io::Error>
+where
+    F: Fn(Arc<dyn BlobService>, Arc<dyn DirectoryService>, Arc<dyn PathInfoService>),
+{
+    let blob_service = gen_blob_service();
+    let directory_service = gen_directory_service();
+    let path_info_service = gen_pathinfo_service(blob_service.clone(), directory_service.clone());
+
+    setup_fn(
+        blob_service.clone(),
+        directory_service.clone(),
+        path_info_service.clone(),
+    );
+
+    let fs = FUSE::new(blob_service, directory_service, path_info_service);
+    fuser::spawn_mount2(fs, mountpoint, &[])
+}
+
+fn populate_blob_a(
+    blob_service: Arc<dyn BlobService>,
+    _directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+) {
+    // Upload BLOB_A
+    let mut bw = blob_service.open_write();
+    std::io::copy(&mut Cursor::new(fixtures::BLOB_A.to_vec()), &mut bw)
+        .expect("must succeed uploading");
+    bw.close().expect("must succeed closing");
+
+    // Create a PathInfo for it
+    let path_info = PathInfo {
+        node: Some(proto::Node {
+            node: Some(proto::node::Node::File(FileNode {
+                name: BLOB_A_NAME.to_string(),
+                digest: fixtures::BLOB_A_DIGEST.to_vec(),
+                size: fixtures::BLOB_A.len() as u32,
+                executable: false,
+            })),
+        }),
+        ..Default::default()
+    };
+    path_info_service.put(path_info).expect("must succeed");
+}
+
+fn populate_symlink(
+    _blob_service: Arc<dyn BlobService>,
+    _directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+) {
+    // Create a PathInfo for it
+    let path_info = PathInfo {
+        node: Some(proto::Node {
+            node: Some(proto::node::Node::Symlink(proto::SymlinkNode {
+                name: SYMLINK_NAME.to_string(),
+                target: BLOB_A_NAME.to_string(),
+            })),
+        }),
+        ..Default::default()
+    };
+    path_info_service.put(path_info).expect("must succeed");
+}
+
+/// This writes a symlink pointing to /nix/store/somewhereelse,
+/// which is the same symlink target as "aa" inside DIRECTORY_COMPLICATED.
+fn populate_symlink2(
+    _blob_service: Arc<dyn BlobService>,
+    _directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+) {
+    // Create a PathInfo for it
+    let path_info = PathInfo {
+        node: Some(proto::Node {
+            node: Some(proto::node::Node::Symlink(proto::SymlinkNode {
+                name: SYMLINK_NAME2.to_string(),
+                target: "/nix/store/somewhereelse".to_string(),
+            })),
+        }),
+        ..Default::default()
+    };
+    path_info_service.put(path_info).expect("must succeed");
+}
+
+fn populate_directory_with_keep(
+    blob_service: Arc<dyn BlobService>,
+    directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+) {
+    // upload empty blob
+    let mut bw = blob_service.open_write();
+    assert_eq!(
+        fixtures::EMPTY_BLOB_DIGEST.to_vec(),
+        bw.close().expect("must succeed closing").to_vec(),
+    );
+
+    // upload directory
+    directory_service
+        .put(fixtures::DIRECTORY_WITH_KEEP.clone())
+        .expect("must succeed uploading");
+
+    // upload pathinfo
+    let path_info = PathInfo {
+        node: Some(proto::Node {
+            node: Some(proto::node::Node::Directory(DirectoryNode {
+                name: DIRECTORY_WITH_KEEP_NAME.to_string(),
+                digest: fixtures::DIRECTORY_WITH_KEEP.digest().to_vec(),
+                size: fixtures::DIRECTORY_WITH_KEEP.size(),
+            })),
+        }),
+        ..Default::default()
+    };
+    path_info_service.put(path_info).expect("must succeed");
+}
+
+/// Insert [PathInfo] for DIRECTORY_WITH_KEEP, but don't provide the Directory
+/// itself.
+fn populate_pathinfo_without_directory(
+    _: Arc<dyn BlobService>,
+    _: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+) {
+    // upload pathinfo
+    let path_info = PathInfo {
+        node: Some(proto::Node {
+            node: Some(proto::node::Node::Directory(DirectoryNode {
+                name: DIRECTORY_WITH_KEEP_NAME.to_string(),
+                digest: fixtures::DIRECTORY_WITH_KEEP.digest().to_vec(),
+                size: fixtures::DIRECTORY_WITH_KEEP.size(),
+            })),
+        }),
+        ..Default::default()
+    };
+    path_info_service.put(path_info).expect("must succeed");
+}
+
+/// Insert a [PathInfo] for BLOB_A, but don't provide the blob it is pointing to.
+fn populate_blob_a_without_blob(
+    _: Arc<dyn BlobService>,
+    _: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+) {
+    // Create a PathInfo for blob A
+    let path_info = PathInfo {
+        node: Some(proto::Node {
+            node: Some(proto::node::Node::File(FileNode {
+                name: BLOB_A_NAME.to_string(),
+                digest: fixtures::BLOB_A_DIGEST.to_vec(),
+                size: fixtures::BLOB_A.len() as u32,
+                executable: false,
+            })),
+        }),
+        ..Default::default()
+    };
+    path_info_service.put(path_info).expect("must succeed");
+}
+
+fn populate_directory_complicated(
+    blob_service: Arc<dyn BlobService>,
+    directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+) {
+    // upload empty blob
+    let mut bw = blob_service.open_write();
+    assert_eq!(
+        fixtures::EMPTY_BLOB_DIGEST.to_vec(),
+        bw.close().expect("must succeed closing").to_vec(),
+    );
+
+    // upload inner directory
+    directory_service
+        .put(fixtures::DIRECTORY_WITH_KEEP.clone())
+        .expect("must succeed uploading");
+
+    // upload parent directory
+    directory_service
+        .put(fixtures::DIRECTORY_COMPLICATED.clone())
+        .expect("must succeed uploading");
+
+    // upload pathinfo
+    let path_info = PathInfo {
+        node: Some(proto::Node {
+            node: Some(proto::node::Node::Directory(DirectoryNode {
+                name: DIRECTORY_COMPLICATED_NAME.to_string(),
+                digest: fixtures::DIRECTORY_COMPLICATED.digest().to_vec(),
+                size: fixtures::DIRECTORY_COMPLICATED.size(),
+            })),
+        }),
+        ..Default::default()
+    };
+    path_info_service.put(path_info).expect("must succeed");
+}
+
+/// Ensure mounting itself doesn't fail
+#[test]
+fn mount() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session = setup_and_mount(tmpdir.path(), |_, _, _| {}).expect("must succeed");
+
+    fuser_session.join()
+}
+
+/// Ensure listing the root isn't allowed
+#[test]
+fn root() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session = setup_and_mount(tmpdir.path(), |_, _, _| {}).expect("must succeed");
+
+    {
+        // read_dir succeeds, but getting the first element will fail.
+        let mut it = fs::read_dir(tmpdir).expect("must succeed");
+
+        let err = it.next().expect("must be some").expect_err("must be err");
+        assert_eq!(std::io::ErrorKind::PermissionDenied, err.kind());
+    }
+
+    fuser_session.join()
+}
+
+/// Ensure we can stat a file at the root
+#[test]
+fn stat_file_at_root() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session = setup_and_mount(tmpdir.path(), populate_blob_a).expect("must succeed");
+
+    let p = tmpdir.path().join(BLOB_A_NAME);
+
+    // peek at the file metadata
+    let metadata = fs::metadata(p).expect("must succeed");
+
+    assert!(metadata.is_file());
+    assert!(metadata.permissions().readonly());
+    assert_eq!(fixtures::BLOB_A.len() as u64, metadata.len());
+
+    fuser_session.join()
+}
+
+/// Ensure we can read a file at the root
+#[test]
+fn read_file_at_root() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session = setup_and_mount(tmpdir.path(), populate_blob_a).expect("must succeed");
+
+    let p = tmpdir.path().join(BLOB_A_NAME);
+
+    // read the file contents
+    let data = fs::read(p).expect("must succeed");
+
+    // ensure size and contents match
+    assert_eq!(fixtures::BLOB_A.len(), data.len());
+    assert_eq!(fixtures::BLOB_A.to_vec(), data);
+
+    fuser_session.join()
+}
+
+/// Read the target of a symlink
+#[test]
+fn symlink_readlink() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session = setup_and_mount(tmpdir.path(), populate_symlink).expect("must succeed");
+    let p = tmpdir.path().join(SYMLINK_NAME);
+
+    let target = fs::read_link(&p).expect("must succeed");
+    assert_eq!(BLOB_A_NAME, target.to_str().unwrap());
+
+    // peek at the file metadata, which follows symlinks.
+    // this must fail, as we didn't populate the target.
+    let e = fs::metadata(&p).expect_err("must fail");
+    assert_eq!(std::io::ErrorKind::NotFound, e.kind());
+
+    // peeking at the file metadata without following symlinks will succeed.
+    let metadata = fs::symlink_metadata(&p).expect("must succeed");
+    assert!(metadata.is_symlink());
+
+    // reading from the symlink (which follows) will fail, because the target doesn't exist.
+    let e = fs::read(p).expect_err("must fail");
+    assert_eq!(std::io::ErrorKind::NotFound, e.kind());
+
+    fuser_session.join()
+}
+
+/// Read and stat a regular file through a symlink pointing to it.
+#[test]
+fn read_stat_through_symlink() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session = setup_and_mount(tmpdir.path(), |bs: Arc<_>, ds: Arc<_>, ps: Arc<_>| {
+        populate_blob_a(bs.clone(), ds.clone(), ps.clone());
+        populate_symlink(bs, ds, ps);
+    })
+    .expect("must succeed");
+
+    let p_symlink = tmpdir.path().join(SYMLINK_NAME);
+    let p_blob = tmpdir.path().join(BLOB_A_NAME);
+
+    // peek at the file metadata, which follows symlinks.
+    // this must now return the same metadata as when statting at the target directly.
+    let metadata_symlink = fs::metadata(&p_symlink).expect("must succeed");
+    let metadata_blob = fs::metadata(&p_blob).expect("must succeed");
+    assert_eq!(metadata_blob.file_type(), metadata_symlink.file_type());
+    assert_eq!(metadata_blob.len(), metadata_symlink.len());
+
+    // reading from the symlink (which follows) will return the same data as if
+    // we were reading from the file directly.
+    assert_eq!(
+        std::fs::read(p_blob).expect("must succeed"),
+        std::fs::read(p_symlink).expect("must succeed"),
+    );
+
+    fuser_session.join()
+}
+
+/// Read a directory in the root, and validate some attributes.
+#[test]
+fn read_stat_directory() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session =
+        setup_and_mount(tmpdir.path(), populate_directory_with_keep).expect("must succeed");
+
+    let p = tmpdir.path().join(DIRECTORY_WITH_KEEP_NAME);
+
+    // peek at the metadata of the directory
+    let metadata = fs::metadata(&p).expect("must succeed");
+    assert!(metadata.is_dir());
+    assert!(metadata.permissions().readonly());
+
+    fuser_session.join()
+}
+
+#[test]
+/// Read a blob inside a directory. This ensures we successfully populate directory data.
+fn read_blob_inside_dir() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session =
+        setup_and_mount(tmpdir.path(), populate_directory_with_keep).expect("must succeed");
+
+    let p = tmpdir.path().join(DIRECTORY_WITH_KEEP_NAME).join(".keep");
+
+    // peek at metadata.
+    let metadata = fs::metadata(&p).expect("must succeed");
+    assert!(metadata.is_file());
+    assert!(metadata.permissions().readonly());
+
+    // read from it
+    let data = fs::read(&p).expect("must succeed");
+    assert_eq!(fixtures::EMPTY_BLOB_CONTENTS.to_vec(), data);
+
+    fuser_session.join()
+}
+
+#[test]
+/// Read a blob inside a directory inside a directory. This ensures we properly
+/// populate directories as we traverse down the structure.
+fn read_blob_deep_inside_dir() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session =
+        setup_and_mount(tmpdir.path(), populate_directory_complicated).expect("must succeed");
+
+    let p = tmpdir
+        .path()
+        .join(DIRECTORY_COMPLICATED_NAME)
+        .join("keep")
+        .join(".keep");
+
+    // peek at metadata.
+    let metadata = fs::metadata(&p).expect("must succeed");
+    assert!(metadata.is_file());
+    assert!(metadata.permissions().readonly());
+
+    // read from it
+    let data = fs::read(&p).expect("must succeed");
+    assert_eq!(fixtures::EMPTY_BLOB_CONTENTS.to_vec(), data);
+
+    fuser_session.join()
+}
+
+/// Ensure readdir works.
+///
+/// Lists the top level of the "complicated" directory through the mount and
+/// checks the name and file type of every entry, in the order returned.
+#[test]
+fn readdir() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    let fuse_device = std::path::Path::new("/dev/fuse");
+    if !fuse_device.exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+    let fuser_session =
+        setup_and_mount(tmpdir.path(), populate_directory_complicated).expect("must succeed");
+
+    let dir_path = tmpdir.path().join(DIRECTORY_COMPLICATED_NAME);
+
+    // The entries live in their own scope, so they are dropped before the
+    // session is joined below.
+    {
+        // read_dir itself and every yielded entry must be Ok.
+        let listing = fs::read_dir(dir_path).expect("must succeed");
+        let entries: Vec<_> = listing
+            .map(|entry| entry.expect("must not be err"))
+            .collect();
+
+        // rust skips . and ..
+        assert_eq!(3, entries.len(), "number of elements should be 3");
+
+        // Entries are checked by position, because we always emit them
+        // ordered.
+        let (dot_keep, aa, keep) = (&entries[0], &entries[1], &entries[2]);
+
+        // ".keep", 0 byte file.
+        assert_eq!(".keep", dot_keep.file_name());
+        assert!(dot_keep.file_type().expect("must succeed").is_file());
+        assert_eq!(0, dot_keep.metadata().expect("must succeed").len());
+
+        // "aa", symlink.
+        assert_eq!("aa", aa.file_name());
+        assert!(aa.file_type().expect("must succeed").is_symlink());
+
+        // "keep", directory
+        assert_eq!("keep", keep.file_name());
+        assert!(keep.file_type().expect("must succeed").is_dir());
+    }
+
+    fuser_session.join()
+}
+
+/// Do a readdir deeper inside a directory, without doing readdir or stat in the parent directory.
+#[test]
+fn readdir_deep() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    let fuse_device = std::path::Path::new("/dev/fuse");
+    if !fuse_device.exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+    let fuser_session =
+        setup_and_mount(tmpdir.path(), populate_directory_complicated).expect("must succeed");
+
+    // Go straight to the nested "keep" directory; the parent is never listed
+    // or stat'ed by this test.
+    let nested_dir = tmpdir.path().join(DIRECTORY_COMPLICATED_NAME).join("keep");
+
+    // The entries live in their own scope, so they are dropped before the
+    // session is joined below.
+    {
+        // read_dir itself and every yielded entry must be Ok.
+        let listing = fs::read_dir(nested_dir).expect("must succeed");
+        let entries: Vec<_> = listing
+            .map(|entry| entry.expect("must not be err"))
+            .collect();
+
+        // rust skips . and ..
+        assert_eq!(1, entries.len(), "number of elements should be 1");
+
+        // ".keep", 0 byte file.
+        let dot_keep = &entries[0];
+        assert_eq!(".keep", dot_keep.file_name());
+        assert!(dot_keep.file_type().expect("must succeed").is_file());
+        assert_eq!(0, dot_keep.metadata().expect("must succeed").len());
+    }
+
+    fuser_session.join()
+}
+
+/// Check attributes match how they show up in /nix/store normally.
+///
+/// Covers one regular file, one directory and one symlink in the mount root:
+/// permission bits, sizes, ownership and timestamps.
+#[test]
+fn check_attributes() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    let fuse_device = std::path::Path::new("/dev/fuse");
+    if !fuse_device.exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+    let fuser_session = setup_and_mount(
+        tmpdir.path(),
+        |blobs: Arc<_>, directories: Arc<_>, path_infos: Arc<_>| {
+            populate_blob_a(blobs.clone(), directories.clone(), path_infos.clone());
+            populate_directory_with_keep(blobs.clone(), directories.clone(), path_infos.clone());
+            populate_symlink(blobs, directories, path_infos);
+        },
+    )
+    .expect("must succeed");
+
+    let root = tmpdir.path();
+    let p_file = root.join(BLOB_A_NAME);
+    let p_directory = root.join(DIRECTORY_WITH_KEEP_NAME);
+    let p_symlink = root.join(SYMLINK_NAME);
+
+    // peek at metadata. symlink_metadata is used so a symlink is never
+    // traversed by accident.
+    let metadata_file = fs::symlink_metadata(&p_file).expect("must succeed");
+    let metadata_directory = fs::symlink_metadata(&p_directory).expect("must succeed");
+    let metadata_symlink = fs::symlink_metadata(&p_symlink).expect("must succeed");
+
+    // modes should match. Only the lower permission bits are compared, any
+    // higher bits are masked away.
+    let permission_bits = |metadata: &std::fs::Metadata| metadata.mode() & 0o777;
+    assert_eq!(0o444, permission_bits(&metadata_file));
+    assert_eq!(0o555, permission_bits(&metadata_directory));
+    assert_eq!(0o444, permission_bits(&metadata_symlink));
+
+    // files should have the correct filesize
+    let expected_file_len = fixtures::BLOB_A.len() as u64;
+    assert_eq!(expected_file_len, metadata_file.len());
+
+    // directories should have their "size" as filesize
+    let expected_directory_size = fixtures::DIRECTORY_WITH_KEEP.size() as u64;
+    assert_eq!(expected_directory_size, metadata_directory.size());
+
+    let all_metadata = [&metadata_file, &metadata_directory, &metadata_symlink];
+    for metadata in all_metadata.iter() {
+        // uid and gid should be 0.
+        assert_eq!(0, metadata.uid());
+        assert_eq!(0, metadata.gid());
+
+        // all times should be set to the unix epoch.
+        assert_eq!(0, metadata.atime());
+        assert_eq!(0, metadata.mtime());
+        assert_eq!(0, metadata.ctime());
+        // crtime seems macOS only
+    }
+
+    fuser_session.join()
+}
+
+/// Ensure we allocate the same inodes for the same directory contents.
+/// $DIRECTORY_COMPLICATED_NAME/keep contains the same data as $DIRECTORY_WITH_KEEP.
+#[test]
+fn compare_inodes_directories() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    let fuse_device = std::path::Path::new("/dev/fuse");
+    if !fuse_device.exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+    let fuser_session = setup_and_mount(
+        tmpdir.path(),
+        |blobs: Arc<_>, directories: Arc<_>, path_infos: Arc<_>| {
+            populate_directory_with_keep(blobs.clone(), directories.clone(), path_infos.clone());
+            populate_directory_complicated(blobs, directories, path_infos);
+        },
+    )
+    .expect("must succeed");
+
+    let root = tmpdir.path();
+    let p_dir_with_keep = root.join(DIRECTORY_WITH_KEEP_NAME);
+    let p_sibling_dir = root.join(DIRECTORY_COMPLICATED_NAME).join("keep");
+
+    // peek at metadata, and compare the inode numbers of both directories.
+    let ino_dir_with_keep = fs::metadata(&p_dir_with_keep).expect("must succeed").ino();
+    let ino_sibling_dir = fs::metadata(&p_sibling_dir).expect("must succeed").ino();
+    assert_eq!(ino_dir_with_keep, ino_sibling_dir);
+
+    fuser_session.join()
+}
+
+/// Ensure we allocate the same inodes for the same file contents.
+/// $DIRECTORY_COMPLICATED_NAME/keep/.keep contains the same data as $DIRECTORY_COMPLICATED_NAME/.keep
+#[test]
+fn compare_inodes_files() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    let fuse_device = std::path::Path::new("/dev/fuse");
+    if !fuse_device.exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+    let fuser_session =
+        setup_and_mount(tmpdir.path(), populate_directory_complicated).expect("must succeed");
+
+    // Both files are reached through the same top-level directory.
+    let complicated_dir = tmpdir.path().join(DIRECTORY_COMPLICATED_NAME);
+    let p_keep1 = complicated_dir.join(".keep");
+    let p_keep2 = complicated_dir.join("keep").join(".keep");
+
+    // peek at metadata, and compare the inode numbers of both files.
+    let ino_keep1 = fs::metadata(&p_keep1).expect("must succeed").ino();
+    let ino_keep2 = fs::metadata(&p_keep2).expect("must succeed").ino();
+    assert_eq!(ino_keep1, ino_keep2);
+
+    fuser_session.join()
+}
+
+/// Ensure we allocate the same inode for symlinks pointing to the same targets.
+/// $DIRECTORY_COMPLICATED_NAME/aa points to the same target as SYMLINK_NAME2.
+#[test]
+fn compare_inodes_symlinks() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    let fuse_device = std::path::Path::new("/dev/fuse");
+    if !fuse_device.exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+    let fuser_session = setup_and_mount(
+        tmpdir.path(),
+        |blobs: Arc<_>, directories: Arc<_>, path_infos: Arc<_>| {
+            populate_directory_complicated(blobs.clone(), directories.clone(), path_infos.clone());
+            populate_symlink2(blobs, directories, path_infos);
+        },
+    )
+    .expect("must succeed");
+
+    let root = tmpdir.path();
+    let p_nested_symlink = root.join(DIRECTORY_COMPLICATED_NAME).join("aa");
+    let p_root_symlink = root.join(SYMLINK_NAME2);
+
+    // peek at metadata. symlink_metadata is used, so the symlinks themselves
+    // are inspected rather than their targets.
+    let ino_nested = fs::symlink_metadata(&p_nested_symlink)
+        .expect("must succeed")
+        .ino();
+    let ino_root = fs::symlink_metadata(&p_root_symlink)
+        .expect("must succeed")
+        .ino();
+    assert_eq!(ino_nested, ino_root);
+
+    fuser_session.join()
+}
+
+/// Check we match paths exactly.
+///
+/// Only the full, correct name may exist in the root; names that differ in
+/// the name part or in the hash part must not.
+#[test]
+fn read_wrong_paths_in_root() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    let fuse_device = std::path::Path::new("/dev/fuse");
+    if !fuse_device.exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+    let fuser_session = setup_and_mount(tmpdir.path(), populate_blob_a).expect("must succeed");
+
+    let root = tmpdir.path();
+    let exists_in_root = |name: &str| root.join(name).exists();
+
+    // wrong name
+    assert!(!exists_in_root("00000000000000000000000000000000-tes"));
+
+    // invalid hash (one character too short)
+    assert!(!exists_in_root("0000000000000000000000000000000-test"));
+
+    // right name, must exist
+    assert!(exists_in_root("00000000000000000000000000000000-test"));
+
+    // the wrong name with the right hash must still not exist, even after
+    // the right name has been looked up
+    assert!(!exists_in_root("00000000000000000000000000000000-tes"));
+
+    fuser_session.join()
+}
+
+/// Make sure writes are not allowed
+///
+/// Creating a file inside an (empty) mounted store must fail with
+/// [std::io::ErrorKind::Unsupported].
+#[test]
+fn disallow_writes() {
+    // https://plume.benboeckel.net/~/JustAnotherBlog/skipping-tests-in-rust
+    let fuse_device = std::path::Path::new("/dev/fuse");
+    if !fuse_device.exists() {
+        eprintln!("skipping test");
+        return;
+    }
+
+    let tmpdir = TempDir::new().unwrap();
+
+    // Nothing is populated; the store behind the mount stays empty.
+    let fuser_session = setup_and_mount(tmpdir.path(), |_, _, _| {}).expect("must succeed");
+
+    let target = tmpdir.path().join(BLOB_A_NAME);
+    let create_err = std::fs::File::create(&target).expect_err("must fail");
+
+    assert_eq!(std::io::ErrorKind::Unsupported, create_err.kind());
+
+    fuser_session.join()
+}
+
+#[test]
+/// Ensure we get an IO error if the directory service does not have the Directory object.
+fn missing_directory() {
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session =
+        setup_and_mount(tmpdir.path(), populate_pathinfo_without_directory).expect("must succeed");
+
+    let p = tmpdir.path().join(DIRECTORY_WITH_KEEP_NAME);
+
+    {
+        // `stat` on the path should succeed, because it doesn't trigger the directory request.
+        fs::metadata(&p).expect("must succeed");
+
+        // However, calling either `readdir` or `stat` on a child should fail with an IO error.
+        // It fails when trying to pull the first entry, because we don't implement opendir separately
+        fs::read_dir(&p)
+            .unwrap()
+            .into_iter()
+            .next()
+            .expect("must be some")
+            .expect_err("must be err");
+
+        // rust currently sets e.kind() to Uncategorized, which isn't very
+        // helpful, so we don't look at the error more closely than that..
+        fs::metadata(p.join(".keep")).expect_err("must fail");
+    }
+
+    fuser_session.join()
+}
+
+#[test]
+/// Ensure we get an IO error if the blob service does not have the blob
+fn missing_blob() {
+    if !std::path::Path::new("/dev/fuse").exists() {
+        eprintln!("skipping test");
+        return;
+    }
+    let tmpdir = TempDir::new().unwrap();
+
+    let fuser_session =
+        setup_and_mount(tmpdir.path(), populate_blob_a_without_blob).expect("must succeed");
+
+    let p = tmpdir.path().join(BLOB_A_NAME);
+
+    {
+        // `stat` on the blob should succeed, because it doesn't trigger a request to the blob service.
+        fs::metadata(&p).expect("must succeed");
+
+        // However, calling read on the blob should fail.
+        // rust currently sets e.kind() to Uncategorized, which isn't very
+        // helpful, so we don't look at the error more closely than that..
+        fs::read(p).expect_err("must fail");
+    }
+
+    fuser_session.join()
+}