about summary refs log tree commit diff
path: root/tvix/castore/src/proto
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-09-21T19·32+0300
committerclbot <clbot@tvl.fyi>2023-09-22T12·51+0000
commit32f41458c0a0f62bf906021ef096c465ccc45581 (patch)
tree3aaab8c453871f39c46fb43f8278aa933b24519d /tvix/castore/src/proto
parentd8ef0cfb4a859af7e33828b013356412d02532da (diff)
refactor(tvix): move castore into tvix-castore crate r/6629
This splits the pure content-addressed layers from tvix-store into a
`castore` crate, and only leaves PathInfo related things, as well as the
CLI entrypoint in the tvix-store crate.

Notable changes:
 - `fixtures` and `utils` had to be moved out of the `test` cfg, so they
   can be imported from tvix-store.
 - Some ad-hoc fixtures in the test were moved to proper fixtures in the
   same step.
 - The protos are now created by a (more static) recipe in the protos/
   directory.

The (now two) golang targets are commented out, as it's not possible to
update them properly in the same CL. This will be done by a followup CL
once this is merged (and whitby deployed)

Bug: https://b.tvl.fyi/issues/301

Change-Id: I8d675d4bf1fb697eb7d479747c1b1e3635718107
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9370
Reviewed-by: tazjin <tazjin@tvl.su>
Reviewed-by: flokli <flokli@flokli.de>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Diffstat (limited to 'tvix/castore/src/proto')
-rw-r--r--tvix/castore/src/proto/grpc_blobservice_wrapper.rs177
-rw-r--r--tvix/castore/src/proto/grpc_directoryservice_wrapper.rs184
-rw-r--r--tvix/castore/src/proto/mod.rs279
-rw-r--r--tvix/castore/src/proto/tests/directory.rs287
-rw-r--r--tvix/castore/src/proto/tests/directory_nodes_iterator.rs78
-rw-r--r--tvix/castore/src/proto/tests/grpc_blobservice.rs100
-rw-r--r--tvix/castore/src/proto/tests/grpc_directoryservice.rs239
-rw-r--r--tvix/castore/src/proto/tests/mod.rs4
8 files changed, 1348 insertions, 0 deletions
diff --git a/tvix/castore/src/proto/grpc_blobservice_wrapper.rs b/tvix/castore/src/proto/grpc_blobservice_wrapper.rs
new file mode 100644
index 0000000000..93db1deef6
--- /dev/null
+++ b/tvix/castore/src/proto/grpc_blobservice_wrapper.rs
@@ -0,0 +1,177 @@
+use crate::blobservice::BlobService;
+use core::pin::pin;
+use futures::TryFutureExt;
+use std::{
+    collections::VecDeque,
+    io,
+    ops::{Deref, DerefMut},
+    pin::Pin,
+    sync::Arc,
+};
+use tokio_stream::StreamExt;
+use tokio_util::io::ReaderStream;
+use tonic::{async_trait, Request, Response, Status, Streaming};
+use tracing::{instrument, warn};
+
+pub struct GRPCBlobServiceWrapper {
+    blob_service: Arc<dyn BlobService>,
+}
+
+impl From<Arc<dyn BlobService>> for GRPCBlobServiceWrapper {
+    fn from(value: Arc<dyn BlobService>) -> Self {
+        Self {
+            blob_service: value,
+        }
+    }
+}
+
+// This is necessary because bytes::BytesMut comes up with
+// a default 64 bytes capacity that cannot be changed
+// easily if you assume a bytes::BufMut trait implementation
+// Therefore, we override the Default implementation here
+// TODO(raitobezarius?): upstream me properly
+struct BytesMutWithDefaultCapacity<const N: usize> {
+    inner: bytes::BytesMut,
+}
+
+impl<const N: usize> Deref for BytesMutWithDefaultCapacity<N> {
+    type Target = bytes::BytesMut;
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<const N: usize> DerefMut for BytesMutWithDefaultCapacity<N> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
+
+impl<const N: usize> Default for BytesMutWithDefaultCapacity<N> {
+    fn default() -> Self {
+        BytesMutWithDefaultCapacity {
+            inner: bytes::BytesMut::with_capacity(N),
+        }
+    }
+}
+
+impl<const N: usize> bytes::Buf for BytesMutWithDefaultCapacity<N> {
+    fn remaining(&self) -> usize {
+        self.inner.remaining()
+    }
+
+    fn chunk(&self) -> &[u8] {
+        self.inner.chunk()
+    }
+
+    fn advance(&mut self, cnt: usize) {
+        self.inner.advance(cnt);
+    }
+}
+
+unsafe impl<const N: usize> bytes::BufMut for BytesMutWithDefaultCapacity<N> {
+    fn remaining_mut(&self) -> usize {
+        self.inner.remaining_mut()
+    }
+
+    unsafe fn advance_mut(&mut self, cnt: usize) {
+        self.inner.advance_mut(cnt);
+    }
+
+    fn chunk_mut(&mut self) -> &mut bytes::buf::UninitSlice {
+        self.inner.chunk_mut()
+    }
+}
+
+#[async_trait]
+impl super::blob_service_server::BlobService for GRPCBlobServiceWrapper {
+    // https://github.com/tokio-rs/tokio/issues/2723#issuecomment-1534723933
+    type ReadStream =
+        Pin<Box<dyn futures::Stream<Item = Result<super::BlobChunk, Status>> + Send + 'static>>;
+
+    #[instrument(skip(self))]
+    async fn stat(
+        &self,
+        request: Request<super::StatBlobRequest>,
+    ) -> Result<Response<super::BlobMeta>, Status> {
+        let rq = request.into_inner();
+        let req_digest = rq
+            .digest
+            .try_into()
+            .map_err(|_e| Status::invalid_argument("invalid digest length"))?;
+
+        match self.blob_service.has(&req_digest).await {
+            Ok(true) => Ok(Response::new(super::BlobMeta::default())),
+            Ok(false) => Err(Status::not_found(format!("blob {} not found", &req_digest))),
+            Err(e) => Err(e.into()),
+        }
+    }
+
+    #[instrument(skip(self))]
+    async fn read(
+        &self,
+        request: Request<super::ReadBlobRequest>,
+    ) -> Result<Response<Self::ReadStream>, Status> {
+        let rq = request.into_inner();
+
+        let req_digest = rq
+            .digest
+            .try_into()
+            .map_err(|_e| Status::invalid_argument("invalid digest length"))?;
+
+        match self.blob_service.open_read(&req_digest).await {
+            Ok(Some(reader)) => {
+                fn stream_mapper(
+                    x: Result<bytes::Bytes, io::Error>,
+                ) -> Result<super::BlobChunk, Status> {
+                    match x {
+                        Ok(bytes) => Ok(super::BlobChunk { data: bytes }),
+                        Err(e) => Err(Status::from(e)),
+                    }
+                }
+
+                let chunks_stream = ReaderStream::new(reader).map(stream_mapper);
+                Ok(Response::new(Box::pin(chunks_stream)))
+            }
+            Ok(None) => Err(Status::not_found(format!("blob {} not found", &req_digest))),
+            Err(e) => Err(e.into()),
+        }
+    }
+
+    #[instrument(skip(self))]
+    async fn put(
+        &self,
+        request: Request<Streaming<super::BlobChunk>>,
+    ) -> Result<Response<super::PutBlobResponse>, Status> {
+        let req_inner = request.into_inner();
+
+        let data_stream = req_inner.map(|x| {
+            x.map(|x| VecDeque::from(x.data.to_vec()))
+                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))
+        });
+
+        let mut data_reader = tokio_util::io::StreamReader::new(data_stream);
+
+        let mut blob_writer = pin!(self.blob_service.open_write().await);
+
+        tokio::io::copy(&mut data_reader, &mut blob_writer)
+            .await
+            .map_err(|e| {
+                warn!("error copying: {}", e);
+                Status::internal("error copying")
+            })?;
+
+        let digest = blob_writer
+            .close()
+            .map_err(|e| {
+                warn!("error closing stream: {}", e);
+                Status::internal("error closing stream")
+            })
+            .await?
+            .to_vec();
+
+        Ok(Response::new(super::PutBlobResponse {
+            digest: digest.into(),
+        }))
+    }
+}
diff --git a/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs
new file mode 100644
index 0000000000..5e143a7bd7
--- /dev/null
+++ b/tvix/castore/src/proto/grpc_directoryservice_wrapper.rs
@@ -0,0 +1,184 @@
+use crate::proto;
+use crate::{directoryservice::DirectoryService, B3Digest};
+use futures::StreamExt;
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::{sync::mpsc::channel, task};
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::{async_trait, Request, Response, Status, Streaming};
+use tracing::{debug, instrument, warn};
+
+pub struct GRPCDirectoryServiceWrapper {
+    directory_service: Arc<dyn DirectoryService>,
+}
+
+impl From<Arc<dyn DirectoryService>> for GRPCDirectoryServiceWrapper {
+    fn from(value: Arc<dyn DirectoryService>) -> Self {
+        Self {
+            directory_service: value,
+        }
+    }
+}
+
+#[async_trait]
+impl proto::directory_service_server::DirectoryService for GRPCDirectoryServiceWrapper {
+    type GetStream = ReceiverStream<tonic::Result<proto::Directory, Status>>;
+
+    #[instrument(skip(self))]
+    async fn get(
+        &self,
+        request: Request<proto::GetDirectoryRequest>,
+    ) -> Result<Response<Self::GetStream>, Status> {
+        let (tx, rx) = channel(5);
+
+        let req_inner = request.into_inner();
+
+        let directory_service = self.directory_service.clone();
+
+        let _task = {
+            // look at the digest in the request and put it in the top of the queue.
+            match &req_inner.by_what {
+                None => return Err(Status::invalid_argument("by_what needs to be specified")),
+                Some(proto::get_directory_request::ByWhat::Digest(ref digest)) => {
+                    let digest: B3Digest = digest
+                        .clone()
+                        .try_into()
+                        .map_err(|_e| Status::invalid_argument("invalid digest length"))?;
+
+                    task::spawn(async move {
+                        if !req_inner.recursive {
+                            let e: Result<proto::Directory, Status> =
+                                match directory_service.get(&digest).await {
+                                    Ok(Some(directory)) => Ok(directory),
+                                    Ok(None) => Err(Status::not_found(format!(
+                                        "directory {} not found",
+                                        digest
+                                    ))),
+                                    Err(e) => Err(e.into()),
+                                };
+
+                            if tx.send(e).await.is_err() {
+                                debug!("receiver dropped");
+                            }
+                        } else {
+                            // If recursive was requested, traverse via get_recursive.
+                            let mut directories_it = directory_service.get_recursive(&digest);
+
+                            while let Some(e) = directories_it.next().await {
+                                // map err in res from Error to Status
+                                let res = e.map_err(|e| Status::internal(e.to_string()));
+                                if tx.send(res).await.is_err() {
+                                    debug!("receiver dropped");
+                                    break;
+                                }
+                            }
+                        }
+                    });
+                }
+            }
+        };
+
+        let receiver_stream = ReceiverStream::new(rx);
+        Ok(Response::new(receiver_stream))
+    }
+
+    #[instrument(skip(self, request))]
+    async fn put(
+        &self,
+        request: Request<Streaming<proto::Directory>>,
+    ) -> Result<Response<proto::PutDirectoryResponse>, Status> {
+        let mut req_inner = request.into_inner();
+        // TODO: let this use DirectoryPutter to the store it's connected to,
+        // and move the validation logic into [SimplePutter].
+
+        // This keeps track of the seen directory keys, and their size.
+        // This is used to validate the size field of a reference to a previously sent directory.
+        // We don't need to keep the contents around, they're stored in the DB.
+        // https://github.com/rust-lang/rust-clippy/issues/5812
+        #[allow(clippy::mutable_key_type)]
+        let mut seen_directories_sizes: HashMap<B3Digest, u32> = HashMap::new();
+        let mut last_directory_dgst: Option<B3Digest> = None;
+
+        // Consume directories, and insert them into the store.
+        // Reject directory messages that refer to Directories not sent in the same stream.
+        while let Some(directory) = req_inner.message().await? {
+            // validate the directory itself.
+            if let Err(e) = directory.validate() {
+                return Err(Status::invalid_argument(format!(
+                    "directory {} failed validation: {}",
+                    directory.digest(),
+                    e,
+                )));
+            }
+
+            // for each child directory this directory refers to, we need
+            // to ensure it has been seen already in this stream, and that the size
+            // matches what we recorded.
+            for child_directory in &directory.directories {
+                let child_directory_digest: B3Digest = child_directory
+                    .digest
+                    .clone()
+                    .try_into()
+                    .map_err(|_e| Status::internal("invalid child directory digest len"))?;
+
+                match seen_directories_sizes.get(&child_directory_digest) {
+                    None => {
+                        return Err(Status::invalid_argument(format!(
+                            "child directory '{:?}' ({}) in directory '{}' not seen yet",
+                            child_directory.name,
+                            &child_directory_digest,
+                            &directory.digest(),
+                        )));
+                    }
+                    Some(seen_child_directory_size) => {
+                        if seen_child_directory_size != &child_directory.size {
+                            return Err(Status::invalid_argument(format!(
+                                    "child directory '{:?}' ({}) in directory '{}' referred with wrong size, expected {}, actual {}",
+                                    child_directory.name,
+                                    &child_directory_digest,
+                                    &directory.digest(),
+                                    seen_child_directory_size,
+                                    child_directory.size,
+                            )));
+                        }
+                    }
+                }
+            }
+
+            // NOTE: We can't know if a directory we're receiving actually is
+            // part of the closure, because we receive directories from the leaf nodes up to
+            // the root.
+            // The only thing we could to would be doing a final check when the
+            // last Directory was received, that all Directories received so far are
+            // reachable from that (root) node.
+
+            let dgst = directory.digest();
+            seen_directories_sizes.insert(dgst.clone(), directory.size());
+            last_directory_dgst = Some(dgst.clone());
+
+            // check if the directory already exists in the database. We can skip
+            // inserting if it's already there, as that'd be a no-op.
+            match self.directory_service.get(&dgst).await {
+                Err(e) => {
+                    warn!("error checking if directory already exists: {}", e);
+                    return Err(e.into());
+                }
+                // skip if already exists
+                Ok(Some(_)) => {}
+                // insert if it doesn't already exist
+                Ok(None) => {
+                    self.directory_service.put(directory).await?;
+                }
+            }
+        }
+
+        // We're done receiving. peek at last_directory_digest and either return the digest,
+        // or an error, if we received an empty stream.
+        match last_directory_dgst {
+            None => Err(Status::invalid_argument("no directories received")),
+            Some(last_directory_dgst) => Ok(Response::new(proto::PutDirectoryResponse {
+                root_digest: last_directory_dgst.into(),
+            })),
+        }
+    }
+}
diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs
new file mode 100644
index 0000000000..2a44383fdd
--- /dev/null
+++ b/tvix/castore/src/proto/mod.rs
@@ -0,0 +1,279 @@
+#![allow(clippy::derive_partial_eq_without_eq, non_snake_case)]
+// https://github.com/hyperium/tonic/issues/1056
+use data_encoding::BASE64;
+use std::{collections::HashSet, iter::Peekable};
+use thiserror::Error;
+
+use prost::Message;
+
+mod grpc_blobservice_wrapper;
+mod grpc_directoryservice_wrapper;
+
+pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper;
+pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper;
+
+use crate::B3Digest;
+
+tonic::include_proto!("tvix.castore.v1");
+
+#[cfg(feature = "reflection")]
+/// Compiled file descriptors for implementing [gRPC
+/// reflection](https://github.com/grpc/grpc/blob/master/doc/server-reflection.md) with e.g.
+/// [`tonic_reflection`](https://docs.rs/tonic-reflection).
+pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix.castore.v1");
+
+#[cfg(test)]
+mod tests;
+
+/// Errors that can occur during the validation of Directory messages.
+#[derive(Debug, PartialEq, Eq, Error)]
+pub enum ValidateDirectoryError {
+    /// Elements are not in sorted order
+    #[error("{} is not sorted", std::str::from_utf8(.0).unwrap_or(&BASE64.encode(.0)))]
+    WrongSorting(Vec<u8>),
+    /// Multiple elements with the same name encountered
+    #[error("{0:?} is a duplicate name")]
+    DuplicateName(Vec<u8>),
+    /// Invalid name encountered
+    #[error("Invalid name in {0:?}")]
+    InvalidName(Vec<u8>),
+    /// Invalid digest length encountered
+    #[error("Invalid Digest length: {0}")]
+    InvalidDigestLen(usize),
+}
+
+/// Checks a Node name for validity as an intermediate node, and returns an
+/// error that's generated from the supplied constructor.
+///
+/// We disallow slashes, null bytes, '.', '..' and the empty string.
+fn validate_node_name<E>(name: &[u8], err: fn(Vec<u8>) -> E) -> Result<(), E> {
+    if name.is_empty()
+        || name == b".."
+        || name == b"."
+        || name.contains(&0x00)
+        || name.contains(&b'/')
+    {
+        return Err(err(name.to_vec()));
+    }
+    Ok(())
+}
+
+/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode]
+/// and [node::Node], so we can ask all of them for the name easily.
+pub trait NamedNode {
+    fn get_name(&self) -> &[u8];
+}
+
+impl NamedNode for &FileNode {
+    fn get_name(&self) -> &[u8] {
+        &self.name
+    }
+}
+
+impl NamedNode for &DirectoryNode {
+    fn get_name(&self) -> &[u8] {
+        &self.name
+    }
+}
+
+impl NamedNode for &SymlinkNode {
+    fn get_name(&self) -> &[u8] {
+        &self.name
+    }
+}
+
+impl NamedNode for node::Node {
+    fn get_name(&self) -> &[u8] {
+        match self {
+            node::Node::File(node_file) => &node_file.name,
+            node::Node::Directory(node_directory) => &node_directory.name,
+            node::Node::Symlink(node_symlink) => &node_symlink.name,
+        }
+    }
+}
+
+impl node::Node {
+    /// Returns the node with a new name.
+    pub fn rename(self, name: bytes::Bytes) -> Self {
+        match self {
+            node::Node::Directory(n) => node::Node::Directory(DirectoryNode { name, ..n }),
+            node::Node::File(n) => node::Node::File(FileNode { name, ..n }),
+            node::Node::Symlink(n) => node::Node::Symlink(SymlinkNode { name, ..n }),
+        }
+    }
+}
+
+/// Accepts a name, and a mutable reference to the previous name.
+/// If the passed name is larger than the previous one, the reference is updated.
+/// If it's not, an error is returned.
+fn update_if_lt_prev<'n>(
+    prev_name: &mut &'n [u8],
+    name: &'n [u8],
+) -> Result<(), ValidateDirectoryError> {
+    if *name < **prev_name {
+        return Err(ValidateDirectoryError::WrongSorting(name.to_vec()));
+    }
+    *prev_name = name;
+    Ok(())
+}
+
+/// Inserts the given name into a HashSet if it's not already in there.
+/// If it is, an error is returned.
+fn insert_once<'n>(
+    seen_names: &mut HashSet<&'n [u8]>,
+    name: &'n [u8],
+) -> Result<(), ValidateDirectoryError> {
+    if seen_names.get(name).is_some() {
+        return Err(ValidateDirectoryError::DuplicateName(name.to_vec()));
+    }
+    seen_names.insert(name);
+    Ok(())
+}
+
+impl Directory {
+    /// The size of a directory is the number of all regular and symlink elements,
+    /// the number of directory elements, and their size fields.
+    pub fn size(&self) -> u32 {
+        self.files.len() as u32
+            + self.symlinks.len() as u32
+            + self
+                .directories
+                .iter()
+                .fold(0, |acc: u32, e| (acc + 1 + e.size))
+    }
+
+    /// Calculates the digest of a Directory, which is the blake3 hash of a
+    /// Directory protobuf message, serialized in protobuf canonical form.
+    pub fn digest(&self) -> B3Digest {
+        let mut hasher = blake3::Hasher::new();
+
+        hasher
+            .update(&self.encode_to_vec())
+            .finalize()
+            .as_bytes()
+            .into()
+    }
+
+    /// validate checks the directory for invalid data, such as:
+    /// - violations of name restrictions
+    /// - invalid digest lengths
+    /// - not properly sorted lists
+    /// - duplicate names in the three lists
+    pub fn validate(&self) -> Result<(), ValidateDirectoryError> {
+        let mut seen_names: HashSet<&[u8]> = HashSet::new();
+
+        let mut last_directory_name: &[u8] = b"";
+        let mut last_file_name: &[u8] = b"";
+        let mut last_symlink_name: &[u8] = b"";
+
+        // check directories
+        for directory_node in &self.directories {
+            validate_node_name(&directory_node.name, ValidateDirectoryError::InvalidName)?;
+            // ensure the digest has the appropriate size.
+            if TryInto::<B3Digest>::try_into(directory_node.digest.clone()).is_err() {
+                return Err(ValidateDirectoryError::InvalidDigestLen(
+                    directory_node.digest.len(),
+                ));
+            }
+
+            update_if_lt_prev(&mut last_directory_name, &directory_node.name)?;
+            insert_once(&mut seen_names, &directory_node.name)?;
+        }
+
+        // check files
+        for file_node in &self.files {
+            validate_node_name(&file_node.name, ValidateDirectoryError::InvalidName)?;
+            if TryInto::<B3Digest>::try_into(file_node.digest.clone()).is_err() {
+                return Err(ValidateDirectoryError::InvalidDigestLen(
+                    file_node.digest.len(),
+                ));
+            }
+
+            update_if_lt_prev(&mut last_file_name, &file_node.name)?;
+            insert_once(&mut seen_names, &file_node.name)?;
+        }
+
+        // check symlinks
+        for symlink_node in &self.symlinks {
+            validate_node_name(&symlink_node.name, ValidateDirectoryError::InvalidName)?;
+
+            update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?;
+            insert_once(&mut seen_names, &symlink_node.name)?;
+        }
+
+        Ok(())
+    }
+
+    /// Allows iterating over all three nodes ([DirectoryNode], [FileNode],
+    /// [SymlinkNode]) in an ordered fashion, as long as the individual lists
+    /// are sorted (which can be checked by the [Directory::validate]).
+    pub fn nodes(&self) -> DirectoryNodesIterator {
+        return DirectoryNodesIterator {
+            i_directories: self.directories.iter().peekable(),
+            i_files: self.files.iter().peekable(),
+            i_symlinks: self.symlinks.iter().peekable(),
+        };
+    }
+}
+
+/// Struct to hold the state of an iterator over all nodes of a Directory.
+///
+/// Internally, this keeps peekable Iterators over all three lists of a
+/// Directory message.
+pub struct DirectoryNodesIterator<'a> {
+    // directory: &Directory,
+    i_directories: Peekable<std::slice::Iter<'a, DirectoryNode>>,
+    i_files: Peekable<std::slice::Iter<'a, FileNode>>,
+    i_symlinks: Peekable<std::slice::Iter<'a, SymlinkNode>>,
+}
+
+/// looks at two elements implementing NamedNode, and returns true if "left
+/// is smaller / comes first".
+///
+/// Some(_) is preferred over None.
+fn left_name_lt_right<A: NamedNode, B: NamedNode>(left: Option<&A>, right: Option<&B>) -> bool {
+    match left {
+        // if left is None, right always wins
+        None => false,
+        Some(left_inner) => {
+            // left is Some.
+            match right {
+                // left is Some, right is None - left wins.
+                None => true,
+                Some(right_inner) => {
+                    // both are Some - compare the name.
+                    return left_inner.get_name() < right_inner.get_name();
+                }
+            }
+        }
+    }
+}
+
+impl Iterator for DirectoryNodesIterator<'_> {
+    type Item = node::Node;
+
+    // next returns the next node in the Directory.
+    // we peek at all three internal iterators, and pick the one with the
+    // smallest name, to ensure lexicographical ordering.
+    // The individual lists are already known to be sorted.
+    fn next(&mut self) -> Option<Self::Item> {
+        if left_name_lt_right(self.i_directories.peek(), self.i_files.peek()) {
+            // i_directories is still in the game, compare with symlinks
+            if left_name_lt_right(self.i_directories.peek(), self.i_symlinks.peek()) {
+                self.i_directories
+                    .next()
+                    .cloned()
+                    .map(node::Node::Directory)
+            } else {
+                self.i_symlinks.next().cloned().map(node::Node::Symlink)
+            }
+        } else {
+            // i_files is still in the game, compare with symlinks
+            if left_name_lt_right(self.i_files.peek(), self.i_symlinks.peek()) {
+                self.i_files.next().cloned().map(node::Node::File)
+            } else {
+                self.i_symlinks.next().cloned().map(node::Node::Symlink)
+            }
+        }
+    }
+}
diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs
new file mode 100644
index 0000000000..eed49b2b59
--- /dev/null
+++ b/tvix/castore/src/proto/tests/directory.rs
@@ -0,0 +1,287 @@
+use crate::proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError};
+use lazy_static::lazy_static;
+
+lazy_static! {
+    static ref DUMMY_DIGEST: [u8; 32] = [
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00,
+    ];
+}
+#[test]
+fn size() {
+    {
+        let d = Directory::default();
+        assert_eq!(d.size(), 0);
+    }
+    {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: "foo".into(),
+                digest: DUMMY_DIGEST.to_vec().into(),
+                size: 0,
+            }],
+            ..Default::default()
+        };
+        assert_eq!(d.size(), 1);
+    }
+    {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: "foo".into(),
+                digest: DUMMY_DIGEST.to_vec().into(),
+                size: 4,
+            }],
+            ..Default::default()
+        };
+        assert_eq!(d.size(), 5);
+    }
+    {
+        let d = Directory {
+            files: vec![FileNode {
+                name: "foo".into(),
+                digest: DUMMY_DIGEST.to_vec().into(),
+                size: 42,
+                executable: false,
+            }],
+            ..Default::default()
+        };
+        assert_eq!(d.size(), 1);
+    }
+    {
+        let d = Directory {
+            symlinks: vec![SymlinkNode {
+                name: "foo".into(),
+                target: "bar".into(),
+            }],
+            ..Default::default()
+        };
+        assert_eq!(d.size(), 1);
+    }
+}
+
+#[test]
+fn digest() {
+    let d = Directory::default();
+
+    assert_eq!(
+        d.digest(),
+        vec![
+            0xaf, 0x13, 0x49, 0xb9, 0xf5, 0xf9, 0xa1, 0xa6, 0xa0, 0x40, 0x4d, 0xea, 0x36, 0xdc,
+            0xc9, 0x49, 0x9b, 0xcb, 0x25, 0xc9, 0xad, 0xc1, 0x12, 0xb7, 0xcc, 0x9a, 0x93, 0xca,
+            0xe4, 0x1f, 0x32, 0x62
+        ]
+        .try_into()
+        .unwrap()
+    )
+}
+
+#[test]
+fn validate_empty() {
+    let d = Directory::default();
+    assert_eq!(d.validate(), Ok(()));
+}
+
+#[test]
+fn validate_invalid_names() {
+    {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: "".into(),
+                digest: DUMMY_DIGEST.to_vec().into(),
+                size: 42,
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, b"")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+
+    {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: ".".into(),
+                digest: DUMMY_DIGEST.to_vec().into(),
+                size: 42,
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, b".")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+
+    {
+        let d = Directory {
+            files: vec![FileNode {
+                name: "..".into(),
+                digest: DUMMY_DIGEST.to_vec().into(),
+                size: 42,
+                executable: false,
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, b"..")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+
+    {
+        let d = Directory {
+            symlinks: vec![SymlinkNode {
+                name: "\x00".into(),
+                target: "foo".into(),
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, b"\x00")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+
+    {
+        let d = Directory {
+            symlinks: vec![SymlinkNode {
+                name: "foo/bar".into(),
+                target: "foo".into(),
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, b"foo/bar")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+}
+
+#[test]
+fn validate_invalid_digest() {
+    let d = Directory {
+        directories: vec![DirectoryNode {
+            name: "foo".into(),
+            digest: vec![0x00, 0x42].into(), // invalid length
+            size: 42,
+        }],
+        ..Default::default()
+    };
+    match d.validate().expect_err("must fail") {
+        ValidateDirectoryError::InvalidDigestLen(n) => {
+            assert_eq!(n, 2)
+        }
+        _ => panic!("unexpected error"),
+    }
+}
+
+#[test]
+fn validate_sorting() {
+    // "b" comes before "a", bad.
+    {
+        let d = Directory {
+            directories: vec![
+                DirectoryNode {
+                    name: "b".into(),
+                    digest: DUMMY_DIGEST.to_vec().into(),
+                    size: 42,
+                },
+                DirectoryNode {
+                    name: "a".into(),
+                    digest: DUMMY_DIGEST.to_vec().into(),
+                    size: 42,
+                },
+            ],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::WrongSorting(s) => {
+                assert_eq!(s, b"a");
+            }
+            _ => panic!("unexpected error"),
+        }
+    }
+
+    // "a" exists twice, bad.
+    {
+        let d = Directory {
+            directories: vec![
+                DirectoryNode {
+                    name: "a".into(),
+                    digest: DUMMY_DIGEST.to_vec().into(),
+                    size: 42,
+                },
+                DirectoryNode {
+                    name: "a".into(),
+                    digest: DUMMY_DIGEST.to_vec().into(),
+                    size: 42,
+                },
+            ],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::DuplicateName(s) => {
+                assert_eq!(s, b"a");
+            }
+            _ => panic!("unexpected error"),
+        }
+    }
+
+    // "a" comes before "b", all good.
+    {
+        let d = Directory {
+            directories: vec![
+                DirectoryNode {
+                    name: "a".into(),
+                    digest: DUMMY_DIGEST.to_vec().into(),
+                    size: 42,
+                },
+                DirectoryNode {
+                    name: "b".into(),
+                    digest: DUMMY_DIGEST.to_vec().into(),
+                    size: 42,
+                },
+            ],
+            ..Default::default()
+        };
+
+        d.validate().expect("validate shouldn't error");
+    }
+
+    // [b, c] and [a] are both properly sorted.
+    {
+        let d = Directory {
+            directories: vec![
+                DirectoryNode {
+                    name: "b".into(),
+                    digest: DUMMY_DIGEST.to_vec().into(),
+                    size: 42,
+                },
+                DirectoryNode {
+                    name: "c".into(),
+                    digest: DUMMY_DIGEST.to_vec().into(),
+                    size: 42,
+                },
+            ],
+            symlinks: vec![SymlinkNode {
+                name: "a".into(),
+                target: "foo".into(),
+            }],
+            ..Default::default()
+        };
+
+        d.validate().expect("validate shouldn't error");
+    }
+}
diff --git a/tvix/castore/src/proto/tests/directory_nodes_iterator.rs b/tvix/castore/src/proto/tests/directory_nodes_iterator.rs
new file mode 100644
index 0000000000..68f147a332
--- /dev/null
+++ b/tvix/castore/src/proto/tests/directory_nodes_iterator.rs
@@ -0,0 +1,78 @@
+use crate::proto::Directory;
+use crate::proto::DirectoryNode;
+use crate::proto::FileNode;
+use crate::proto::NamedNode;
+use crate::proto::SymlinkNode;
+
+#[test]
+fn iterator() {
+    let d = Directory {
+        directories: vec![
+            DirectoryNode {
+                name: "c".into(),
+                ..DirectoryNode::default()
+            },
+            DirectoryNode {
+                name: "d".into(),
+                ..DirectoryNode::default()
+            },
+            DirectoryNode {
+                name: "h".into(),
+                ..DirectoryNode::default()
+            },
+            DirectoryNode {
+                name: "l".into(),
+                ..DirectoryNode::default()
+            },
+        ],
+        files: vec![
+            FileNode {
+                name: "b".into(),
+                ..FileNode::default()
+            },
+            FileNode {
+                name: "e".into(),
+                ..FileNode::default()
+            },
+            FileNode {
+                name: "g".into(),
+                ..FileNode::default()
+            },
+            FileNode {
+                name: "j".into(),
+                ..FileNode::default()
+            },
+        ],
+        symlinks: vec![
+            SymlinkNode {
+                name: "a".into(),
+                ..SymlinkNode::default()
+            },
+            SymlinkNode {
+                name: "f".into(),
+                ..SymlinkNode::default()
+            },
+            SymlinkNode {
+                name: "i".into(),
+                ..SymlinkNode::default()
+            },
+            SymlinkNode {
+                name: "k".into(),
+                ..SymlinkNode::default()
+            },
+        ],
+    };
+
+    // We keep this strings here and convert to string to make the comparison
+    // less messy.
+    let mut node_names: Vec<String> = vec![];
+
+    for node in d.nodes() {
+        node_names.push(String::from_utf8(node.get_name().to_vec()).unwrap());
+    }
+
+    assert_eq!(
+        vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
+        node_names
+    );
+}
diff --git a/tvix/castore/src/proto/tests/grpc_blobservice.rs b/tvix/castore/src/proto/tests/grpc_blobservice.rs
new file mode 100644
index 0000000000..0d7b340b44
--- /dev/null
+++ b/tvix/castore/src/proto/tests/grpc_blobservice.rs
@@ -0,0 +1,100 @@
+use crate::fixtures::{BLOB_A, BLOB_A_DIGEST};
+use crate::proto::blob_service_server::BlobService as GRPCBlobService;
+use crate::proto::{BlobChunk, GRPCBlobServiceWrapper, ReadBlobRequest, StatBlobRequest};
+use crate::utils::gen_blob_service;
+use tokio_stream::StreamExt;
+
+fn gen_grpc_blob_service() -> GRPCBlobServiceWrapper {
+    let blob_service = gen_blob_service();
+    GRPCBlobServiceWrapper::from(blob_service)
+}
+
+/// Trying to read a non-existent blob should return a not found error.
+#[tokio::test]
+async fn not_found_read() {
+    let service = gen_grpc_blob_service();
+
+    let resp = service
+        .read(tonic::Request::new(ReadBlobRequest {
+            digest: BLOB_A_DIGEST.clone().into(),
+        }))
+        .await;
+
+    // We can't use unwrap_err here, because the Ok value doesn't implement
+    // debug.
+    if let Err(e) = resp {
+        assert_eq!(e.code(), tonic::Code::NotFound);
+    } else {
+        panic!("resp is not err")
+    }
+}
+
+/// Trying to stat a non-existent blob should return a not found error.
+#[tokio::test]
+async fn not_found_stat() {
+    let service = gen_grpc_blob_service();
+
+    let resp = service
+        .stat(tonic::Request::new(StatBlobRequest {
+            digest: BLOB_A_DIGEST.clone().into(),
+            ..Default::default()
+        }))
+        .await
+        .expect_err("must fail");
+
+    // The resp should be a status with Code::NotFound
+    assert_eq!(resp.code(), tonic::Code::NotFound);
+}
+
+/// Put a blob in the store, get it back.
+#[tokio::test]
+async fn put_read_stat() {
+    let service = gen_grpc_blob_service();
+
+    // Send blob A.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![BlobChunk {
+            data: BLOB_A.clone(),
+        }]))
+        .await
+        .expect("must succeed")
+        .into_inner();
+
+    assert_eq!(BLOB_A_DIGEST.to_vec(), put_resp.digest);
+
+    // Stat for the digest of A.
+    // We currently don't ask for more granular chunking data, as we don't
+    // expose it yet.
+    let _resp = service
+        .stat(tonic::Request::new(StatBlobRequest {
+            digest: BLOB_A_DIGEST.clone().into(),
+            ..Default::default()
+        }))
+        .await
+        .expect("must succeed")
+        .into_inner();
+
+    // Read the blob. It should return the same data.
+    let resp = service
+        .read(tonic::Request::new(ReadBlobRequest {
+            digest: BLOB_A_DIGEST.clone().into(),
+        }))
+        .await;
+
+    let mut rx = resp.ok().unwrap().into_inner();
+
+    // the stream should contain one element, a BlobChunk with the same contents as BLOB_A.
+    let item = rx
+        .next()
+        .await
+        .expect("must be some")
+        .expect("must succeed");
+
+    assert_eq!(BLOB_A.clone(), item.data);
+
+    // … and no more elements
+    assert!(rx.next().await.is_none());
+
+    // TODO: we rely here on the blob being small enough to not get broken up into multiple chunks.
+    // Test with some bigger blob too
+}
diff --git a/tvix/castore/src/proto/tests/grpc_directoryservice.rs b/tvix/castore/src/proto/tests/grpc_directoryservice.rs
new file mode 100644
index 0000000000..6e8cf1e4a7
--- /dev/null
+++ b/tvix/castore/src/proto/tests/grpc_directoryservice.rs
@@ -0,0 +1,239 @@
+use crate::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C};
+use crate::proto::directory_service_server::DirectoryService as GRPCDirectoryService;
+use crate::proto::get_directory_request::ByWhat;
+use crate::proto::{Directory, DirectoryNode, SymlinkNode};
+use crate::proto::{GRPCDirectoryServiceWrapper, GetDirectoryRequest};
+use crate::utils::gen_directory_service;
+use tokio_stream::StreamExt;
+use tonic::Status;
+
+fn gen_grpc_service() -> GRPCDirectoryServiceWrapper {
+    let directory_service = gen_directory_service();
+    GRPCDirectoryServiceWrapper::from(directory_service)
+}
+
+/// Send the specified GetDirectoryRequest.
+/// Returns an error in the case of an error response, or an error in one of
+// the items in the stream, or a Vec<Directory> in the case of a successful
+/// request.
+async fn get_directories<S: GRPCDirectoryService>(
+    svc: &S,
+    get_directory_request: GetDirectoryRequest,
+) -> Result<Vec<Directory>, Status> {
+    let resp = svc.get(tonic::Request::new(get_directory_request)).await;
+
+    // if the response is an error itself, return the error, otherwise unpack
+    let stream = match resp {
+        Ok(resp) => resp,
+        Err(status) => return Err(status),
+    }
+    .into_inner();
+
+    let directory_results: Vec<Result<Directory, Status>> = stream.collect().await;
+
+    // turn Vec<Result<Directory, Status> into Result<Vec<Directory>,Status>
+    directory_results.into_iter().collect()
+}
+
+/// Trying to get a non-existent Directory should return a not found error.
+#[tokio::test]
+async fn not_found() {
+    let service = gen_grpc_service();
+
+    let resp = service
+        .get(tonic::Request::new(GetDirectoryRequest {
+            by_what: Some(ByWhat::Digest(DIRECTORY_A.digest().into())),
+            ..Default::default()
+        }))
+        .await;
+
+    let mut rx = resp.expect("must succeed").into_inner().into_inner();
+
+    // The stream should contain one element, an error with Code::NotFound.
+    let item = rx
+        .recv()
+        .await
+        .expect("must be some")
+        .expect_err("must be err");
+    assert_eq!(item.code(), tonic::Code::NotFound);
+
+    // … and nothing else
+    assert!(rx.recv().await.is_none());
+}
+
+/// Put a Directory into the store, get it back.
+#[tokio::test]
+async fn put_get() {
+    let service = gen_grpc_service();
+
+    let streaming_request = tonic_mock::streaming_request(vec![DIRECTORY_A.clone()]);
+    let put_resp = service
+        .put(streaming_request)
+        .await
+        .expect("must succeed")
+        .into_inner();
+
+    // the sent root_digest should match the calculated digest
+    assert_eq!(put_resp.root_digest, DIRECTORY_A.digest().to_vec());
+
+    // get it back
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            by_what: Some(ByWhat::Digest(DIRECTORY_A.digest().into())),
+            ..Default::default()
+        },
+    )
+    .await
+    .expect("must not error");
+
+    assert_eq!(vec![DIRECTORY_A.clone()], items);
+}
+
+/// Put multiple Directories into the store, and get them back
+#[tokio::test]
+async fn put_get_multiple() {
+    let service = gen_grpc_service();
+
+    // sending "b" (which refers to "a") without sending "a" first should fail.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![DIRECTORY_B.clone()]))
+        .await
+        .expect_err("must fail");
+
+    assert_eq!(tonic::Code::InvalidArgument, put_resp.code());
+
+    // sending "a", then "b" should succeed, and the response should contain the digest of b.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![
+            DIRECTORY_A.clone(),
+            DIRECTORY_B.clone(),
+        ]))
+        .await
+        .expect("must succeed");
+
+    assert_eq!(
+        DIRECTORY_B.digest().to_vec(),
+        put_resp.into_inner().root_digest
+    );
+
+    // now, request b, first in non-recursive mode.
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            recursive: false,
+            by_what: Some(ByWhat::Digest(DIRECTORY_B.digest().into())),
+        },
+    )
+    .await
+    .expect("must not error");
+
+    // We expect to only get b.
+    assert_eq!(vec![DIRECTORY_B.clone()], items);
+
+    // now, request b, but in recursive mode.
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            recursive: true,
+            by_what: Some(ByWhat::Digest(DIRECTORY_B.digest().into())),
+        },
+    )
+    .await
+    .expect("must not error");
+
+    // We expect to get b, and then a, because that's how we traverse down.
+    assert_eq!(vec![DIRECTORY_B.clone(), DIRECTORY_A.clone()], items);
+}
+
+/// Put multiple Directories into the store, and omit duplicates.
+#[tokio::test]
+async fn put_get_dedup() {
+    let service = gen_grpc_service();
+
+    // Send "A", then "C", which refers to "A" two times
+    // Pretend we're a dumb client sending A twice.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![
+            DIRECTORY_A.clone(),
+            DIRECTORY_A.clone(),
+            DIRECTORY_C.clone(),
+        ]))
+        .await
+        .expect("must succeed");
+
+    assert_eq!(
+        DIRECTORY_C.digest().to_vec(),
+        put_resp.into_inner().root_digest
+    );
+
+    // Ask for "C" recursively. We expect to only get "A" once, as there's no point sending it twice.
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            recursive: true,
+            by_what: Some(ByWhat::Digest(DIRECTORY_C.digest().into())),
+        },
+    )
+    .await
+    .expect("must not error");
+
+    // We expect to get C, and then A (once, as the second A has been deduplicated).
+    assert_eq!(vec![DIRECTORY_C.clone(), DIRECTORY_A.clone()], items);
+}
+
+/// Trying to upload a Directory failing validation should fail.
+#[tokio::test]
+async fn put_reject_failed_validation() {
+    let service = gen_grpc_service();
+
+    // construct a broken Directory message that fails validation
+    let broken_directory = Directory {
+        symlinks: vec![SymlinkNode {
+            name: "".into(),
+            target: "doesntmatter".into(),
+        }],
+        ..Default::default()
+    };
+    assert!(broken_directory.validate().is_err());
+
+    // send it over, it must fail
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![broken_directory]))
+        .await
+        .expect_err("must fail");
+
+    assert_eq!(put_resp.code(), tonic::Code::InvalidArgument);
+}
+
+/// Trying to upload a Directory with wrong size should fail.
+#[tokio::test]
+async fn put_reject_wrong_size() {
+    let service = gen_grpc_service();
+
+    // Construct a directory referring to DIRECTORY_A, but with wrong size.
+    let broken_parent_directory = Directory {
+        directories: vec![DirectoryNode {
+            name: "foo".into(),
+            digest: DIRECTORY_A.digest().into(),
+            size: 42,
+        }],
+        ..Default::default()
+    };
+    // Make sure we got the size wrong.
+    assert_ne!(
+        broken_parent_directory.directories[0].size,
+        DIRECTORY_A.size()
+    );
+
+    // now upload both (first A, then the broken parent). This must fail.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![
+            DIRECTORY_A.clone(),
+            broken_parent_directory,
+        ]))
+        .await
+        .expect_err("must fail");
+
+    assert_eq!(put_resp.code(), tonic::Code::InvalidArgument);
+}
diff --git a/tvix/castore/src/proto/tests/mod.rs b/tvix/castore/src/proto/tests/mod.rs
new file mode 100644
index 0000000000..8b62fadeb5
--- /dev/null
+++ b/tvix/castore/src/proto/tests/mod.rs
@@ -0,0 +1,4 @@
+mod directory;
+mod directory_nodes_iterator;
+mod grpc_blobservice;
+mod grpc_directoryservice;