Diffstat (limited to 'tvix/store/src/proto')
-rw-r--r--  tvix/store/src/proto/grpc_blobservice_wrapper.rs       | 130
-rw-r--r--  tvix/store/src/proto/grpc_directoryservice_wrapper.rs  | 177
-rw-r--r--  tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs   |  93
-rw-r--r--  tvix/store/src/proto/mod.rs                            | 370
-rw-r--r--  tvix/store/src/proto/sync_read_into_async_read.rs      | 158
-rw-r--r--  tvix/store/src/proto/tests/directory.rs                | 289
-rw-r--r--  tvix/store/src/proto/tests/directory_nodes_iterator.rs |  80
-rw-r--r--  tvix/store/src/proto/tests/grpc_blobservice.rs         | 102
-rw-r--r--  tvix/store/src/proto/tests/grpc_directoryservice.rs    | 241
-rw-r--r--  tvix/store/src/proto/tests/grpc_pathinfoservice.rs     |  67
-rw-r--r--  tvix/store/src/proto/tests/mod.rs                      |   6
-rw-r--r--  tvix/store/src/proto/tests/pathinfo.rs                 | 207
12 files changed, 1920 insertions, 0 deletions
diff --git a/tvix/store/src/proto/grpc_blobservice_wrapper.rs b/tvix/store/src/proto/grpc_blobservice_wrapper.rs
new file mode 100644
index 000000000000..3ec1d68872c7
--- /dev/null
+++ b/tvix/store/src/proto/grpc_blobservice_wrapper.rs
@@ -0,0 +1,130 @@
+use crate::{
+    blobservice::{BlobService, BlobWriter},
+    proto::sync_read_into_async_read::SyncReadIntoAsyncRead,
+    B3Digest,
+};
+use std::{collections::VecDeque, io, pin::Pin};
+use tokio::task;
+use tokio_stream::StreamExt;
+use tokio_util::io::ReaderStream;
+use tonic::{async_trait, Request, Response, Status, Streaming};
+use tracing::{instrument, warn};
+
+pub struct GRPCBlobServiceWrapper<BS: BlobService> {
+    blob_service: BS,
+}
+
+impl<BS: BlobService> From<BS> for GRPCBlobServiceWrapper<BS> {
+    fn from(value: BS) -> Self {
+        Self {
+            blob_service: value,
+        }
+    }
+}
+
+#[async_trait]
+impl<BS: BlobService + Send + Sync + Clone + 'static> super::blob_service_server::BlobService
+    for GRPCBlobServiceWrapper<BS>
+{
+    // https://github.com/tokio-rs/tokio/issues/2723#issuecomment-1534723933
+    type ReadStream =
+        Pin<Box<dyn futures::Stream<Item = Result<super::BlobChunk, Status>> + Send + 'static>>;
+
+    #[instrument(skip(self))]
+    async fn stat(
+        &self,
+        request: Request<super::StatBlobRequest>,
+    ) -> Result<Response<super::BlobMeta>, Status> {
+        let rq = request.into_inner();
+        let req_digest = B3Digest::from_vec(rq.digest)
+            .map_err(|_e| Status::invalid_argument("invalid digest length"))?;
+
+        if rq.include_chunks || rq.include_bao {
+            return Err(Status::internal("not implemented"));
+        }
+
+        match self.blob_service.has(&req_digest) {
+            Ok(true) => Ok(Response::new(super::BlobMeta::default())),
+            Ok(false) => Err(Status::not_found(format!("blob {} not found", &req_digest))),
+            Err(e) => Err(e.into()),
+        }
+    }
+
+    #[instrument(skip(self))]
+    async fn read(
+        &self,
+        request: Request<super::ReadBlobRequest>,
+    ) -> Result<Response<Self::ReadStream>, Status> {
+        let rq = request.into_inner();
+
+        let req_digest = B3Digest::from_vec(rq.digest)
+            .map_err(|_e| Status::invalid_argument("invalid digest length"))?;
+
+        match self.blob_service.open_read(&req_digest) {
+            Ok(Some(reader)) => {
+                let async_reader: SyncReadIntoAsyncRead<_, bytes::BytesMut> = reader.into();
+
+                fn stream_mapper(
+                    x: Result<bytes::Bytes, io::Error>,
+                ) -> Result<super::BlobChunk, Status> {
+                    match x {
+                        Ok(bytes) => Ok(super::BlobChunk {
+                            data: bytes.to_vec(),
+                        }),
+                        Err(e) => Err(Status::from(e)),
+                    }
+                }
+
+                let chunks_stream = ReaderStream::new(async_reader).map(stream_mapper);
+                Ok(Response::new(Box::pin(chunks_stream)))
+            }
+            Ok(None) => Err(Status::not_found(format!("blob {} not found", &req_digest))),
+            Err(e) => Err(e.into()),
+        }
+    }
+
+    #[instrument(skip(self))]
+    async fn put(
+        &self,
+        request: Request<Streaming<super::BlobChunk>>,
+    ) -> Result<Response<super::PutBlobResponse>, Status> {
+        let req_inner = request.into_inner();
+
+        let data_stream = req_inner.map(|x| {
+            x.map(|x| VecDeque::from(x.data))
+                .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidInput, e))
+        });
+
+        let data_reader = tokio_util::io::StreamReader::new(data_stream);
+
+        // prepare a writer, which we'll use in the blocking task below.
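+        // Note that the writer is opened eagerly here, before the blocking
+        // task starts, so a failure to open it is reported to the client
+        // right away, before any of the request stream is consumed.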
+        let mut writer = self
+            .blob_service
+            .open_write()
+            .map_err(|e| Status::internal(format!("unable to open for write: {}", e)))?;
+
+        let result = task::spawn_blocking(move || -> Result<super::PutBlobResponse, Status> {
+            // construct a sync reader to the data
+            let mut reader = tokio_util::io::SyncIoBridge::new(data_reader);
+
+            io::copy(&mut reader, &mut writer).map_err(|e| {
+                warn!("error copying: {}", e);
+                Status::internal("error copying")
+            })?;
+
+            let digest = writer
+                .close()
+                .map_err(|e| {
+                    warn!("error closing stream: {}", e);
+                    Status::internal("error closing stream")
+                })?
+                .to_vec();
+
+            Ok(super::PutBlobResponse { digest })
+        })
+        .await
+        .map_err(|_| Status::internal("failed to wait for task"))??;
+
+        Ok(Response::new(result))
+    }
+}
diff --git a/tvix/store/src/proto/grpc_directoryservice_wrapper.rs b/tvix/store/src/proto/grpc_directoryservice_wrapper.rs
new file mode 100644
index 000000000000..6d2df310137f
--- /dev/null
+++ b/tvix/store/src/proto/grpc_directoryservice_wrapper.rs
@@ -0,0 +1,177 @@
+use crate::proto;
+use crate::{directoryservice::DirectoryService, B3Digest};
+use std::collections::HashMap;
+use tokio::{sync::mpsc::channel, task};
+use tokio_stream::wrappers::ReceiverStream;
+use tonic::{async_trait, Request, Response, Status, Streaming};
+use tracing::{debug, instrument, warn};
+
+pub struct GRPCDirectoryServiceWrapper<C: DirectoryService> {
+    directory_service: C,
+}
+
+impl<DS: DirectoryService> From<DS> for GRPCDirectoryServiceWrapper<DS> {
+    fn from(value: DS) -> Self {
+        Self {
+            directory_service: value,
+        }
+    }
+}
+
+#[async_trait]
+impl<DS: DirectoryService + Send + Sync + Clone + 'static>
+    proto::directory_service_server::DirectoryService for GRPCDirectoryServiceWrapper<DS>
+{
+    type GetStream = ReceiverStream<tonic::Result<proto::Directory, Status>>;
+
+    #[instrument(skip(self))]
+    async fn get(
+        &self,
+        request: Request<proto::GetDirectoryRequest>,
+    ) -> Result<Response<Self::GetStream>, Status> {
+        let (tx, rx) = channel(5);
+
+        let req_inner = request.into_inner();
+
+        let directory_service = self.directory_service.clone();
+
+        let _task = {
+            // look at the digest in the request and put it in the top of the queue.
+            match &req_inner.by_what {
+                None => return Err(Status::invalid_argument("by_what needs to be specified")),
+                Some(proto::get_directory_request::ByWhat::Digest(digest)) => {
+                    let digest = B3Digest::from_vec(digest.to_vec())
+                        .map_err(|_e| Status::invalid_argument("invalid digest length"))?;
+
+                    task::spawn(async move {
+                        if !req_inner.recursive {
+                            let e: Result<proto::Directory, Status> =
+                                match directory_service.get(&digest) {
+                                    Ok(Some(directory)) => Ok(directory),
+                                    Ok(None) => Err(Status::not_found(format!(
+                                        "directory {} not found",
+                                        digest
+                                    ))),
+                                    Err(e) => Err(e.into()),
+                                };
+
+                            if tx.send(e).await.is_err() {
+                                debug!("receiver dropped");
+                            }
+                        } else {
+                            // If recursive was requested, traverse via get_recursive.
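+                            // get_recursive yields the root Directory first,
+                            // then its children, so the client receives the
+                            // closure in root-to-leaf order (see the
+                            // put_get_multiple test below).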
+                            let directories_it = directory_service.get_recursive(&digest);
+
+                            for e in directories_it {
+                                // map err in res from Error to Status
+                                let res = e.map_err(|e| Status::internal(e.to_string()));
+                                if tx.send(res).await.is_err() {
+                                    debug!("receiver dropped");
+                                    break;
+                                }
+                            }
+                        }
+                    });
+                }
+            }
+        };
+
+        let receiver_stream = ReceiverStream::new(rx);
+        Ok(Response::new(receiver_stream))
+    }
+
+    #[instrument(skip(self, request))]
+    async fn put(
+        &self,
+        request: Request<Streaming<proto::Directory>>,
+    ) -> Result<Response<proto::PutDirectoryResponse>, Status> {
+        let mut req_inner = request.into_inner();
+        // TODO: let this use DirectoryPutter to the store it's connected to,
+        // and move the validation logic into [SimplePutter].
+
+        // This keeps track of the seen directory keys, and their size.
+        // This is used to validate the size field of a reference to a previously sent directory.
+        // We don't need to keep the contents around, they're stored in the DB.
+        let mut seen_directories_sizes: HashMap<B3Digest, u32> = HashMap::new();
+        let mut last_directory_dgst: Option<B3Digest> = None;
+
+        // Consume directories, and insert them into the store.
+        // Reject directory messages that refer to Directories not sent in the same stream.
+        while let Some(directory) = req_inner.message().await? {
+            // validate the directory itself.
+            if let Err(e) = directory.validate() {
+                return Err(Status::invalid_argument(format!(
+                    "directory {} failed validation: {}",
+                    directory.digest(),
+                    e,
+                )));
+            }
+
+            // for each child directory this directory refers to, we need
+            // to ensure it has been seen already in this stream, and that the size
+            // matches what we recorded.
+            for child_directory in &directory.directories {
+                let child_directory_digest = B3Digest::from_vec(child_directory.digest.to_vec())
+                    .map_err(|_e| Status::internal("invalid child directory digest len"))?;
+
+                match seen_directories_sizes.get(&child_directory_digest) {
+                    None => {
+                        return Err(Status::invalid_argument(format!(
+                            "child directory '{}' ({}) in directory '{}' not seen yet",
+                            child_directory.name,
+                            &child_directory_digest,
+                            &directory.digest(),
+                        )));
+                    }
+                    Some(seen_child_directory_size) => {
+                        if seen_child_directory_size != &child_directory.size {
+                            return Err(Status::invalid_argument(format!(
+                                "child directory '{}' ({}) in directory '{}' referred with wrong size, expected {}, actual {}",
+                                child_directory.name,
+                                &child_directory_digest,
+                                &directory.digest(),
+                                seen_child_directory_size,
+                                child_directory.size,
+                            )));
+                        }
+                    }
+                }
+            }
+
+            // NOTE: We can't know if a directory we're receiving actually is
+            // part of the closure, because we receive directories from the leaf nodes up to
+            // the root.
+            // The only thing we could do would be doing a final check when the
+            // last Directory was received, that all Directories received so far are
+            // reachable from that (root) node.
+
+            let dgst = directory.digest();
+            seen_directories_sizes.insert(dgst.clone(), directory.size());
+            last_directory_dgst = Some(dgst.clone());
+
+            // check if the directory already exists in the database. We can skip
+            // inserting if it's already there, as that'd be a no-op.
+            match self.directory_service.get(&dgst) {
+                Err(e) => {
+                    warn!("error checking if directory already exists: {}", e);
+                    return Err(e.into());
+                }
+                // skip if already exists
+                Ok(Some(_)) => {}
+                // insert if it doesn't already exist
+                Ok(None) => {
+                    self.directory_service.put(directory)?;
+                }
+            }
+        }
+
+        // We're done receiving.
+        // peek at last_directory_dgst, and either return the digest,
+        // or an error, if we received an empty stream.
+        match last_directory_dgst {
+            None => Err(Status::invalid_argument("no directories received")),
+            Some(last_directory_dgst) => Ok(Response::new(proto::PutDirectoryResponse {
+                root_digest: last_directory_dgst.to_vec(),
+            })),
+        }
+    }
+}
diff --git a/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs b/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs
new file mode 100644
index 000000000000..e82557b3a06c
--- /dev/null
+++ b/tvix/store/src/proto/grpc_pathinfoservice_wrapper.rs
@@ -0,0 +1,93 @@
+use crate::nar::RenderError;
+use crate::proto;
+use crate::{nar::NARCalculationService, pathinfoservice::PathInfoService};
+use tonic::{async_trait, Request, Response, Result, Status};
+use tracing::{instrument, warn};
+
+pub struct GRPCPathInfoServiceWrapper<PS: PathInfoService, NS: NARCalculationService> {
+    path_info_service: PS,
+    nar_calculation_service: NS,
+}
+
+impl<PS: PathInfoService, NS: NARCalculationService> GRPCPathInfoServiceWrapper<PS, NS> {
+    pub fn new(path_info_service: PS, nar_calculation_service: NS) -> Self {
+        Self {
+            path_info_service,
+            nar_calculation_service,
+        }
+    }
+}
+
+#[async_trait]
+impl<
+        PS: PathInfoService + Send + Sync + 'static,
+        NS: NARCalculationService + Send + Sync + 'static,
+    > proto::path_info_service_server::PathInfoService for GRPCPathInfoServiceWrapper<PS, NS>
+{
+    #[instrument(skip(self))]
+    async fn get(
+        &self,
+        request: Request<proto::GetPathInfoRequest>,
+    ) -> Result<Response<proto::PathInfo>> {
+        match request.into_inner().by_what {
+            None => Err(Status::unimplemented("by_what needs to be specified")),
+            Some(proto::get_path_info_request::ByWhat::ByOutputHash(digest)) => {
+                let digest: [u8; 20] = digest
+                    .try_into()
+                    .map_err(|_e| Status::invalid_argument("invalid digest length"))?;
+                match self.path_info_service.get(digest) {
+                    Ok(None) => Err(Status::not_found("PathInfo not found")),
+                    Ok(Some(path_info)) => Ok(Response::new(path_info)),
+                    Err(e) => {
+                        warn!("failed to retrieve PathInfo: {}", e);
+                        Err(e.into())
+                    }
+                }
+            }
+        }
+    }
+
+    #[instrument(skip(self))]
+    async fn put(&self, request: Request<proto::PathInfo>) -> Result<Response<proto::PathInfo>> {
+        let path_info = request.into_inner();
+
+        // Store the PathInfo in the store. Clients MUST validate the data
+        // they send, so we don't validate additionally here.
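+        // put returns the PathInfo as persisted (path_info_new), which we
+        // echo back to the client in the response.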
+        match self.path_info_service.put(path_info) {
+            Ok(path_info_new) => Ok(Response::new(path_info_new)),
+            Err(e) => {
+                warn!("failed to insert PathInfo: {}", e);
+                Err(e.into())
+            }
+        }
+    }
+
+    #[instrument(skip(self))]
+    async fn calculate_nar(
+        &self,
+        request: Request<proto::Node>,
+    ) -> Result<Response<proto::CalculateNarResponse>> {
+        match request.into_inner().node {
+            None => Err(Status::invalid_argument("no root node sent")),
+            Some(root_node) => match self.nar_calculation_service.calculate_nar(&root_node) {
+                Ok((nar_size, nar_sha256)) => Ok(Response::new(proto::CalculateNarResponse {
+                    nar_size,
+                    nar_sha256: nar_sha256.to_vec(),
+                })),
+                Err(e) => Err(e.into()),
+            },
+        }
+    }
+}
+
+impl From<RenderError> for tonic::Status {
+    fn from(value: RenderError) -> Self {
+        match value {
+            RenderError::BlobNotFound(_, _) => Self::not_found(value.to_string()),
+            RenderError::DirectoryNotFound(_, _) => Self::not_found(value.to_string()),
+            RenderError::NARWriterError(_) => Self::internal(value.to_string()),
+            RenderError::StoreError(_) => Self::internal(value.to_string()),
+            RenderError::UnexpectedBlobMeta(_, _, _, _) => Self::internal(value.to_string()),
+        }
+    }
+}
diff --git a/tvix/store/src/proto/mod.rs b/tvix/store/src/proto/mod.rs
new file mode 100644
index 000000000000..4db0b9731edc
--- /dev/null
+++ b/tvix/store/src/proto/mod.rs
@@ -0,0 +1,370 @@
+#![allow(clippy::derive_partial_eq_without_eq)]
+// https://github.com/hyperium/tonic/issues/1056
+use std::{collections::HashSet, iter::Peekable};
+use thiserror::Error;
+
+use prost::Message;
+
+use nix_compat::store_path::{self, StorePath};
+
+mod grpc_blobservice_wrapper;
+mod grpc_directoryservice_wrapper;
+mod grpc_pathinfoservice_wrapper;
+
+mod sync_read_into_async_read;
+
+pub use grpc_blobservice_wrapper::GRPCBlobServiceWrapper;
+pub use grpc_directoryservice_wrapper::GRPCDirectoryServiceWrapper;
+pub use grpc_pathinfoservice_wrapper::GRPCPathInfoServiceWrapper;
+
+use crate::B3Digest;
+
+tonic::include_proto!("tvix.store.v1");
+
+#[cfg(feature = "reflection")]
+/// Compiled file descriptors for implementing [gRPC
+/// reflection](https://github.com/grpc/grpc/blob/master/doc/server-reflection.md) with e.g.
+/// [`tonic_reflection`](https://docs.rs/tonic-reflection).
+pub const FILE_DESCRIPTOR_SET: &[u8] = tonic::include_file_descriptor_set!("tvix.store.v1");
+
+#[cfg(test)]
+mod tests;
+
+/// Errors that can occur during the validation of Directory messages.
+#[derive(Debug, PartialEq, Eq, Error)]
+pub enum ValidateDirectoryError {
+    /// Elements are not in sorted order
+    #[error("{0} is not sorted")]
+    WrongSorting(String),
+    /// Multiple elements with the same name encountered
+    #[error("{0} is a duplicate name")]
+    DuplicateName(String),
+    /// Invalid name encountered
+    #[error("Invalid name in {0}")]
+    InvalidName(String),
+    /// Invalid digest length encountered
+    #[error("Invalid Digest length: {0}")]
+    InvalidDigestLen(usize),
+}
+
+/// Errors that can occur during the validation of PathInfo messages.
+#[derive(Debug, Error, PartialEq)]
+pub enum ValidatePathInfoError {
+    /// No node present
+    #[error("No node present")]
+    NoNodePresent(),
+
+    /// Invalid node name encountered.
+    #[error("Failed to parse {0} as StorePath: {1}")]
+    InvalidNodeName(String, store_path::Error),
+
+    /// The digest the (root) node refers to has invalid length.
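+    /// (All digests in the store are blake3 digests, which are 32 bytes long.)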
+    #[error("Invalid Digest length: {0}")]
+    InvalidDigestLen(usize),
+
+    /// The number of references in the narinfo.reference_names field does not match
+    /// the number of references in the .references field.
+    #[error("Inconsistent Number of References: {0} (narinfo) vs {1} (references)")]
+    InconsistentNumberOfReferences(usize, usize),
+}
+
+/// Checks a Node name for validity as an intermediate node, and returns an
+/// error that's generated from the supplied constructor.
+///
+/// We disallow slashes, null bytes, '.', '..' and the empty string.
+fn validate_node_name<E>(name: &str, err: fn(String) -> E) -> Result<(), E> {
+    if name.is_empty() || name == ".." || name == "." || name.contains('\x00') || name.contains('/')
+    {
+        return Err(err(name.to_string()));
+    }
+    Ok(())
+}
+
+/// Checks a digest for validity.
+/// Digests are 32 bytes long, as we store blake3 digests.
+fn validate_digest<E>(digest: &Vec<u8>, err: fn(usize) -> E) -> Result<(), E> {
+    if digest.len() != 32 {
+        return Err(err(digest.len()));
+    }
+    Ok(())
+}
+
+/// Parses a root node name.
+///
+/// On success, this returns the parsed [StorePath].
+/// On error, it returns an error generated from the supplied constructor.
+fn parse_node_name_root<E>(
+    name: &str,
+    err: fn(String, store_path::Error) -> E,
+) -> Result<StorePath, E> {
+    match StorePath::from_string(name) {
+        Ok(np) => Ok(np),
+        Err(e) => Err(err(name.to_string(), e)),
+    }
+}
+
+impl PathInfo {
+    /// validate performs some checks on the PathInfo struct,
+    /// returning either a [StorePath] of the root node, or a
+    /// [ValidatePathInfoError].
+    pub fn validate(&self) -> Result<StorePath, ValidatePathInfoError> {
+        // If there is a narinfo field populated, ensure the number of references there
+        // matches PathInfo.references count.
+        if let Some(narinfo) = &self.narinfo {
+            if narinfo.reference_names.len() != self.references.len() {
+                return Err(ValidatePathInfoError::InconsistentNumberOfReferences(
+                    narinfo.reference_names.len(),
+                    self.references.len(),
+                ));
+            }
+        }
+        // FUTUREWORK: parse references in reference_names. ensure they start
+        // with storeDir, and use the same digest as in self.references.
+
+        // Ensure there is a (root) node present, and it properly parses to a [StorePath].
+        let root_nix_path = match &self.node {
+            None => {
+                return Err(ValidatePathInfoError::NoNodePresent());
+            }
+            Some(Node { node }) => match node {
+                None => {
+                    return Err(ValidatePathInfoError::NoNodePresent());
+                }
+                Some(node::Node::Directory(directory_node)) => {
+                    // ensure the digest has the appropriate size.
+                    validate_digest(
+                        &directory_node.digest,
+                        ValidatePathInfoError::InvalidDigestLen,
+                    )?;
+
+                    // parse the name
+                    parse_node_name_root(
+                        &directory_node.name,
+                        ValidatePathInfoError::InvalidNodeName,
+                    )?
+                }
+                Some(node::Node::File(file_node)) => {
+                    // ensure the digest has the appropriate size.
+                    validate_digest(&file_node.digest, ValidatePathInfoError::InvalidDigestLen)?;
+
+                    // parse the name
+                    parse_node_name_root(&file_node.name, ValidatePathInfoError::InvalidNodeName)?
+                }
+                Some(node::Node::Symlink(symlink_node)) => {
+                    // parse the name
+                    parse_node_name_root(
+                        &symlink_node.name,
+                        ValidatePathInfoError::InvalidNodeName,
+                    )?
+                }
+            },
+        };
+
+        // return the root nix path
+        Ok(root_nix_path)
+    }
+}
+
+/// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode]
+/// and [node::Node], so we can ask all of them for the name easily.
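+/// This is what lets [left_name_lt_right] further below compare nodes of
+/// different types when merging the three sorted lists.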
+pub trait NamedNode {
+    fn get_name(&self) -> &str;
+}
+
+impl NamedNode for &FileNode {
+    fn get_name(&self) -> &str {
+        self.name.as_str()
+    }
+}
+
+impl NamedNode for &DirectoryNode {
+    fn get_name(&self) -> &str {
+        self.name.as_str()
+    }
+}
+
+impl NamedNode for &SymlinkNode {
+    fn get_name(&self) -> &str {
+        self.name.as_str()
+    }
+}
+
+impl NamedNode for node::Node {
+    fn get_name(&self) -> &str {
+        match self {
+            node::Node::File(node_file) => &node_file.name,
+            node::Node::Directory(node_directory) => &node_directory.name,
+            node::Node::Symlink(node_symlink) => &node_symlink.name,
+        }
+    }
+}
+
+/// Accepts a name, and a mutable reference to the previous name.
+/// If the passed name sorts at or after the previous one, the reference is updated.
+/// If it's not, an error is returned.
+fn update_if_lt_prev<'n>(
+    prev_name: &mut &'n str,
+    name: &'n str,
+) -> Result<(), ValidateDirectoryError> {
+    if *name < **prev_name {
+        return Err(ValidateDirectoryError::WrongSorting(name.to_string()));
+    }
+    *prev_name = name;
+    Ok(())
+}
+
+/// Inserts the given name into a HashSet if it's not already in there.
+/// If it is, an error is returned.
+fn insert_once<'n>(
+    seen_names: &mut HashSet<&'n str>,
+    name: &'n str,
+) -> Result<(), ValidateDirectoryError> {
+    if seen_names.get(name).is_some() {
+        return Err(ValidateDirectoryError::DuplicateName(name.to_string()));
+    }
+    seen_names.insert(name);
+    Ok(())
+}
+
+impl Directory {
+    /// The size of a directory is the number of all regular and symlink elements,
+    /// the number of directory elements, and their size fields.
+    pub fn size(&self) -> u32 {
+        self.files.len() as u32
+            + self.symlinks.len() as u32
+            + self
+                .directories
+                .iter()
+                .fold(0, |acc: u32, e| (acc + 1 + e.size))
+    }
+
+    /// Calculates the digest of a Directory, which is the blake3 hash of a
+    /// Directory protobuf message, serialized in protobuf canonical form.
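+    /// This digest is what Directory messages are referred to by, both in
+    /// DirectoryNode references and in the store itself.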
+    pub fn digest(&self) -> B3Digest {
+        let mut hasher = blake3::Hasher::new();
+
+        let vec = hasher
+            .update(&self.encode_to_vec())
+            .finalize()
+            .as_bytes()
+            .to_vec();
+        B3Digest::from_vec(vec).unwrap()
+    }
+
+    /// validate checks the directory for invalid data, such as:
+    /// - violations of name restrictions
+    /// - invalid digest lengths
+    /// - not properly sorted lists
+    /// - duplicate names in the three lists
+    pub fn validate(&self) -> Result<(), ValidateDirectoryError> {
+        let mut seen_names: HashSet<&str> = HashSet::new();
+
+        let mut last_directory_name: &str = "";
+        let mut last_file_name: &str = "";
+        let mut last_symlink_name: &str = "";
+
+        // check directories
+        for directory_node in &self.directories {
+            validate_node_name(&directory_node.name, ValidateDirectoryError::InvalidName)?;
+            validate_digest(
+                &directory_node.digest,
+                ValidateDirectoryError::InvalidDigestLen,
+            )?;
+
+            update_if_lt_prev(&mut last_directory_name, directory_node.name.as_str())?;
+            insert_once(&mut seen_names, directory_node.name.as_str())?;
+        }
+
+        // check files
+        for file_node in &self.files {
+            validate_node_name(&file_node.name, ValidateDirectoryError::InvalidName)?;
+            validate_digest(&file_node.digest, ValidateDirectoryError::InvalidDigestLen)?;
+
+            update_if_lt_prev(&mut last_file_name, file_node.name.as_str())?;
+            insert_once(&mut seen_names, file_node.name.as_str())?;
+        }
+
+        // check symlinks
+        for symlink_node in &self.symlinks {
+            validate_node_name(&symlink_node.name, ValidateDirectoryError::InvalidName)?;
+
+            update_if_lt_prev(&mut last_symlink_name, symlink_node.name.as_str())?;
+            insert_once(&mut seen_names, symlink_node.name.as_str())?;
+        }
+
+        Ok(())
+    }
+
+    /// Allows iterating over all three nodes ([DirectoryNode], [FileNode],
+    /// [SymlinkNode]) in an ordered fashion, as long as the individual lists
+    /// are sorted (which can be checked by [Directory::validate]).
+    pub fn nodes(&self) -> DirectoryNodesIterator {
+        return DirectoryNodesIterator {
+            i_directories: self.directories.iter().peekable(),
+            i_files: self.files.iter().peekable(),
+            i_symlinks: self.symlinks.iter().peekable(),
+        };
+    }
+}
+
+/// Struct to hold the state of an iterator over all nodes of a Directory.
+///
+/// Internally, this keeps peekable Iterators over all three lists of a
+/// Directory message.
+pub struct DirectoryNodesIterator<'a> {
+    // directory: &Directory,
+    i_directories: Peekable<std::slice::Iter<'a, DirectoryNode>>,
+    i_files: Peekable<std::slice::Iter<'a, FileNode>>,
+    i_symlinks: Peekable<std::slice::Iter<'a, SymlinkNode>>,
+}
+
+/// looks at two elements implementing NamedNode, and returns true if "left
+/// is smaller / comes first".
+///
+/// Some(_) is preferred over None.
+fn left_name_lt_right<A: NamedNode, B: NamedNode>(left: Option<&A>, right: Option<&B>) -> bool {
+    match left {
+        // if left is None, right always wins
+        None => false,
+        Some(left_inner) => {
+            // left is Some.
+            match right {
+                // left is Some, right is None - left wins.
+                None => true,
+                Some(right_inner) => {
+                    // both are Some - compare the name.
+                    return left_inner.get_name() < right_inner.get_name();
+                }
+            }
+        }
+    }
+}
+
+impl Iterator for DirectoryNodesIterator<'_> {
+    type Item = node::Node;
+
+    // next returns the next node in the Directory.
+    // we peek at all three internal iterators, and pick the one with the
+    // smallest name, to ensure lexicographical ordering.
+    // The individual lists are already known to be sorted.
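+    // This is effectively a three-way merge: first pick between directories
+    // and files, then compare the winner against symlinks.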
+    fn next(&mut self) -> Option<Self::Item> {
+        if left_name_lt_right(self.i_directories.peek(), self.i_files.peek()) {
+            // i_directories is still in the game, compare with symlinks
+            if left_name_lt_right(self.i_directories.peek(), self.i_symlinks.peek()) {
+                self.i_directories
+                    .next()
+                    .cloned()
+                    .map(node::Node::Directory)
+            } else {
+                self.i_symlinks.next().cloned().map(node::Node::Symlink)
+            }
+        } else {
+            // i_files is still in the game, compare with symlinks
+            if left_name_lt_right(self.i_files.peek(), self.i_symlinks.peek()) {
+                self.i_files.next().cloned().map(node::Node::File)
+            } else {
+                self.i_symlinks.next().cloned().map(node::Node::Symlink)
+            }
+        }
+    }
+}
diff --git a/tvix/store/src/proto/sync_read_into_async_read.rs b/tvix/store/src/proto/sync_read_into_async_read.rs
new file mode 100644
index 000000000000..0a0ef019781c
--- /dev/null
+++ b/tvix/store/src/proto/sync_read_into_async_read.rs
@@ -0,0 +1,158 @@
+use bytes::Buf;
+use core::task::Poll::Ready;
+use futures::ready;
+use futures::Future;
+use std::io;
+use std::io::Read;
+use std::pin::Pin;
+use std::sync::Arc;
+use std::task::Context;
+use std::task::Poll;
+use tokio::io::AsyncRead;
+use tokio::runtime::Handle;
+use tokio::sync::Mutex;
+use tokio::task::JoinHandle;
+
+#[derive(Debug)]
+enum State<Buf: bytes::Buf + bytes::BufMut> {
+    Idle(Option<Buf>),
+    Busy(JoinHandle<(io::Result<usize>, Buf)>),
+}
+
+use State::{Busy, Idle};
+
+/// Use a [`SyncReadIntoAsyncRead`] to asynchronously read from a
+/// synchronous API.
+#[derive(Debug)]
+pub struct SyncReadIntoAsyncRead<R: Read + Send, Buf: bytes::Buf + bytes::BufMut> {
+    state: Mutex<State<Buf>>,
+    reader: Arc<Mutex<R>>,
+    rt: Handle,
+}
+
+impl<R: Read + Send, Buf: bytes::Buf + bytes::BufMut> SyncReadIntoAsyncRead<R, Buf> {
+    /// This must be called from within a Tokio runtime context, or else it will panic.
+    #[track_caller]
+    pub fn new(rt: Handle, reader: R) -> Self {
+        Self {
+            rt,
+            state: State::Idle(None).into(),
+            reader: Arc::new(reader.into()),
+        }
+    }
+
+    /// This must be called from within a Tokio runtime context, or else it will panic.
+    pub fn new_with_reader(readable: R) -> Self {
+        Self::new(Handle::current(), readable)
+    }
+}
+
+/// Repeats operations that are interrupted.
+macro_rules! uninterruptibly {
+    ($e:expr) => {{
+        loop {
+            match $e {
+                Err(ref e) if e.kind() == io::ErrorKind::Interrupted => {}
+                res => break res,
+            }
+        }
+    }};
+}
+
+impl<
+        R: Read + Send + 'static + std::marker::Unpin,
+        Buf: bytes::Buf + bytes::BufMut + Send + Default + std::marker::Unpin + 'static,
+    > AsyncRead for SyncReadIntoAsyncRead<R, Buf>
+{
+    fn poll_read(
+        self: Pin<&mut Self>,
+        cx: &mut Context<'_>,
+        dst: &mut tokio::io::ReadBuf<'_>,
+    ) -> Poll<io::Result<()>> {
+        let me = self.get_mut();
+        // Do we need this mutex?
+        let state = me.state.get_mut();
+
+        loop {
+            match state {
+                Idle(ref mut buf_cell) => {
+                    let mut buf = buf_cell.take().unwrap_or_default();
+
+                    if buf.has_remaining() {
+                        // Here, we will split the `buf` into `[..dst.remaining()... ; rest ]`
+                        // The `rest` is stuffed into the `buf_cell` for further poll_read.
+                        // The other is completely consumed into the unfilled destination.
+                        // `rest` can be empty.
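+                        // (copy_to_bytes advances `buf` past the copied
+                        // prefix, so the rest stays in place for the next
+                        // poll_read call.)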
+                        let mut adjusted_src =
+                            buf.copy_to_bytes(std::cmp::min(buf.remaining(), dst.remaining()));
+                        let copied_size = adjusted_src.remaining();
+                        adjusted_src.copy_to_slice(dst.initialize_unfilled_to(copied_size));
+                        dst.set_filled(copied_size);
+                        *buf_cell = Some(buf);
+                        return Ready(Ok(()));
+                    }
+
+                    let reader = me.reader.clone();
+                    *state = Busy(me.rt.spawn_blocking(move || {
+                        let result = uninterruptibly!(reader.blocking_lock().read(
+                            // SAFETY: `reader.read` will *ONLY* write initialized bytes
+                            // and never *READ* uninitialized bytes
+                            // inside this buffer.
+                            //
+                            // Furthermore, casting the slice as `*mut [u8]`
+                            // is safe because it has the same layout.
+                            //
+                            // Finally, the pointer obtained is valid and owned
+                            // by `buf` only as we have a valid mutable reference
+                            // to it, it is valid for write.
+                            //
+                            // Here, we copy a nightly API: https://doc.rust-lang.org/stable/src/core/mem/maybe_uninit.rs.html#994-998
+                            unsafe {
+                                &mut *(buf.chunk_mut().as_uninit_slice_mut()
+                                    as *mut [std::mem::MaybeUninit<u8>]
+                                    as *mut [u8])
+                            }
+                        ));
+
+                        if let Ok(n) = result {
+                            // SAFETY: given we initialize `n` bytes, we can move `n` bytes
+                            // forward.
+                            unsafe {
+                                buf.advance_mut(n);
+                            }
+                        }
+
+                        (result, buf)
+                    }));
+                }
+                Busy(ref mut rx) => {
+                    let (result, mut buf) = ready!(Pin::new(rx).poll(cx))?;
+
+                    match result {
+                        Ok(n) => {
+                            if n > 0 {
+                                let remaining = std::cmp::min(n, dst.remaining());
+                                let mut adjusted_src = buf.copy_to_bytes(remaining);
+                                adjusted_src.copy_to_slice(dst.initialize_unfilled_to(remaining));
+                                dst.advance(remaining);
+                            }
+                            *state = Idle(Some(buf));
+                            return Ready(Ok(()));
+                        }
+                        Err(e) => {
+                            *state = Idle(None);
+                            return Ready(Err(e));
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
+impl<R: Read + Send, Buf: bytes::Buf + bytes::BufMut> From<R> for SyncReadIntoAsyncRead<R, Buf> {
+    /// This must be called from within a Tokio runtime context, or else it will panic.
+    fn from(value: R) -> Self {
+        Self::new_with_reader(value)
+    }
+}
diff --git a/tvix/store/src/proto/tests/directory.rs b/tvix/store/src/proto/tests/directory.rs
new file mode 100644
index 000000000000..8d6ca7241d7a
--- /dev/null
+++ b/tvix/store/src/proto/tests/directory.rs
@@ -0,0 +1,289 @@
+use crate::{
+    proto::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError},
+    B3Digest,
+};
+use lazy_static::lazy_static;
+
+lazy_static! {
+    static ref DUMMY_DIGEST: [u8; 32] = [
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00,
+    ];
+}
+#[test]
+fn size() {
+    {
+        let d = Directory::default();
+        assert_eq!(d.size(), 0);
+    }
+    {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: String::from("foo"),
+                digest: DUMMY_DIGEST.to_vec(),
+                size: 0,
+            }],
+            ..Default::default()
+        };
+        assert_eq!(d.size(), 1);
+    }
+    {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: String::from("foo"),
+                digest: DUMMY_DIGEST.to_vec(),
+                size: 4,
+            }],
+            ..Default::default()
+        };
+        assert_eq!(d.size(), 5);
+    }
+    {
+        let d = Directory {
+            files: vec![FileNode {
+                name: String::from("foo"),
+                digest: DUMMY_DIGEST.to_vec(),
+                size: 42,
+                executable: false,
+            }],
+            ..Default::default()
+        };
+        assert_eq!(d.size(), 1);
+    }
+    {
+        let d = Directory {
+            symlinks: vec![SymlinkNode {
+                name: String::from("foo"),
+                target: String::from("bar"),
+            }],
+            ..Default::default()
+        };
+        assert_eq!(d.size(), 1);
+    }
+}
+
+#[test]
+fn digest() {
+    let d = Directory::default();
+
+    assert_eq!(
+        d.digest(),
+        B3Digest::from_vec(vec![
+            0xaf, 0x13, 0x49, 0xb9, 0xf5, 0xf9, 0xa1, 0xa6, 0xa0, 0x40, 0x4d, 0xea, 0x36, 0xdc,
+            0xc9, 0x49, 0x9b, 0xcb, 0x25, 0xc9, 0xad, 0xc1, 0x12, 0xb7, 0xcc, 0x9a, 0x93, 0xca,
+            0xe4, 0x1f, 0x32, 0x62
+        ])
+        .unwrap()
+    )
+}
+
+#[test]
+fn validate_empty() {
+    let d = Directory::default();
+    assert_eq!(d.validate(), Ok(()));
+}
+
+#[test]
+fn validate_invalid_names() {
+    {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: "".to_string(),
+                digest: DUMMY_DIGEST.to_vec(),
+                size: 42,
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, "")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+
+    {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: ".".to_string(),
+                digest: DUMMY_DIGEST.to_vec(),
+                size: 42,
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, ".")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+
+    {
+        let d = Directory {
+            files: vec![FileNode {
+                name: "..".to_string(),
+                digest: DUMMY_DIGEST.to_vec(),
+                size: 42,
+                executable: false,
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, "..")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+
+    {
+        let d = Directory {
+            symlinks: vec![SymlinkNode {
+                name: "\x00".to_string(),
+                target: "foo".to_string(),
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, "\x00")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+
+    {
+        let d = Directory {
+            symlinks: vec![SymlinkNode {
+                name: "foo/bar".to_string(),
+                target: "foo".to_string(),
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidName(n) => {
+                assert_eq!(n, "foo/bar")
+            }
+            _ => panic!("unexpected error"),
+        };
+    }
+}
+
+#[test]
+fn validate_invalid_digest() {
+    let d = Directory {
+        directories: vec![DirectoryNode {
+            name: "foo".to_string(),
+            digest: vec![0x00, 0x42], // invalid length
+            size: 42,
+        }],
+        ..Default::default()
+    };
+    match d.validate().expect_err("must fail") {
+        ValidateDirectoryError::InvalidDigestLen(n) => {
+            assert_eq!(n, 2)
+        }
+        _ => panic!("unexpected error"),
+    }
+}
+
+#[test]
+fn validate_sorting() {
+    // "b" comes before "a", bad.
+    {
+        let d = Directory {
+            directories: vec![
+                DirectoryNode {
+                    name: "b".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                },
+                DirectoryNode {
+                    name: "a".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                },
+            ],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::WrongSorting(s) => {
+                assert_eq!(s, "a".to_string());
+            }
+            _ => panic!("unexpected error"),
+        }
+    }
+
+    // "a" exists twice, bad.
+    {
+        let d = Directory {
+            directories: vec![
+                DirectoryNode {
+                    name: "a".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                },
+                DirectoryNode {
+                    name: "a".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                },
+            ],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::DuplicateName(s) => {
+                assert_eq!(s, "a".to_string());
+            }
+            _ => panic!("unexpected error"),
+        }
+    }
+
+    // "a" comes before "b", all good.
+    {
+        let d = Directory {
+            directories: vec![
+                DirectoryNode {
+                    name: "a".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                },
+                DirectoryNode {
+                    name: "b".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                },
+            ],
+            ..Default::default()
+        };
+
+        d.validate().expect("validate shouldn't error");
+    }
+
+    // [b, c] and [a] are both properly sorted.
+    {
+        let d = Directory {
+            directories: vec![
+                DirectoryNode {
+                    name: "b".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                },
+                DirectoryNode {
+                    name: "c".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                },
+            ],
+            symlinks: vec![SymlinkNode {
+                name: "a".to_string(),
+                target: "foo".to_string(),
+            }],
+            ..Default::default()
+        };
+
+        d.validate().expect("validate shouldn't error");
+    }
+}
diff --git a/tvix/store/src/proto/tests/directory_nodes_iterator.rs b/tvix/store/src/proto/tests/directory_nodes_iterator.rs
new file mode 100644
index 000000000000..9a283f72bd45
--- /dev/null
+++ b/tvix/store/src/proto/tests/directory_nodes_iterator.rs
@@ -0,0 +1,80 @@
+use crate::proto::node::Node;
+use crate::proto::Directory;
+use crate::proto::DirectoryNode;
+use crate::proto::FileNode;
+use crate::proto::SymlinkNode;
+
+#[test]
+fn iterator() {
+    let d = Directory {
+        directories: vec![
+            DirectoryNode {
+                name: "c".to_string(),
+                ..DirectoryNode::default()
+            },
+            DirectoryNode {
+                name: "d".to_string(),
+                ..DirectoryNode::default()
+            },
+            DirectoryNode {
+                name: "h".to_string(),
+                ..DirectoryNode::default()
+            },
+            DirectoryNode {
+                name: "l".to_string(),
+                ..DirectoryNode::default()
+            },
+        ],
+        files: vec![
+            FileNode {
+                name: "b".to_string(),
+                ..FileNode::default()
+            },
+            FileNode {
+                name: "e".to_string(),
+                ..FileNode::default()
+            },
+            FileNode {
+                name: "g".to_string(),
+                ..FileNode::default()
+            },
+            FileNode {
+                name: "j".to_string(),
+                ..FileNode::default()
+            },
+        ],
+        symlinks: vec![
+            SymlinkNode {
+                name: "a".to_string(),
+                ..SymlinkNode::default()
+            },
+            SymlinkNode {
+                name: "f".to_string(),
+                ..SymlinkNode::default()
+            },
+            SymlinkNode {
+                name: "i".to_string(),
+                ..SymlinkNode::default()
+            },
+            SymlinkNode {
+                name: "k".to_string(),
+                ..SymlinkNode::default()
+            },
+        ],
+    };
+
+    let mut node_names: Vec<String> = vec![];
+
+    for node in d.nodes() {
+        match node {
+            Node::Directory(n) => node_names.push(n.name),
+            Node::File(n) => node_names.push(n.name),
+            Node::Symlink(n) => node_names.push(n.name),
+        };
+    }
+
+    assert_eq!(
+        vec!["a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l"],
+        node_names
+    );
+}
diff --git a/tvix/store/src/proto/tests/grpc_blobservice.rs b/tvix/store/src/proto/tests/grpc_blobservice.rs
new file mode 100644
index 000000000000..02e04e7d723f
--- /dev/null
+++ b/tvix/store/src/proto/tests/grpc_blobservice.rs
@@ -0,0 +1,102 @@
+use crate::blobservice::BlobService;
+use crate::proto::blob_service_server::BlobService as GRPCBlobService;
+use crate::proto::{BlobChunk, GRPCBlobServiceWrapper, ReadBlobRequest, StatBlobRequest};
+use crate::tests::fixtures::{BLOB_A, BLOB_A_DIGEST};
+use crate::tests::utils::gen_blob_service;
+use tokio_stream::StreamExt;
+
+fn gen_grpc_blob_service(
+) -> GRPCBlobServiceWrapper<impl BlobService + Send + Sync + Clone + 'static> {
+    let blob_service = gen_blob_service();
+    GRPCBlobServiceWrapper::from(blob_service)
+}
+
+/// Trying to read a non-existent blob should return a not found error.
+#[tokio::test]
+async fn not_found_read() {
+    let service = gen_grpc_blob_service();
+
+    let resp = service
+        .read(tonic::Request::new(ReadBlobRequest {
+            digest: BLOB_A_DIGEST.to_vec(),
+        }))
+        .await;
+
+    // We can't use unwrap_err here, because the Ok value doesn't implement
+    // debug.
+    if let Err(e) = resp {
+        assert_eq!(e.code(), tonic::Code::NotFound);
+    } else {
+        panic!("resp is not err")
+    }
+}
+
+/// Trying to stat a non-existent blob should return a not found error.
+#[tokio::test]
+async fn not_found_stat() {
+    let service = gen_grpc_blob_service();
+
+    let resp = service
+        .stat(tonic::Request::new(StatBlobRequest {
+            digest: BLOB_A_DIGEST.to_vec(),
+            ..Default::default()
+        }))
+        .await
+        .expect_err("must fail");
+
+    // The resp should be a status with Code::NotFound
+    assert_eq!(resp.code(), tonic::Code::NotFound);
+}
+
+/// Put a blob in the store, get it back.
+#[tokio::test]
+async fn put_read_stat() {
+    let service = gen_grpc_blob_service();
+
+    // Send blob A.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![BlobChunk {
+            data: BLOB_A.clone(),
+        }]))
+        .await
+        .expect("must succeed")
+        .into_inner();
+
+    assert_eq!(BLOB_A_DIGEST.to_vec(), put_resp.digest);
+
+    // Stat for the digest of A.
+    // We currently don't ask for more granular chunking data, as we don't
+    // expose it yet.
+    let _resp = service
+        .stat(tonic::Request::new(StatBlobRequest {
+            digest: BLOB_A_DIGEST.to_vec(),
+            ..Default::default()
+        }))
+        .await
+        .expect("must succeed")
+        .into_inner();
+
+    // Read the blob. It should return the same data.
+    let resp = service
+        .read(tonic::Request::new(ReadBlobRequest {
+            digest: BLOB_A_DIGEST.to_vec(),
+        }))
+        .await;
+
+    let mut rx = resp.ok().unwrap().into_inner();
+
+    // the stream should contain one element, a BlobChunk with the same contents as BLOB_A.
+    let item = rx
+        .next()
+        .await
+        .expect("must be some")
+        .expect("must succeed");
+
+    assert_eq!(BLOB_A.to_vec(), item.data);
+
+    // … and no more elements
+    assert!(rx.next().await.is_none());
+
+    // TODO: we rely here on the blob being small enough to not get broken up into multiple chunks.
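+    // A bigger blob would come back as a stream of several BlobChunk
+    // messages, which a client would need to concatenate.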
+    // Test with some bigger blob too
+}
diff --git a/tvix/store/src/proto/tests/grpc_directoryservice.rs b/tvix/store/src/proto/tests/grpc_directoryservice.rs
new file mode 100644
index 000000000000..069e82f6463e
--- /dev/null
+++ b/tvix/store/src/proto/tests/grpc_directoryservice.rs
@@ -0,0 +1,241 @@
+use crate::directoryservice::DirectoryService;
+use crate::proto::directory_service_server::DirectoryService as GRPCDirectoryService;
+use crate::proto::get_directory_request::ByWhat;
+use crate::proto::{Directory, DirectoryNode, SymlinkNode};
+use crate::proto::{GRPCDirectoryServiceWrapper, GetDirectoryRequest};
+use crate::tests::fixtures::{DIRECTORY_A, DIRECTORY_B, DIRECTORY_C};
+use crate::tests::utils::gen_directory_service;
+use tokio_stream::StreamExt;
+use tonic::Status;
+
+fn gen_grpc_service(
+) -> GRPCDirectoryServiceWrapper<impl DirectoryService + Send + Sync + Clone + 'static> {
+    let directory_service = gen_directory_service();
+    GRPCDirectoryServiceWrapper::from(directory_service)
+}
+
+/// Send the specified GetDirectoryRequest.
+/// Returns an error in the case of an error response, or an error in one of
+/// the items in the stream, or a Vec<Directory> in the case of a successful
+/// request.
+async fn get_directories<S: GRPCDirectoryService>(
+    svc: &S,
+    get_directory_request: GetDirectoryRequest,
+) -> Result<Vec<Directory>, Status> {
+    let resp = svc.get(tonic::Request::new(get_directory_request)).await;
+
+    // if the response is an error itself, return the error, otherwise unpack
+    let stream = match resp {
+        Ok(resp) => resp,
+        Err(status) => return Err(status),
+    }
+    .into_inner();
+
+    let directory_results: Vec<Result<Directory, Status>> = stream.collect().await;
+
+    // turn Vec<Result<Directory, Status>> into Result<Vec<Directory>, Status>
+    directory_results.into_iter().collect()
+}
+
+/// Trying to get a non-existent Directory should return a not found error.
+#[tokio::test]
+async fn not_found() {
+    let service = gen_grpc_service();
+
+    let resp = service
+        .get(tonic::Request::new(GetDirectoryRequest {
+            by_what: Some(ByWhat::Digest(DIRECTORY_A.digest().to_vec())),
+            ..Default::default()
+        }))
+        .await;
+
+    let mut rx = resp.expect("must succeed").into_inner().into_inner();
+
+    // The stream should contain one element, an error with Code::NotFound.
+    let item = rx
+        .recv()
+        .await
+        .expect("must be some")
+        .expect_err("must be err");
+    assert_eq!(item.code(), tonic::Code::NotFound);
+
+    // … and nothing else
+    assert!(rx.recv().await.is_none());
+}
+
+/// Put a Directory into the store, get it back.
+#[tokio::test]
+async fn put_get() {
+    let service = gen_grpc_service();
+
+    let streaming_request = tonic_mock::streaming_request(vec![DIRECTORY_A.clone()]);
+    let put_resp = service
+        .put(streaming_request)
+        .await
+        .expect("must succeed")
+        .into_inner();
+
+    // the sent root_digest should match the calculated digest
+    assert_eq!(put_resp.root_digest, DIRECTORY_A.digest().to_vec());
+
+    // get it back
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            by_what: Some(ByWhat::Digest(DIRECTORY_A.digest().to_vec())),
+            ..Default::default()
+        },
+    )
+    .await
+    .expect("must not error");
+
+    assert_eq!(vec![DIRECTORY_A.clone()], items);
+}
+
+/// Put multiple Directories into the store, and get them back
+#[tokio::test]
+async fn put_get_multiple() {
+    let service = gen_grpc_service();
+
+    // sending "b" (which refers to "a") without sending "a" first should fail.
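+    // (the wrapper only accepts references to directories it has already
+    // seen earlier in the same stream)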
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![DIRECTORY_B.clone()]))
+        .await
+        .expect_err("must fail");
+
+    assert_eq!(tonic::Code::InvalidArgument, put_resp.code());
+
+    // sending "a", then "b" should succeed, and the response should contain the digest of b.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![
+            DIRECTORY_A.clone(),
+            DIRECTORY_B.clone(),
+        ]))
+        .await
+        .expect("must succeed");
+
+    assert_eq!(
+        DIRECTORY_B.digest().to_vec(),
+        put_resp.into_inner().root_digest
+    );
+
+    // now, request b, first in non-recursive mode.
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            recursive: false,
+            by_what: Some(ByWhat::Digest(DIRECTORY_B.digest().to_vec())),
+        },
+    )
+    .await
+    .expect("must not error");
+
+    // We expect to only get b.
+    assert_eq!(vec![DIRECTORY_B.clone()], items);
+
+    // now, request b, but in recursive mode.
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            recursive: true,
+            by_what: Some(ByWhat::Digest(DIRECTORY_B.digest().to_vec())),
+        },
+    )
+    .await
+    .expect("must not error");
+
+    // We expect to get b, and then a, because that's how we traverse down.
+    assert_eq!(vec![DIRECTORY_B.clone(), DIRECTORY_A.clone()], items);
+}
+
+/// Put multiple Directories into the store, and omit duplicates.
+#[tokio::test]
+async fn put_get_dedup() {
+    let service = gen_grpc_service();
+
+    // Send "A", then "C", which refers to "A" two times.
+    // Pretend we're a dumb client sending A twice.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![
+            DIRECTORY_A.clone(),
+            DIRECTORY_A.clone(),
+            DIRECTORY_C.clone(),
+        ]))
+        .await
+        .expect("must succeed");
+
+    assert_eq!(
+        DIRECTORY_C.digest().to_vec(),
+        put_resp.into_inner().root_digest
+    );
+
+    // Ask for "C" recursively. We expect to only get "A" once, as there's no point sending it twice.
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            recursive: true,
+            by_what: Some(ByWhat::Digest(DIRECTORY_C.digest().to_vec())),
+        },
+    )
+    .await
+    .expect("must not error");
+
+    // We expect to get C, and then A (once, as the second A has been deduplicated).
+    assert_eq!(vec![DIRECTORY_C.clone(), DIRECTORY_A.clone()], items);
+}
+
+/// Trying to upload a Directory failing validation should fail.
+#[tokio::test]
+async fn put_reject_failed_validation() {
+    let service = gen_grpc_service();
+
+    // construct a broken Directory message that fails validation
+    let broken_directory = Directory {
+        symlinks: vec![SymlinkNode {
+            name: "".to_string(),
+            target: "doesntmatter".to_string(),
+        }],
+        ..Default::default()
+    };
+    assert!(broken_directory.validate().is_err());
+
+    // send it over, it must fail
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![broken_directory]))
+        .await
+        .expect_err("must fail");
+
+    assert_eq!(put_resp.code(), tonic::Code::InvalidArgument);
+}
+
+/// Trying to upload a Directory with wrong size should fail.
+#[tokio::test]
+async fn put_reject_wrong_size() {
+    let service = gen_grpc_service();
+
+    // Construct a directory referring to DIRECTORY_A, but with wrong size.
+    let broken_parent_directory = Directory {
+        directories: vec![DirectoryNode {
+            name: "foo".to_string(),
+            digest: DIRECTORY_A.digest().to_vec(),
+            size: 42,
+        }],
+        ..Default::default()
+    };
+    // Make sure we got the size wrong.
+    assert_ne!(
+        broken_parent_directory.directories[0].size,
+        DIRECTORY_A.size()
+    );
+
+    // now upload both (first A, then the broken parent). This must fail.
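+    // (the server compares the size field of the reference against the
+    // size it recorded when DIRECTORY_A was first seen in the stream)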
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![
+            DIRECTORY_A.clone(),
+            broken_parent_directory,
+        ]))
+        .await
+        .expect_err("must fail");
+
+    assert_eq!(put_resp.code(), tonic::Code::InvalidArgument);
+}
diff --git a/tvix/store/src/proto/tests/grpc_pathinfoservice.rs b/tvix/store/src/proto/tests/grpc_pathinfoservice.rs
new file mode 100644
index 000000000000..11cab2c264cc
--- /dev/null
+++ b/tvix/store/src/proto/tests/grpc_pathinfoservice.rs
@@ -0,0 +1,67 @@
+use crate::nar::NonCachingNARCalculationService;
+use crate::proto::get_path_info_request::ByWhat::ByOutputHash;
+use crate::proto::node::Node::Symlink;
+use crate::proto::path_info_service_server::PathInfoService as GRPCPathInfoService;
+use crate::proto::GRPCPathInfoServiceWrapper;
+use crate::proto::PathInfo;
+use crate::proto::{GetPathInfoRequest, Node, SymlinkNode};
+use crate::tests::fixtures::DUMMY_OUTPUT_HASH;
+use crate::tests::utils::{gen_blob_service, gen_directory_service, gen_pathinfo_service};
+use tonic::Request;
+
+/// generates a GRPCPathInfoService out of blob, directory and pathinfo services.
+///
+/// We only interact with it via the PathInfo GRPC interface.
+/// It uses NonCachingNARCalculationService as the NARCalculationService to
+/// calculate NARs.
+fn gen_grpc_service() -> impl GRPCPathInfoService {
+    GRPCPathInfoServiceWrapper::new(
+        gen_pathinfo_service(),
+        NonCachingNARCalculationService::new(gen_blob_service(), gen_directory_service()),
+    )
+}
+
+/// Trying to get a non-existent PathInfo should return a not found error.
+#[tokio::test]
+async fn not_found() {
+    let service = gen_grpc_service();
+
+    let resp = service
+        .get(Request::new(GetPathInfoRequest {
+            by_what: Some(ByOutputHash(DUMMY_OUTPUT_HASH.to_vec())),
+        }))
+        .await;
+
+    let resp = resp.expect_err("must fail");
+    assert_eq!(resp.code(), tonic::Code::NotFound);
+}
+
+/// Put a PathInfo into the store, get it back.
+#[tokio::test]
+async fn put_get() {
+    let service = gen_grpc_service();
+
+    let path_info = PathInfo {
+        node: Some(Node {
+            node: Some(Symlink(SymlinkNode {
+                name: "00000000000000000000000000000000-foo".to_string(),
+                target: "doesntmatter".to_string(),
+            })),
+        }),
+        ..Default::default()
+    };
+
+    let resp = service.put(Request::new(path_info.clone())).await;
+
+    assert!(resp.is_ok());
+    assert_eq!(resp.expect("must succeed").into_inner(), path_info);
+
+    let resp = service
+        .get(Request::new(GetPathInfoRequest {
+            by_what: Some(ByOutputHash(DUMMY_OUTPUT_HASH.to_vec())),
+        }))
+        .await;
+
+    assert!(resp.is_ok());
+    assert_eq!(resp.expect("must succeed").into_inner(), path_info);
+}
diff --git a/tvix/store/src/proto/tests/mod.rs b/tvix/store/src/proto/tests/mod.rs
new file mode 100644
index 000000000000..0a96ea3a0d59
--- /dev/null
+++ b/tvix/store/src/proto/tests/mod.rs
@@ -0,0 +1,6 @@
+mod directory;
+mod directory_nodes_iterator;
+mod grpc_blobservice;
+mod grpc_directoryservice;
+mod grpc_pathinfoservice;
+mod pathinfo;
diff --git a/tvix/store/src/proto/tests/pathinfo.rs b/tvix/store/src/proto/tests/pathinfo.rs
new file mode 100644
index 000000000000..54a76fc6c554
--- /dev/null
+++ b/tvix/store/src/proto/tests/pathinfo.rs
@@ -0,0 +1,207 @@
+use crate::proto::{self, Node, PathInfo, ValidatePathInfoError};
+use lazy_static::lazy_static;
+use nix_compat::store_path::{self, StorePath};
+use test_case::test_case;
+
+lazy_static! {
+    static ref DUMMY_DIGEST: Vec<u8> = vec![
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00,
+    ];
+    static ref DUMMY_DIGEST_2: Vec<u8> = vec![
+        0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        0x00, 0x00,
+    ];
+}
+
+const DUMMY_NAME: &str = "00000000000000000000000000000000-dummy";
+
+#[test_case(
+    None,
+    Err(ValidatePathInfoError::NoNodePresent()) ;
+    "No node"
+)]
+#[test_case(
+    Some(Node { node: None }),
+    Err(ValidatePathInfoError::NoNodePresent());
+    "No node 2"
+)]
+fn validate_no_node(
+    t_node: Option<proto::Node>,
+    t_result: Result<StorePath, ValidatePathInfoError>,
+) {
+    // construct the PathInfo object
+    let p = PathInfo {
+        node: t_node,
+        ..Default::default()
+    };
+    assert_eq!(t_result, p.validate());
+}
+
+#[test_case(
+    proto::DirectoryNode {
+        name: DUMMY_NAME.to_string(),
+        digest: DUMMY_DIGEST.to_vec(),
+        size: 0,
+    },
+    Ok(StorePath::from_string(DUMMY_NAME).expect("must succeed"));
+    "ok"
+)]
+#[test_case(
+    proto::DirectoryNode {
+        name: DUMMY_NAME.to_string(),
+        digest: vec![],
+        size: 0,
+    },
+    Err(ValidatePathInfoError::InvalidDigestLen(0));
+    "invalid digest length"
+)]
+#[test_case(
+    proto::DirectoryNode {
+        name: "invalid".to_string(),
+        digest: DUMMY_DIGEST.to_vec(),
+        size: 0,
+    },
+    Err(ValidatePathInfoError::InvalidNodeName(
+        "invalid".to_string(),
+        store_path::Error::InvalidName(store_path::NameError::InvalidName("".to_string()))
+    ));
+    "invalid node name"
+)]
+fn validate_directory(
+    t_directory_node: proto::DirectoryNode,
+    t_result: Result<StorePath, ValidatePathInfoError>,
+) {
+    // construct the PathInfo object
+    let p = PathInfo {
+        node: Some(Node {
+            node: Some(proto::node::Node::Directory(t_directory_node)),
+        }),
+        ..Default::default()
+    };
+    assert_eq!(t_result, p.validate());
+}
+
+#[test_case(
+    proto::FileNode {
+        name: DUMMY_NAME.to_string(),
+        digest: DUMMY_DIGEST.to_vec(),
+        size: 0,
+        executable: false,
+    },
+    Ok(StorePath::from_string(DUMMY_NAME).expect("must succeed"));
+    "ok"
+)]
+#[test_case(
+    proto::FileNode {
+        name: DUMMY_NAME.to_string(),
+        digest: vec![],
+        ..Default::default()
+    },
+    Err(ValidatePathInfoError::InvalidDigestLen(0));
+    "invalid digest length"
+)]
+#[test_case(
+    proto::FileNode {
+        name: "invalid".to_string(),
+        digest: DUMMY_DIGEST.to_vec(),
+        ..Default::default()
+    },
+    Err(ValidatePathInfoError::InvalidNodeName(
+        "invalid".to_string(),
+        store_path::Error::InvalidName(store_path::NameError::InvalidName("".to_string()))
+    ));
+    "invalid node name"
+)]
+fn validate_file(t_file_node: proto::FileNode, t_result: Result<StorePath, ValidatePathInfoError>) {
+    // construct the PathInfo object
+    let p = PathInfo {
+        node: Some(Node {
+            node: Some(proto::node::Node::File(t_file_node)),
+        }),
+        ..Default::default()
+    };
+    assert_eq!(t_result, p.validate());
+}
+
+#[test_case(
+    proto::SymlinkNode {
+        name: DUMMY_NAME.to_string(),
+        ..Default::default()
+    },
+    Ok(StorePath::from_string(DUMMY_NAME).expect("must succeed"));
+    "ok"
+)]
+#[test_case(
+    proto::SymlinkNode {
+        name: "invalid".to_string(),
+        ..Default::default()
+    },
+    Err(ValidatePathInfoError::InvalidNodeName(
+        "invalid".to_string(),
+        store_path::Error::InvalidName(store_path::NameError::InvalidName("".to_string()))
+    ));
+    "invalid node name"
+)]
+fn validate_symlink(
+    t_symlink_node: proto::SymlinkNode,
+    t_result: Result<StorePath, ValidatePathInfoError>,
+) {
+    // construct the PathInfo object
+    let p = PathInfo {
+        node: Some(Node {
+            node: Some(proto::node::Node::Symlink(t_symlink_node)),
+        }),
+        ..Default::default()
+    };
+    assert_eq!(t_result, p.validate());
+}
+
+#[test]
+fn validate_references() {
+    // create a PathInfo without narinfo field.
+    let path_info = PathInfo {
+        node: Some(Node {
+            node: Some(proto::node::Node::Directory(proto::DirectoryNode {
+                name: DUMMY_NAME.to_string(),
+                digest: DUMMY_DIGEST.to_vec(),
+                size: 0,
+            })),
+        }),
+        references: vec![DUMMY_DIGEST_2.to_vec()],
+        narinfo: None,
+    };
+    assert!(path_info.validate().is_ok());
+
+    // create a PathInfo with a narinfo field, but an inconsistent set of references
+    let path_info_with_narinfo_missing_refs = PathInfo {
+        narinfo: Some(proto::NarInfo {
+            nar_size: 0,
+            nar_sha256: DUMMY_DIGEST.to_vec(),
+            signatures: vec![],
+            reference_names: vec![],
+        }),
+        ..path_info.clone()
+    };
+    match path_info_with_narinfo_missing_refs
+        .validate()
+        .expect_err("must_fail")
+    {
+        ValidatePathInfoError::InconsistentNumberOfReferences(_, _) => {}
+        _ => panic!("unexpected error"),
+    };
+
+    // create a pathinfo with the correct number of references, should succeed
+    let path_info_with_narinfo = PathInfo {
+        narinfo: Some(proto::NarInfo {
+            nar_size: 0,
+            nar_sha256: DUMMY_DIGEST.to_vec(),
+            signatures: vec![],
+            reference_names: vec![format!("/nix/store/{}", DUMMY_NAME)],
+        }),
+        ..path_info
+    };
+    assert!(path_info_with_narinfo.validate().is_ok());
+}