From bb7c76739a30d2f312693799d8237eb0eb2da28d Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Fri, 9 Jun 2023 23:35:46 +0300 Subject: feat(tvix/store/directorysvc): add from_addr Add --directory-service-addr arg to tvix-store CLI. Change-Id: Iea1e6f08f27f7157b21ccf397297c68358bd78a0 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8743 Tested-by: BuildkiteCI Autosubmit: flokli Reviewed-by: tazjin --- tvix/store/src/bin/tvix-store.rs | 42 +++++------ tvix/store/src/directoryservice/from_addr.rs | 37 ++++++++++ tvix/store/src/directoryservice/grpc.rs | 56 ++++++++++++++- tvix/store/src/directoryservice/memory.rs | 61 ++++++++++++++++ tvix/store/src/directoryservice/mod.rs | 8 +++ tvix/store/src/directoryservice/sled.rs | 104 +++++++++++++++++++++++++++ 6 files changed, 287 insertions(+), 21 deletions(-) create mode 100644 tvix/store/src/directoryservice/from_addr.rs diff --git a/tvix/store/src/bin/tvix-store.rs b/tvix/store/src/bin/tvix-store.rs index 6e2424a7e578..c8d361212e5e 100644 --- a/tvix/store/src/bin/tvix-store.rs +++ b/tvix/store/src/bin/tvix-store.rs @@ -7,14 +7,11 @@ use std::path::PathBuf; use std::sync::Arc; use tracing_subscriber::prelude::*; use tvix_store::blobservice; -use tvix_store::directoryservice::DirectoryService; -use tvix_store::directoryservice::GRPCDirectoryService; -use tvix_store::directoryservice::SledDirectoryService; +use tvix_store::directoryservice; use tvix_store::pathinfoservice::GRPCPathInfoService; use tvix_store::pathinfoservice::PathInfoService; use tvix_store::pathinfoservice::SledPathInfoService; use tvix_store::proto::blob_service_server::BlobServiceServer; -use tvix_store::proto::directory_service_client::DirectoryServiceClient; use tvix_store::proto::directory_service_server::DirectoryServiceServer; use tvix_store::proto::node::Node; use tvix_store::proto::path_info_service_client::PathInfoServiceClient; @@ -55,6 +52,13 @@ enum Commands { #[arg(long, env, default_value = "sled:///var/lib/tvix-store/blobs.sled")] blob_service_addr: String, + + #[arg( + long, + env, + default_value = "sled:///var/lib/tvix-store/directories.sled" + )] + directory_service_addr: String, }, /// Imports a list of paths into the store (not using the daemon) Import { @@ -63,6 +67,9 @@ enum Commands { #[arg(long, env, default_value = "grpc+http://[::1]:8000")] blob_service_addr: String, + + #[arg(long, env, default_value = "grpc+http://[::1]:8000")] + directory_service_addr: String, }, /// Mounts a tvix-store at the given mountpoint #[cfg(feature = "fuse")] @@ -72,6 +79,9 @@ enum Commands { #[arg(long, env, default_value = "grpc+http://[::1]:8000")] blob_service_addr: String, + + #[arg(long, env, default_value = "grpc+http://[::1]:8000")] + directory_service_addr: String, }, } @@ -108,11 +118,11 @@ async fn main() -> Result<(), Box> { Commands::Daemon { listen_address, blob_service_addr, + directory_service_addr, } => { // initialize stores let blob_service = blobservice::from_addr(&blob_service_addr).await?; - let directory_service: Arc = - Arc::new(SledDirectoryService::new("directories.sled".into())?); + let directory_service = directoryservice::from_addr(&directory_service_addr)?; let path_info_service: Arc = Arc::new(SledPathInfoService::new( "pathinfo.sled".into(), blob_service.clone(), @@ -153,12 +163,10 @@ async fn main() -> Result<(), Box> { Commands::Import { paths, blob_service_addr, + directory_service_addr, } => { let blob_service = blobservice::from_addr(&blob_service_addr).await?; - - let directory_service = GRPCDirectoryService::from_client( - DirectoryServiceClient::connect("http://[::1]:8000").await?, - ); + let directory_service = directoryservice::from_addr(&directory_service_addr)?; let path_info_service_client = PathInfoServiceClient::connect("http://[::1]:8000").await?; let path_info_service = @@ -166,7 +174,7 @@ async fn main() -> Result<(), Box> { let io = Arc::new(TvixStoreIO::new( blob_service, - Arc::new(directory_service), + directory_service, Arc::new(path_info_service), )); @@ -191,23 +199,17 @@ async fn main() -> Result<(), Box> { Commands::Mount { dest, blob_service_addr, + directory_service_addr, } => { let blob_service = blobservice::from_addr(&blob_service_addr).await?; - - let directory_service = GRPCDirectoryService::from_client( - DirectoryServiceClient::connect("http://[::1]:8000").await?, - ); + let directory_service = directoryservice::from_addr(&directory_service_addr)?; let path_info_service_client = PathInfoServiceClient::connect("http://[::1]:8000").await?; let path_info_service = GRPCPathInfoService::from_client(path_info_service_client.clone()); tokio::task::spawn_blocking(move || { - let f = FUSE::new( - blob_service, - Arc::new(directory_service), - Arc::new(path_info_service), - ); + let f = FUSE::new(blob_service, directory_service, Arc::new(path_info_service)); fuser::mount2(f, &dest, &[]) }) .await?? diff --git a/tvix/store/src/directoryservice/from_addr.rs b/tvix/store/src/directoryservice/from_addr.rs new file mode 100644 index 000000000000..a3e2521e98e0 --- /dev/null +++ b/tvix/store/src/directoryservice/from_addr.rs @@ -0,0 +1,37 @@ +use std::sync::Arc; +use url::Url; + +use super::{DirectoryService, GRPCDirectoryService, MemoryDirectoryService, SledDirectoryService}; + +/// Constructs a new instance of a [DirectoryService] from an URI. +/// +/// The following URIs are supported: +/// - `memory:` +/// Uses a in-memory implementation. +/// - `sled:` +/// Uses a in-memory sled implementation. +/// - `sled:///absolute/path/to/somewhere` +/// Uses sled, using a path on the disk for persistency. Can be only opened +/// from one process at the same time. +/// - `grpc+unix:///absolute/path/to/somewhere` +/// Connects to a local tvix-store gRPC service via Unix socket. +/// - `grpc+http://host:port`, `grpc+https://host:port` +/// Connects to a (remote) tvix-store gRPC service. +pub fn from_addr(uri: &str) -> Result, crate::Error> { + let url = Url::parse(uri).map_err(|e| { + crate::Error::StorageError(format!("unable to parse url: {}", e.to_string())) + })?; + + Ok(if url.scheme() == "memory" { + Arc::new(MemoryDirectoryService::from_url(&url)?) + } else if url.scheme() == "sled" { + Arc::new(SledDirectoryService::from_url(&url)?) + } else if url.scheme().starts_with("grpc+") { + Arc::new(GRPCDirectoryService::from_url(&url)?) + } else { + Err(crate::Error::StorageError(format!( + "unknown scheme: {}", + url.scheme() + )))? + }) +} diff --git a/tvix/store/src/directoryservice/grpc.rs b/tvix/store/src/directoryservice/grpc.rs index 913dd5bad93f..cff3077c7e08 100644 --- a/tvix/store/src/directoryservice/grpc.rs +++ b/tvix/store/src/directoryservice/grpc.rs @@ -3,6 +3,7 @@ use std::collections::HashSet; use super::{DirectoryPutter, DirectoryService}; use crate::proto::{self, get_directory_request::ByWhat}; use crate::{B3Digest, Error}; +use tokio::net::UnixStream; use tokio::sync::mpsc::UnboundedSender; use tokio_stream::wrappers::UnboundedReceiverStream; use tonic::{transport::Channel, Status}; @@ -21,7 +22,7 @@ pub struct GRPCDirectoryService { } impl GRPCDirectoryService { - /// construct a [GRPCDirectoryService] from a [proto::blob_service_client::BlobServiceClient]. + /// construct a [GRPCDirectoryService] from a [proto::directory_service_client::DirectoryServiceClient]. /// panics if called outside the context of a tokio runtime. pub fn from_client( grpc_client: proto::directory_service_client::DirectoryServiceClient, @@ -34,6 +35,59 @@ impl GRPCDirectoryService { } impl DirectoryService for GRPCDirectoryService { + /// Constructs a [GRPCDirectoryService] from the passed [url::Url]: + /// - scheme has to match `grpc+*://`. + /// That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. + /// - In the case of unix sockets, there must be a path, but may not be a host. + /// - In the case of non-unix sockets, there must be a host, but no path. + fn from_url(url: &url::Url) -> Result { + // Start checking for the scheme to start with grpc+. + match url.scheme().strip_prefix("grpc+") { + None => Err(crate::Error::StorageError("invalid scheme".to_string())), + Some(rest) => { + if rest == "unix" { + if url.host_str().is_some() { + return Err(crate::Error::StorageError( + "host may not be set".to_string(), + )); + } + let path = url.path().to_string(); + let channel = tonic::transport::Endpoint::try_from("http://[::]:50051") // doesn't matter + .unwrap() + .connect_with_connector_lazy(tower::service_fn( + move |_: tonic::transport::Uri| UnixStream::connect(path.clone()), + )); + let grpc_client = + proto::directory_service_client::DirectoryServiceClient::new(channel); + Ok(Self::from_client(grpc_client)) + } else { + // ensure path is empty, not supported with gRPC. + if !url.path().is_empty() { + return Err(crate::Error::StorageError( + "path may not be set".to_string(), + )); + } + + // clone the uri, and drop the grpc+ from the scheme. + // Recreate a new uri with the `grpc+` prefix dropped from the scheme. + // We can't use `url.set_scheme(rest)`, as it disallows + // setting something http(s) that previously wasn't. + let url = { + let url_str = url.to_string(); + let s_stripped = url_str.strip_prefix("grpc+").unwrap(); + url::Url::parse(s_stripped).unwrap() + }; + let channel = tonic::transport::Endpoint::try_from(url.to_string()) + .unwrap() + .connect_lazy(); + + let grpc_client = + proto::directory_service_client::DirectoryServiceClient::new(channel); + Ok(Self::from_client(grpc_client)) + } + } + } + } fn get(&self, digest: &B3Digest) -> Result, crate::Error> { // Get a new handle to the gRPC client, and copy the digest. let mut grpc_client = self.grpc_client.clone(); diff --git a/tvix/store/src/directoryservice/memory.rs b/tvix/store/src/directoryservice/memory.rs index 1f203834a7cf..634dbf9922d0 100644 --- a/tvix/store/src/directoryservice/memory.rs +++ b/tvix/store/src/directoryservice/memory.rs @@ -12,6 +12,21 @@ pub struct MemoryDirectoryService { } impl DirectoryService for MemoryDirectoryService { + /// Constructs a [MemoryDirectoryService] from the passed [url::Url]: + /// - scheme has to be `memory://` + /// - there may not be a host. + /// - there may not be a path. + fn from_url(url: &url::Url) -> Result { + if url.scheme() != "memory" { + return Err(crate::Error::StorageError("invalid scheme".to_string())); + } + + if url.has_host() || !url.path().is_empty() { + return Err(crate::Error::StorageError("invalid url".to_string())); + } + + Ok(Self::default()) + } #[instrument(skip(self, digest), fields(directory.digest = %digest))] fn get(&self, digest: &B3Digest) -> Result, Error> { let db = self.db.read()?; @@ -84,3 +99,49 @@ impl DirectoryService for MemoryDirectoryService { Box::new(SimplePutter::new(self.clone())) } } + +#[cfg(test)] +mod tests { + use super::DirectoryService; + use super::MemoryDirectoryService; + + /// This uses a wrong scheme. + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_err()); + } + + /// This correctly sets the scheme, and doesn't set a path. + #[test] + fn test_valid_scheme() { + let url = url::Url::parse("memory://").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_ok()); + } + + /// This sets the host to `foo` + #[test] + fn test_invalid_host() { + let url = url::Url::parse("memory://foo").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_err()); + } + + /// This has the path "/", which is invalid. + #[test] + fn test_invalid_has_path() { + let url = url::Url::parse("memory:///").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_err()); + } + + /// This has the path "/foo", which is invalid. + #[test] + fn test_invalid_path2() { + let url = url::Url::parse("memory:///foo").expect("must parse"); + + assert!(MemoryDirectoryService::from_url(&url).is_err()); + } +} diff --git a/tvix/store/src/directoryservice/mod.rs b/tvix/store/src/directoryservice/mod.rs index 6589a5b62599..c99848c551fd 100644 --- a/tvix/store/src/directoryservice/mod.rs +++ b/tvix/store/src/directoryservice/mod.rs @@ -1,10 +1,13 @@ use crate::{proto, B3Digest, Error}; + +mod from_addr; mod grpc; mod memory; mod sled; mod traverse; mod utils; +pub use self::from_addr::from_addr; pub use self::grpc::GRPCDirectoryService; pub use self::memory::MemoryDirectoryService; pub use self::sled::SledDirectoryService; @@ -15,6 +18,11 @@ pub use self::utils::DirectoryTraverser; /// This is a simple get and put of [crate::proto::Directory], returning their /// digest. pub trait DirectoryService: Send + Sync { + /// Create a new instance by passing in a connection URL. + fn from_url(url: &url::Url) -> Result + where + Self: Sized; + /// Get looks up a single Directory message by its digest. /// In case the directory is not found, Ok(None) is returned. fn get(&self, digest: &B3Digest) -> Result, Error>; diff --git a/tvix/store/src/directoryservice/sled.rs b/tvix/store/src/directoryservice/sled.rs index 8ed2c59c2d0f..6dc09ed646c3 100644 --- a/tvix/store/src/directoryservice/sled.rs +++ b/tvix/store/src/directoryservice/sled.rs @@ -30,6 +30,36 @@ impl SledDirectoryService { } impl DirectoryService for SledDirectoryService { + /// Constructs a [SledDirectoryService] from the passed [url::Url]: + /// - scheme has to be `sled://` + /// - there may not be a host. + /// - a path to the sled needs to be provided (which may not be `/`). + fn from_url(url: &url::Url) -> Result { + if url.scheme() != "sled" { + return Err(crate::Error::StorageError("invalid scheme".to_string())); + } + + if url.has_host() { + return Err(crate::Error::StorageError(format!( + "invalid host: {}", + url.host().unwrap() + ))); + } + + // TODO: expose compression and other parameters as URL parameters, drop new and new_temporary? + if url.path().is_empty() { + Self::new_temporary().map_err(|e| Error::StorageError(e.to_string())) + } else { + if url.path() == "/" { + Err(crate::Error::StorageError( + "cowardly refusing to open / with sled".to_string(), + )) + } else { + Self::new(url.path().into()).map_err(|e| Error::StorageError(e.to_string())) + } + } + } + #[instrument(skip(self, digest), fields(directory.digest = %digest))] fn get(&self, digest: &B3Digest) -> Result, Error> { match self.db.get(digest.to_vec()) { @@ -109,3 +139,77 @@ impl DirectoryService for SledDirectoryService { Box::new(SimplePutter::new(self.clone())) } } + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use super::DirectoryService; + use super::SledDirectoryService; + + /// This uses a wrong scheme. + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and doesn't specify a path (temporary sled). + #[test] + fn test_valid_scheme_temporary() { + let url = url::Url::parse("sled://").expect("must parse"); + + assert!(SledDirectoryService::from_url(&url).is_ok()); + } + + /// This sets the path to a location that doesn't exist, which should fail (as sled doesn't mkdir -p) + #[test] + fn test_nonexistent_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://foo.example").expect("must parse"); + url.set_path(tmpdir.path().join("foo").join("bar").to_str().unwrap()); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and specifies / as path (which should fail + // for obvious reasons) + #[test] + fn test_invalid_path_root() { + let url = url::Url::parse("sled:///").expect("must parse"); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } + + /// This uses the correct scheme, and sets a tempdir as location. + #[test] + fn test_valid_scheme_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://").expect("must parse"); + url.set_path(tmpdir.path().to_str().unwrap()); + + assert!(SledDirectoryService::from_url(&url).is_ok()); + } + + /// This sets a host, rather than a path, which should fail. + #[test] + fn test_invalid_host() { + let url = url::Url::parse("sled://foo.example").expect("must parse"); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } + + /// This sets a host AND a valid path, which should fail + #[test] + fn test_invalid_host_and_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://foo.example").expect("must parse"); + url.set_path(tmpdir.path().to_str().unwrap()); + + assert!(SledDirectoryService::from_url(&url).is_err()); + } +} -- cgit 1.4.1