From 35bff2bda69d5189d9a439cd2032b86ebb4e6e41 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Mon, 12 Jun 2023 00:04:00 +0300 Subject: refactor(tvix/store/pathinfosvc): add from_addr Change-Id: I24e822351a837fce2aed568a647d009099ef32ec Reviewed-on: https://cl.tvl.fyi/c/depot/+/8747 Reviewed-by: tazjin Autosubmit: flokli Tested-by: BuildkiteCI --- tvix/store/src/bin/tvix-store.rs | 45 ++++-- tvix/store/src/pathinfoservice/from_addr.rs | 58 +++++++ tvix/store/src/pathinfoservice/grpc.rs | 232 +++++++++++++++++++++++++++- tvix/store/src/pathinfoservice/memory.rs | 84 ++++++++++ tvix/store/src/pathinfoservice/mod.rs | 17 ++ tvix/store/src/pathinfoservice/sled.rs | 134 ++++++++++++++++ 6 files changed, 552 insertions(+), 18 deletions(-) create mode 100644 tvix/store/src/pathinfoservice/from_addr.rs diff --git a/tvix/store/src/bin/tvix-store.rs b/tvix/store/src/bin/tvix-store.rs index c8d361212e5e..36969c1cd366 100644 --- a/tvix/store/src/bin/tvix-store.rs +++ b/tvix/store/src/bin/tvix-store.rs @@ -8,13 +8,10 @@ use std::sync::Arc; use tracing_subscriber::prelude::*; use tvix_store::blobservice; use tvix_store::directoryservice; -use tvix_store::pathinfoservice::GRPCPathInfoService; -use tvix_store::pathinfoservice::PathInfoService; -use tvix_store::pathinfoservice::SledPathInfoService; +use tvix_store::pathinfoservice; use tvix_store::proto::blob_service_server::BlobServiceServer; use tvix_store::proto::directory_service_server::DirectoryServiceServer; use tvix_store::proto::node::Node; -use tvix_store::proto::path_info_service_client::PathInfoServiceClient; use tvix_store::proto::path_info_service_server::PathInfoServiceServer; use tvix_store::proto::GRPCBlobServiceWrapper; use tvix_store::proto::GRPCDirectoryServiceWrapper; @@ -59,6 +56,9 @@ enum Commands { default_value = "sled:///var/lib/tvix-store/directories.sled" )] directory_service_addr: String, + + #[arg(long, env, default_value = "sled:///var/lib/tvix-store/pathinfo.sled")] + path_info_service_addr: String, }, /// Imports a list of paths into the store (not using the daemon) Import { @@ -70,6 +70,9 @@ enum Commands { #[arg(long, env, default_value = "grpc+http://[::1]:8000")] directory_service_addr: String, + + #[arg(long, env, default_value = "grpc+http://[::1]:8000")] + path_info_service_addr: String, }, /// Mounts a tvix-store at the given mountpoint #[cfg(feature = "fuse")] @@ -82,6 +85,9 @@ enum Commands { #[arg(long, env, default_value = "grpc+http://[::1]:8000")] directory_service_addr: String, + + #[arg(long, env, default_value = "grpc+http://[::1]:8000")] + path_info_service_addr: String, }, } @@ -119,15 +125,16 @@ async fn main() -> Result<(), Box> { listen_address, blob_service_addr, directory_service_addr, + path_info_service_addr, } => { // initialize stores let blob_service = blobservice::from_addr(&blob_service_addr).await?; let directory_service = directoryservice::from_addr(&directory_service_addr)?; - let path_info_service: Arc = Arc::new(SledPathInfoService::new( - "pathinfo.sled".into(), + let path_info_service = pathinfoservice::from_addr( + &path_info_service_addr, blob_service.clone(), directory_service.clone(), - )?); + )?; let listen_address = listen_address .unwrap_or_else(|| "[::]:8000".to_string()) @@ -164,18 +171,20 @@ async fn main() -> Result<(), Box> { paths, blob_service_addr, directory_service_addr, + path_info_service_addr, } => { let blob_service = blobservice::from_addr(&blob_service_addr).await?; let directory_service = directoryservice::from_addr(&directory_service_addr)?; - let path_info_service_client = - PathInfoServiceClient::connect("http://[::1]:8000").await?; - let path_info_service = - GRPCPathInfoService::from_client(path_info_service_client.clone()); + let path_info_service = pathinfoservice::from_addr( + &path_info_service_addr, + blob_service.clone(), + directory_service.clone(), + )?; let io = Arc::new(TvixStoreIO::new( blob_service, directory_service, - Arc::new(path_info_service), + path_info_service, )); let tasks = paths @@ -200,16 +209,18 @@ async fn main() -> Result<(), Box> { dest, blob_service_addr, directory_service_addr, + path_info_service_addr, } => { let blob_service = blobservice::from_addr(&blob_service_addr).await?; let directory_service = directoryservice::from_addr(&directory_service_addr)?; - let path_info_service_client = - PathInfoServiceClient::connect("http://[::1]:8000").await?; - let path_info_service = - GRPCPathInfoService::from_client(path_info_service_client.clone()); + let path_info_service = pathinfoservice::from_addr( + &path_info_service_addr, + blob_service.clone(), + directory_service.clone(), + )?; tokio::task::spawn_blocking(move || { - let f = FUSE::new(blob_service, directory_service, Arc::new(path_info_service)); + let f = FUSE::new(blob_service, directory_service, path_info_service); fuser::mount2(f, &dest, &[]) }) .await?? diff --git a/tvix/store/src/pathinfoservice/from_addr.rs b/tvix/store/src/pathinfoservice/from_addr.rs new file mode 100644 index 000000000000..2f712f451442 --- /dev/null +++ b/tvix/store/src/pathinfoservice/from_addr.rs @@ -0,0 +1,58 @@ +use std::sync::Arc; +use url::Url; + +use crate::{blobservice::BlobService, directoryservice::DirectoryService}; + +use super::{GRPCPathInfoService, MemoryPathInfoService, PathInfoService, SledPathInfoService}; + +/// Constructs a new instance of a [PathInfoService] from an URI. +/// +/// The following URIs are supported: +/// - `memory:` +/// Uses a in-memory implementation. +/// - `sled:` +/// Uses a in-memory sled implementation. +/// - `sled:///absolute/path/to/somewhere` +/// Uses sled, using a path on the disk for persistency. Can be only opened +/// from one process at the same time. +/// - `grpc+unix:///absolute/path/to/somewhere` +/// Connects to a local tvix-store gRPC service via Unix socket. +/// - `grpc+http://host:port`, `grpc+https://host:port` +/// Connects to a (remote) tvix-store gRPC service. +/// +/// As the [PathInfoService] needs to talk to [BlobService] and [DirectoryService], +/// these also need to be passed in. +pub fn from_addr( + uri: &str, + blob_service: Arc, + directory_service: Arc, +) -> Result, crate::Error> { + let url = Url::parse(uri).map_err(|e| { + crate::Error::StorageError(format!("unable to parse url: {}", e.to_string())) + })?; + + Ok(if url.scheme() == "memory" { + Arc::new(MemoryPathInfoService::from_url( + &url, + blob_service, + directory_service, + )?) + } else if url.scheme() == "sled" { + Arc::new(SledPathInfoService::from_url( + &url, + blob_service, + directory_service, + )?) + } else if url.scheme().starts_with("grpc+") { + Arc::new(GRPCPathInfoService::from_url( + &url, + blob_service, + directory_service, + )?) + } else { + Err(crate::Error::StorageError(format!( + "unknown scheme: {}", + url.scheme() + )))? + }) +} diff --git a/tvix/store/src/pathinfoservice/grpc.rs b/tvix/store/src/pathinfoservice/grpc.rs index 230d630cf476..a14f51c9f890 100644 --- a/tvix/store/src/pathinfoservice/grpc.rs +++ b/tvix/store/src/pathinfoservice/grpc.rs @@ -1,5 +1,7 @@ use super::PathInfoService; -use crate::proto; +use crate::{blobservice::BlobService, directoryservice::DirectoryService, proto}; +use std::sync::Arc; +use tokio::net::UnixStream; use tonic::{transport::Channel, Code, Status}; /// Connects to a (remote) tvix-store PathInfoService over gRPC. @@ -27,6 +29,65 @@ impl GRPCPathInfoService { } impl PathInfoService for GRPCPathInfoService { + /// Constructs a [GRPCPathInfoService] from the passed [url::Url]: + /// - scheme has to match `grpc+*://`. + /// That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. + /// - In the case of unix sockets, there must be a path, but may not be a host. + /// - In the case of non-unix sockets, there must be a host, but no path. + /// The blob_service and directory_service arguments are ignored, because the gRPC service already provides answers to these questions. + fn from_url( + url: &url::Url, + _blob_service: Arc, + _directory_service: Arc, + ) -> Result { + // Start checking for the scheme to start with grpc+. + match url.scheme().strip_prefix("grpc+") { + None => Err(crate::Error::StorageError("invalid scheme".to_string())), + Some(rest) => { + if rest == "unix" { + if url.host_str().is_some() { + return Err(crate::Error::StorageError( + "host may not be set".to_string(), + )); + } + let path = url.path().to_string(); + let channel = tonic::transport::Endpoint::try_from("http://[::]:50051") // doesn't matter + .unwrap() + .connect_with_connector_lazy(tower::service_fn( + move |_: tonic::transport::Uri| UnixStream::connect(path.clone()), + )); + let grpc_client = + proto::path_info_service_client::PathInfoServiceClient::new(channel); + Ok(Self::from_client(grpc_client)) + } else { + // ensure path is empty, not supported with gRPC. + if !url.path().is_empty() { + return Err(crate::Error::StorageError( + "path may not be set".to_string(), + )); + } + + // clone the uri, and drop the grpc+ from the scheme. + // Recreate a new uri with the `grpc+` prefix dropped from the scheme. + // We can't use `url.set_scheme(rest)`, as it disallows + // setting something http(s) that previously wasn't. + let url = { + let url_str = url.to_string(); + let s_stripped = url_str.strip_prefix("grpc+").unwrap(); + url::Url::parse(s_stripped).unwrap() + }; + let channel = tonic::transport::Endpoint::try_from(url.to_string()) + .unwrap() + .connect_lazy(); + + let grpc_client = + proto::path_info_service_client::PathInfoServiceClient::new(channel); + Ok(Self::from_client(grpc_client)) + } + } + } + } + fn get(&self, digest: [u8; 20]) -> Result, crate::Error> { // Get a new handle to the gRPC client. let mut grpc_client = self.grpc_client.clone(); @@ -99,3 +160,172 @@ impl PathInfoService for GRPCPathInfoService { Ok((resp.nar_size, nar_sha256)) } } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + use std::thread; + + use tempfile::TempDir; + use tokio::net::UnixListener; + use tokio::task; + use tokio::time; + use tokio_stream::wrappers::UnixListenerStream; + + use crate::pathinfoservice::MemoryPathInfoService; + use crate::proto::GRPCPathInfoServiceWrapper; + use crate::tests::fixtures; + use crate::tests::utils::gen_blob_service; + use crate::tests::utils::gen_directory_service; + + use super::GRPCPathInfoService; + use super::PathInfoService; + + /// This uses the wrong scheme + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!( + GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This uses the correct scheme for a unix socket. + /// The fact that /path/to/somewhere doesn't exist yet is no problem, because we connect lazily. + #[tokio::test] + async fn test_valid_unix_path() { + let url = url::Url::parse("grpc+unix:///path/to/somewhere").expect("must parse"); + + assert!( + GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_ok() + ); + } + + /// This uses the correct scheme for a unix socket, + /// but sets a host, which is unsupported. + #[tokio::test] + async fn test_invalid_unix_path_with_domain() { + let url = + url::Url::parse("grpc+unix://host.example/path/to/somewhere").expect("must parse"); + + assert!( + GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This uses the correct scheme for a HTTP server. + /// The fact that nothing is listening there is no problem, because we connect lazily. + #[tokio::test] + async fn test_valid_http() { + let url = url::Url::parse("grpc+http://localhost").expect("must parse"); + + assert!( + GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_ok() + ); + } + + /// This uses the correct scheme for a HTTPS server. + /// The fact that nothing is listening there is no problem, because we connect lazily. + #[tokio::test] + async fn test_valid_https() { + let url = url::Url::parse("grpc+https://localhost").expect("must parse"); + + assert!( + GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_ok() + ); + } + + /// This uses the correct scheme, but also specifies + /// an additional path, which is not supported for gRPC. + /// The fact that nothing is listening there is no problem, because we connect lazily. + #[tokio::test] + async fn test_invalid_http_with_path() { + let url = url::Url::parse("grpc+https://localhost/some-path").expect("must parse"); + + assert!( + GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This uses the correct scheme for a unix socket, and provides a server on the other side. + #[tokio::test] + async fn test_valid_unix_path_ping_pong() { + let tmpdir = TempDir::new().unwrap(); + let path = tmpdir.path().join("daemon"); + + // let mut join_set = JoinSet::new(); + + // prepare a client + let client = { + let mut url = url::Url::parse("grpc+unix:///path/to/somewhere").expect("must parse"); + url.set_path(path.to_str().unwrap()); + GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .expect("must succeed") + }; + + let path_copy = path.clone(); + + // Spin up a server, in a thread far away, which spawns its own tokio runtime, + // and blocks on the task. + thread::spawn(move || { + // Create the runtime + let rt = tokio::runtime::Runtime::new().unwrap(); + // Get a handle from this runtime + let handle = rt.handle(); + + let task = handle.spawn(async { + let uds = UnixListener::bind(path_copy).unwrap(); + let uds_stream = UnixListenerStream::new(uds); + + // spin up a new server + let mut server = tonic::transport::Server::builder(); + let router = server.add_service( + crate::proto::path_info_service_server::PathInfoServiceServer::new( + GRPCPathInfoServiceWrapper::from(Arc::new(MemoryPathInfoService::new( + gen_blob_service(), + gen_directory_service(), + )) + as Arc), + ), + ); + router.serve_with_incoming(uds_stream).await + }); + + handle.block_on(task) + }); + + // wait for the socket to be created + { + let mut socket_created = false; + for _try in 1..20 { + if path.exists() { + socket_created = true; + break; + } + tokio::time::sleep(time::Duration::from_millis(20)).await; + } + + assert!( + socket_created, + "expected socket path to eventually get created, but never happened" + ); + } + + let pi = task::spawn_blocking(move || { + client + .get(fixtures::DUMMY_OUTPUT_HASH.to_vec().try_into().unwrap()) + .expect("must not be error") + }) + .await + .expect("must not be err"); + + assert!(pi.is_none()); + } +} diff --git a/tvix/store/src/pathinfoservice/memory.rs b/tvix/store/src/pathinfoservice/memory.rs index 35455313cb51..f7abb2180ef7 100644 --- a/tvix/store/src/pathinfoservice/memory.rs +++ b/tvix/store/src/pathinfoservice/memory.rs @@ -29,6 +29,26 @@ impl MemoryPathInfoService { } impl PathInfoService for MemoryPathInfoService { + /// Constructs a [MemoryPathInfoService] from the passed [url::Url]: + /// - scheme has to be `memory://` + /// - there may not be a host. + /// - there may not be a path. + fn from_url( + url: &url::Url, + blob_service: Arc, + directory_service: Arc, + ) -> Result { + if url.scheme() != "memory" { + return Err(crate::Error::StorageError("invalid scheme".to_string())); + } + + if url.has_host() || !url.path().is_empty() { + return Err(crate::Error::StorageError("invalid url".to_string())); + } + + Ok(Self::new(blob_service, directory_service)) + } + fn get(&self, digest: [u8; 20]) -> Result, Error> { let db = self.db.read().unwrap(); @@ -66,3 +86,67 @@ impl PathInfoService for MemoryPathInfoService { .map_err(|e| Error::StorageError(e.to_string())) } } + +#[cfg(test)] +mod tests { + use crate::tests::utils::gen_blob_service; + use crate::tests::utils::gen_directory_service; + + use super::MemoryPathInfoService; + use super::PathInfoService; + + /// This uses a wrong scheme. + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!( + MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This correctly sets the scheme, and doesn't set a path. + #[test] + fn test_valid_scheme() { + let url = url::Url::parse("memory://").expect("must parse"); + + assert!( + MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_ok() + ); + } + + /// This sets the host to `foo` + #[test] + fn test_invalid_host() { + let url = url::Url::parse("memory://foo").expect("must parse"); + + assert!( + MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This has the path "/", which is invalid. + #[test] + fn test_invalid_has_path() { + let url = url::Url::parse("memory:///").expect("must parse"); + + assert!( + MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This has the path "/foo", which is invalid. + #[test] + fn test_invalid_path2() { + let url = url::Url::parse("memory:///foo").expect("must parse"); + + assert!( + MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } +} diff --git a/tvix/store/src/pathinfoservice/mod.rs b/tvix/store/src/pathinfoservice/mod.rs index 937d8f2a1119..191e8dbd602e 100644 --- a/tvix/store/src/pathinfoservice/mod.rs +++ b/tvix/store/src/pathinfoservice/mod.rs @@ -1,9 +1,15 @@ +mod from_addr; mod grpc; mod memory; mod sled; +use std::sync::Arc; + +use crate::blobservice::BlobService; +use crate::directoryservice::DirectoryService; use crate::{proto, Error}; +pub use self::from_addr::from_addr; pub use self::grpc::GRPCPathInfoService; pub use self::memory::MemoryPathInfoService; pub use self::sled::SledPathInfoService; @@ -11,6 +17,17 @@ pub use self::sled::SledPathInfoService; /// The base trait all PathInfo services need to implement. /// This is a simple get and put of [proto::Directory], returning their digest. pub trait PathInfoService: Send + Sync { + /// Create a new instance by passing in a connection URL, as well + /// as instances of a [PathInfoService] and [DirectoryService] (as the + /// [PathInfoService] needs to talk to them). + fn from_url( + url: &url::Url, + blob_service: Arc, + directory_service: Arc, + ) -> Result + where + Self: Sized; + /// Retrieve a PathInfo message by the output digest. fn get(&self, digest: [u8; 20]) -> Result, Error>; diff --git a/tvix/store/src/pathinfoservice/sled.rs b/tvix/store/src/pathinfoservice/sled.rs index f06a905cef8d..48db6b8b5cd8 100644 --- a/tvix/store/src/pathinfoservice/sled.rs +++ b/tvix/store/src/pathinfoservice/sled.rs @@ -50,6 +50,42 @@ impl SledPathInfoService { } impl PathInfoService for SledPathInfoService { + /// Constructs a [SledBlobService] from the passed [url::Url]: + /// - scheme has to be `sled://` + /// - there may not be a host. + /// - a path to the sled needs to be provided (which may not be `/`). + fn from_url( + url: &url::Url, + blob_service: Arc, + directory_service: Arc, + ) -> Result { + if url.scheme() != "sled" { + return Err(crate::Error::StorageError("invalid scheme".to_string())); + } + + if url.has_host() { + return Err(crate::Error::StorageError(format!( + "invalid host: {}", + url.host().unwrap() + ))); + } + + // TODO: expose compression and other parameters as URL parameters, drop new and new_temporary? + if url.path().is_empty() { + Self::new_temporary(blob_service, directory_service) + .map_err(|e| Error::StorageError(e.to_string())) + } else { + if url.path() == "/" { + Err(crate::Error::StorageError( + "cowardly refusing to open / with sled".to_string(), + )) + } else { + Self::new(url.path().into(), blob_service, directory_service) + .map_err(|e| Error::StorageError(e.to_string())) + } + } + } + fn get(&self, digest: [u8; 20]) -> Result, Error> { match self.db.get(digest) { Ok(None) => Ok(None), @@ -103,3 +139,101 @@ impl PathInfoService for SledPathInfoService { .map_err(|e| Error::StorageError(e.to_string())) } } + +#[cfg(test)] +mod tests { + use tempfile::TempDir; + + use crate::tests::utils::gen_blob_service; + use crate::tests::utils::gen_directory_service; + + use super::PathInfoService; + use super::SledPathInfoService; + + /// This uses a wrong scheme. + #[test] + fn test_invalid_scheme() { + let url = url::Url::parse("http://foo.example/test").expect("must parse"); + + assert!( + SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This uses the correct scheme, and doesn't specify a path (temporary sled). + #[test] + fn test_valid_scheme_temporary() { + let url = url::Url::parse("sled://").expect("must parse"); + + assert!( + SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_ok() + ); + } + + /// This sets the path to a location that doesn't exist, which should fail (as sled doesn't mkdir -p) + #[test] + fn test_nonexistent_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://foo.example").expect("must parse"); + url.set_path(tmpdir.path().join("foo").join("bar").to_str().unwrap()); + + assert!( + SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This uses the correct scheme, and specifies / as path (which should fail + // for obvious reasons) + #[test] + fn test_invalid_path_root() { + let url = url::Url::parse("sled:///").expect("must parse"); + + assert!( + SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This uses the correct scheme, and sets a tempdir as location. + #[test] + fn test_valid_scheme_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://").expect("must parse"); + url.set_path(tmpdir.path().to_str().unwrap()); + + assert!( + SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_ok() + ); + } + + /// This sets a host, rather than a path, which should fail. + #[test] + fn test_invalid_host() { + let url = url::Url::parse("sled://foo.example").expect("must parse"); + + assert!( + SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } + + /// This sets a host AND a valid path, which should fail + #[test] + fn test_invalid_host_and_path() { + let tmpdir = TempDir::new().unwrap(); + + let mut url = url::Url::parse("sled://foo.example").expect("must parse"); + url.set_path(tmpdir.path().to_str().unwrap()); + + assert!( + SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service()) + .is_err() + ); + } +} -- cgit 1.4.1