diff options
author | Yureka <tvl@yuka.dev> | 2024-07-18T17·09+0200 |
---|---|---|
committer | yuka <tvl@yuka.dev> | 2024-07-18T19·19+0000 |
commit | 168e4fda5909e535f33051051ef426e221ef20d4 (patch) | |
tree | e23b8ad4ced3f4232bdb0ad186f3b63f693c57e5 /tvix/castore | |
parent | 79317be214ce2f1e3347438319d3482bb773a649 (diff) |
refactor(tvix): use composition & registry for from_addr r/8368
Change-Id: I3c94ecb5958294b5973c6fcdf5ee9c0d37fa54ad Reviewed-on: https://cl.tvl.fyi/c/depot/+/11976 Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI Autosubmit: yuka <yuka@yuka.dev>
Diffstat (limited to 'tvix/castore')
-rw-r--r-- | tvix/castore/src/blobservice/combinator.rs | 12 | ||||
-rw-r--r-- | tvix/castore/src/blobservice/from_addr.rs | 62 | ||||
-rw-r--r-- | tvix/castore/src/blobservice/grpc.rs | 13 | ||||
-rw-r--r-- | tvix/castore/src/blobservice/memory.rs | 13 | ||||
-rw-r--r-- | tvix/castore/src/blobservice/object_store.rs | 91 | ||||
-rw-r--r-- | tvix/castore/src/composition.rs | 79 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/bigtable.rs | 20 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/combinators.rs | 10 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/from_addr.rs | 113 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/grpc.rs | 13 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/memory.rs | 11 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/object_store.rs | 26 | ||||
-rw-r--r-- | tvix/castore/src/directoryservice/sled.rs | 25 | ||||
-rw-r--r-- | tvix/castore/src/tests/import.rs | 9 |
14 files changed, 295 insertions, 202 deletions
diff --git a/tvix/castore/src/blobservice/combinator.rs b/tvix/castore/src/blobservice/combinator.rs index 0bce657e9176..8ec5a859bcda 100644 --- a/tvix/castore/src/blobservice/combinator.rs +++ b/tvix/castore/src/blobservice/combinator.rs @@ -6,7 +6,7 @@ use tonic::async_trait; use tracing::{instrument, warn}; use crate::composition::{CompositionContext, ServiceBuilder}; -use crate::B3Digest; +use crate::{B3Digest, Error}; use super::{naive_seeker::NaiveSeeker, BlobReader, BlobService, BlobWriter}; @@ -103,6 +103,16 @@ pub struct CombinedBlobServiceConfig { remote: String, } +impl TryFrom<url::Url> for CombinedBlobServiceConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(_url: url::Url) -> Result<Self, Self::Error> { + Err(Error::StorageError( + "Instantiating a CombinedBlobService from a url is not supported".into(), + ) + .into()) + } +} + #[async_trait] impl ServiceBuilder for CombinedBlobServiceConfig { type Output = dyn BlobService; diff --git a/tvix/castore/src/blobservice/from_addr.rs b/tvix/castore/src/blobservice/from_addr.rs index b7e266c4eaec..c5cabaa9d945 100644 --- a/tvix/castore/src/blobservice/from_addr.rs +++ b/tvix/castore/src/blobservice/from_addr.rs @@ -1,8 +1,12 @@ +use std::sync::Arc; + use url::Url; -use crate::{proto::blob_service_client::BlobServiceClient, Error}; +use crate::composition::{ + with_registry, CompositionContext, DeserializeWithRegistry, ServiceBuilder, REG, +}; -use super::{BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobService}; +use super::BlobService; /// Constructs a new instance of a [BlobService] from an URI. /// @@ -12,53 +16,19 @@ use super::{BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobServ /// - `objectstore+*://` ([ObjectStoreBlobService]) /// /// See their `from_url` methods for more details about their syntax. -pub async fn from_addr(uri: &str) -> Result<Box<dyn BlobService>, crate::Error> { +pub async fn from_addr( + uri: &str, +) -> Result<Arc<dyn BlobService>, Box<dyn std::error::Error + Send + Sync>> { let url = Url::parse(uri) .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?; - let blob_service: Box<dyn BlobService> = match url.scheme() { - "memory" => { - // memory doesn't support host or path in the URL. - if url.has_host() || !url.path().is_empty() { - return Err(Error::StorageError("invalid url".to_string())); - } - Box::<MemoryBlobService>::default() - } - scheme if scheme.starts_with("grpc+") => { - // schemes starting with grpc+ go to the GRPCPathInfoService. - // That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. - // - In the case of unix sockets, there must be a path, but may not be a host. - // - In the case of non-unix sockets, there must be a host, but no path. - // Constructing the channel is handled by tvix_castore::channel::from_url. - Box::new(GRPCBlobService::from_client( - BlobServiceClient::with_interceptor( - crate::tonic::channel_from_url(&url).await?, - tvix_tracing::propagate::tonic::send_trace, - ), - )) - } - scheme if scheme.starts_with("objectstore+") => { - // We need to convert the URL to string, strip the prefix there, and then - // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do. - let trimmed_url = { - let s = url.to_string(); - let mut url = Url::parse(s.strip_prefix("objectstore+").unwrap()).unwrap(); - // trim the query pairs, they might contain credentials or local settings we don't want to send as-is. - url.set_query(None); - url - }; - Box::new( - ObjectStoreBlobService::parse_url_opts(&trimmed_url, url.query_pairs()) - .map_err(|e| Error::StorageError(e.to_string()))?, - ) - } - scheme => { - return Err(crate::Error::StorageError(format!( - "unknown scheme: {}", - scheme - ))) - } - }; + let blob_service_config = with_registry(®, || { + <DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = dyn BlobService>>>>::try_from(url) + })? + .0; + let blob_service = blob_service_config + .build("anonymous", &CompositionContext::blank()) + .await?; Ok(blob_service) } diff --git a/tvix/castore/src/blobservice/grpc.rs b/tvix/castore/src/blobservice/grpc.rs index 56a2ebf00038..f5705adbf432 100644 --- a/tvix/castore/src/blobservice/grpc.rs +++ b/tvix/castore/src/blobservice/grpc.rs @@ -187,6 +187,19 @@ pub struct GRPCBlobServiceConfig { url: String, } +impl TryFrom<url::Url> for GRPCBlobServiceConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(url: url::Url) -> Result<Self, Self::Error> { + // normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. + // - In the case of unix sockets, there must be a path, but may not be a host. + // - In the case of non-unix sockets, there must be a host, but no path. + // Constructing the channel is handled by tvix_castore::channel::from_url. + Ok(GRPCBlobServiceConfig { + url: url.to_string(), + }) + } +} + #[async_trait] impl ServiceBuilder for GRPCBlobServiceConfig { type Output = dyn BlobService; diff --git a/tvix/castore/src/blobservice/memory.rs b/tvix/castore/src/blobservice/memory.rs index 0205dcf7bd70..83b37edb1c89 100644 --- a/tvix/castore/src/blobservice/memory.rs +++ b/tvix/castore/src/blobservice/memory.rs @@ -7,7 +7,7 @@ use tracing::instrument; use super::{BlobReader, BlobService, BlobWriter}; use crate::composition::{CompositionContext, ServiceBuilder}; -use crate::B3Digest; +use crate::{B3Digest, Error}; #[derive(Clone, Default)] pub struct MemoryBlobService { @@ -42,6 +42,17 @@ impl BlobService for MemoryBlobService { #[serde(deny_unknown_fields)] pub struct MemoryBlobServiceConfig {} +impl TryFrom<url::Url> for MemoryBlobServiceConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(url: url::Url) -> Result<Self, Self::Error> { + // memory doesn't support host or path in the URL. + if url.has_host() || !url.path().is_empty() { + return Err(Error::StorageError("invalid url".to_string()).into()); + } + Ok(MemoryBlobServiceConfig {}) + } +} + #[async_trait] impl ServiceBuilder for MemoryBlobServiceConfig { type Output = dyn BlobService; diff --git a/tvix/castore/src/blobservice/object_store.rs b/tvix/castore/src/blobservice/object_store.rs index bd8c2bd747fb..224ebae9e2c6 100644 --- a/tvix/castore/src/blobservice/object_store.rs +++ b/tvix/castore/src/blobservice/object_store.rs @@ -21,21 +21,11 @@ use url::Url; use crate::{ composition::{CompositionContext, ServiceBuilder}, proto::{stat_blob_response::ChunkMeta, StatBlobResponse}, - B3Digest, B3HashingReader, + B3Digest, B3HashingReader, Error, }; use super::{BlobReader, BlobService, BlobWriter, ChunkedReader}; -#[derive(Clone)] -pub struct ObjectStoreBlobService { - object_store: Arc<dyn ObjectStore>, - base_path: Path, - - /// Average chunk size for FastCDC, in bytes. - /// min value is half, max value double of that number. - avg_chunk_size: u32, -} - /// Uses any object storage supported by the [object_store] crate to provide a /// tvix-castore [BlobService]. /// @@ -72,31 +62,14 @@ pub struct ObjectStoreBlobService { /// It also allows signalling any compression of chunks in the content-type. /// Migration *should* be possible by simply adding the right content-types to /// all keys stored so far, but no promises ;-) -impl ObjectStoreBlobService { - /// Constructs a new [ObjectStoreBlobService] from a [Url] supported by - /// [object_store]. - /// Any path suffix becomes the base path of the object store. - /// additional options, the same as in [object_store::parse_url_opts] can - /// be passed. - pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error> - where - I: IntoIterator<Item = (K, V)>, - K: AsRef<str>, - V: Into<String>, - { - let (object_store, path) = object_store::parse_url_opts(url, options)?; - - Ok(Self { - object_store: Arc::new(object_store), - base_path: path, - avg_chunk_size: 256 * 1024, - }) - } +#[derive(Clone)] +pub struct ObjectStoreBlobService { + object_store: Arc<dyn ObjectStore>, + base_path: Path, - /// Like [Self::parse_url_opts], except without the options. - pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> { - Self::parse_url_opts(url, Vec::<(String, String)>::new()) - } + /// Average chunk size for FastCDC, in bytes. + /// min value is half, max value double of that number. + avg_chunk_size: u32, } #[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))] @@ -281,10 +254,41 @@ pub struct ObjectStoreBlobServiceConfig { object_store_url: String, #[serde(default = "default_avg_chunk_size")] avg_chunk_size: u32, - #[serde(default)] object_store_options: HashMap<String, String>, } +impl TryFrom<url::Url> for ObjectStoreBlobServiceConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + /// Constructs a new [ObjectStoreBlobService] from a [Url] supported by + /// [object_store]. + /// Any path suffix becomes the base path of the object store. + /// additional options, the same as in [object_store::parse_url_opts] can + /// be passed. + fn try_from(url: url::Url) -> Result<Self, Self::Error> { + // We need to convert the URL to string, strip the prefix there, and then + // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do. + let trimmed_url = { + let s = url.to_string(); + let mut url = Url::parse( + s.strip_prefix("objectstore+") + .ok_or(Error::StorageError("Missing objectstore uri".into()))?, + )?; + // trim the query pairs, they might contain credentials or local settings we don't want to send as-is. + url.set_query(None); + url + }; + Ok(ObjectStoreBlobServiceConfig { + object_store_url: trimmed_url.into(), + object_store_options: url + .query_pairs() + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(), + avg_chunk_size: 256 * 1024, + }) + } +} + #[async_trait] impl ServiceBuilder for ObjectStoreBlobServiceConfig { type Output = dyn BlobService; @@ -548,7 +552,7 @@ where #[cfg(test)] mod test { - use super::chunk_and_upload; + use super::{chunk_and_upload, default_avg_chunk_size}; use crate::{ blobservice::{BlobService, ObjectStoreBlobService}, fixtures::{BLOB_A, BLOB_A_DIGEST}, @@ -559,13 +563,18 @@ mod test { /// Tests chunk_and_upload directly, bypassing the BlobWriter at open_write(). #[tokio::test] async fn test_chunk_and_upload() { - let blobsvc = Arc::new( - ObjectStoreBlobService::parse_url(&Url::parse("memory:///").unwrap()).unwrap(), - ); + let (object_store, base_path) = + object_store::parse_url(&Url::parse("memory:///").unwrap()).unwrap(); + let object_store: Arc<dyn object_store::ObjectStore> = Arc::from(object_store); + let blobsvc = Arc::new(ObjectStoreBlobService { + object_store: object_store.clone(), + avg_chunk_size: default_avg_chunk_size(), + base_path, + }); let blob_digest = chunk_and_upload( &mut Cursor::new(BLOB_A.to_vec()), - blobsvc.object_store.clone(), + object_store, object_store::path::Path::from("/"), 1024 / 2, 1024, diff --git a/tvix/castore/src/composition.rs b/tvix/castore/src/composition.rs index cd4064af9a3e..9e7b3712fb7a 100644 --- a/tvix/castore/src/composition.rs +++ b/tvix/castore/src/composition.rs @@ -31,6 +31,13 @@ //! } //! } //! +//! impl TryFrom<url::Url> for MyBlobServiceConfig { +//! type Error = Box<dyn std::error::Error + Send + Sync>; +//! fn try_from(url: url::Url) -> Result<Self, Self::Error> { +//! todo!() +//! } +//! } +//! //! pub fn add_my_service(reg: &mut Registry) { //! reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, MyBlobServiceConfig>("myblobservicetype"); //! } @@ -100,7 +107,7 @@ use tonic::async_trait; // This is really ugly. Really we would want to store this as a generic static field: // // ``` -// struct Registry<T>(BTreeMap<(&'static str), BoxSeedFn<T>); +// struct Registry<T>(BTreeMap<(&'static str), RegistryEntry<T>); // static REG<T>: Registry<T>; // ``` // @@ -116,6 +123,12 @@ use tonic::async_trait; // I said it was ugly... #[derive(Default)] pub struct Registry(BTreeMap<(TypeId, &'static str), Box<dyn Any + Sync>>); +pub type FromUrlSeed<T> = + Box<dyn Fn(url::Url) -> Result<T, Box<dyn std::error::Error + Send + Sync>> + Sync>; +pub struct RegistryEntry<T> { + serde_deserialize_seed: BoxFnSeed<DeserializeWithRegistry<T>>, + from_url_seed: FromUrlSeed<DeserializeWithRegistry<T>>, +} struct RegistryWithFakeType<'r, T>(&'r Registry, PhantomData<T>); @@ -137,7 +150,9 @@ impl<'r, 'de: 'r, T: 'static> SeedFactory<'de, TagString<'de>> for RegistryWithF .ok_or_else(|| serde::de::Error::custom("Unknown tag"))? .1; - Ok(<dyn Any>::downcast_ref(&**seed).unwrap()) + let entry: &RegistryEntry<T> = <dyn Any>::downcast_ref(&**seed).unwrap(); + + Ok(&entry.serde_deserialize_seed) } } @@ -146,7 +161,7 @@ impl<'r, 'de: 'r, T: 'static> SeedFactory<'de, TagString<'de>> for RegistryWithF /// Wrap your type in this in order to deserialize it using a registry, e.g. /// `RegistryWithFakeType<Box<dyn MyTrait>>`, then the types registered for `Box<dyn MyTrait>` /// will be used. -pub struct DeserializeWithRegistry<T>(T); +pub struct DeserializeWithRegistry<T>(pub T); impl Registry { /// Registers a mapping from type tag to a concrete type into the registry. @@ -156,14 +171,30 @@ impl Registry { /// deserializes into an input with the type tag "myblobservicetype" into a /// `Box<dyn FooTrait>`, it will first call the Deserialize imple of `FooStruct` and /// then convert it into a `Box<dyn FooTrait>` using From::from. - pub fn register<T: 'static, C: DeserializeOwned + Into<T>>(&mut self, type_name: &'static str) { - let seed = BoxFnSeed::new(|x| { - deserialize::<C>(x) - .map(Into::into) - .map(DeserializeWithRegistry) - }); - self.0 - .insert((TypeId::of::<T>(), type_name), Box::new(seed)); + pub fn register< + T: 'static, + C: DeserializeOwned + + TryFrom<url::Url, Error = Box<dyn std::error::Error + Send + Sync>> + + Into<T>, + >( + &mut self, + type_name: &'static str, + ) { + self.0.insert( + (TypeId::of::<T>(), type_name), + Box::new(RegistryEntry { + serde_deserialize_seed: BoxFnSeed::new(|x| { + deserialize::<C>(x) + .map(Into::into) + .map(DeserializeWithRegistry) + }), + from_url_seed: Box::new(|url| { + C::try_from(url) + .map(Into::into) + .map(DeserializeWithRegistry) + }), + }), + ); } } @@ -180,6 +211,30 @@ impl<'de, T: 'static> serde::Deserialize<'de> for DeserializeWithRegistry<T> { } } +#[derive(Debug, thiserror::Error)] +enum TryFromUrlError { + #[error("Unknown tag: {0}")] + UnknownTag(String), +} + +impl<T: 'static> TryFrom<url::Url> for DeserializeWithRegistry<T> { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(url: url::Url) -> Result<Self, Self::Error> { + let tag = url.scheme().split('+').next().unwrap(); + // same as in the SeedFactory impl: using find() and not get() because of https://github.com/rust-lang/rust/issues/80389 + let seed = ACTIVE_REG + .get() + .unwrap() + .0 + .iter() + .find(|(k, _)| *k == &(TypeId::of::<T>(), tag)) + .ok_or_else(|| Box::new(TryFromUrlError::UnknownTag(tag.into())))? + .1; + let entry: &RegistryEntry<T> = <dyn Any>::downcast_ref(&**seed).unwrap(); + (entry.from_url_seed)(url) + } +} + thread_local! { /// The active Registry is global state, because there is no convenient and universal way to pass state /// into the functions usually used for deserialization, e.g. `serde_json::from_str`, `toml::from_str`, @@ -200,7 +255,7 @@ pub fn with_registry<R>(reg: &'static Registry, f: impl FnOnce() -> R) -> R { lazy_static! { /// The provided registry of tvix_castore, with all builtin BlobStore/DirectoryStore implementations pub static ref REG: Registry = { - let mut reg = Registry(Default::default()); + let mut reg = Default::default(); add_default_services(&mut reg); reg }; diff --git a/tvix/castore/src/directoryservice/bigtable.rs b/tvix/castore/src/directoryservice/bigtable.rs index 69edc05d787d..596094930614 100644 --- a/tvix/castore/src/directoryservice/bigtable.rs +++ b/tvix/castore/src/directoryservice/bigtable.rs @@ -361,6 +361,26 @@ impl ServiceBuilder for BigtableParameters { } } +impl TryFrom<url::Url> for BigtableParameters { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(mut url: url::Url) -> Result<Self, Self::Error> { + // parse the instance name from the hostname. + let instance_name = url + .host_str() + .ok_or_else(|| Error::StorageError("instance name missing".into()))? + .to_string(); + + // … but add it to the query string now, so we just need to parse that. + url.query_pairs_mut() + .append_pair("instance_name", &instance_name); + + let params: BigtableParameters = serde_qs::from_str(url.query().unwrap_or_default()) + .map_err(|e| Error::InvalidRequest(format!("failed to parse parameters: {}", e)))?; + + Ok(params) + } +} + fn default_app_profile_id() -> String { "default".to_owned() } diff --git a/tvix/castore/src/directoryservice/combinators.rs b/tvix/castore/src/directoryservice/combinators.rs index 2364a313d5f4..74d02f1ad2b9 100644 --- a/tvix/castore/src/directoryservice/combinators.rs +++ b/tvix/castore/src/directoryservice/combinators.rs @@ -151,6 +151,16 @@ pub struct CacheConfig { far: String, } +impl TryFrom<url::Url> for CacheConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(_url: url::Url) -> Result<Self, Self::Error> { + Err(Error::StorageError( + "Instantiating a CombinedDirectoryService from a url is not supported".into(), + ) + .into()) + } +} + #[async_trait] impl ServiceBuilder for CacheConfig { type Output = dyn DirectoryService; diff --git a/tvix/castore/src/directoryservice/from_addr.rs b/tvix/castore/src/directoryservice/from_addr.rs index 999170dcd13f..bc63f129fe9e 100644 --- a/tvix/castore/src/directoryservice/from_addr.rs +++ b/tvix/castore/src/directoryservice/from_addr.rs @@ -1,12 +1,13 @@ -use url::Url; +use std::sync::Arc; -use crate::{proto::directory_service_client::DirectoryServiceClient, Error}; +use url::Url; -use super::{ - DirectoryService, GRPCDirectoryService, MemoryDirectoryService, ObjectStoreDirectoryService, - SledDirectoryService, +use crate::composition::{ + with_registry, CompositionContext, DeserializeWithRegistry, ServiceBuilder, REG, }; +use super::DirectoryService; + /// Constructs a new instance of a [DirectoryService] from an URI. /// /// The following URIs are supported: @@ -21,101 +22,23 @@ use super::{ /// Connects to a local tvix-store gRPC service via Unix socket. /// - `grpc+http://host:port`, `grpc+https://host:port` /// Connects to a (remote) tvix-store gRPC service. -pub async fn from_addr(uri: &str) -> Result<Box<dyn DirectoryService>, crate::Error> { +pub async fn from_addr( + uri: &str, +) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync>> { #[allow(unused_mut)] let mut url = Url::parse(uri) .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?; - let directory_service: Box<dyn DirectoryService> = match url.scheme() { - "memory" => { - // memory doesn't support host or path in the URL. - if url.has_host() || !url.path().is_empty() { - return Err(Error::StorageError("invalid url".to_string())); - } - Box::<MemoryDirectoryService>::default() - } - "sled" => { - // sled doesn't support host, and a path can be provided (otherwise - // it'll live in memory only). - if url.has_host() { - return Err(Error::StorageError("no host allowed".to_string())); - } - - if url.path() == "/" { - return Err(Error::StorageError( - "cowardly refusing to open / with sled".to_string(), - )); - } - - // TODO: expose compression and other parameters as URL parameters? - - Box::new(if url.path().is_empty() { - SledDirectoryService::new_temporary() - .map_err(|e| Error::StorageError(e.to_string()))? - } else { - SledDirectoryService::new(url.path()) - .map_err(|e| Error::StorageError(e.to_string()))? - }) - } - scheme if scheme.starts_with("grpc+") => { - // schemes starting with grpc+ go to the GRPCPathInfoService. - // That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. - // - In the case of unix sockets, there must be a path, but may not be a host. - // - In the case of non-unix sockets, there must be a host, but no path. - // Constructing the channel is handled by tvix_castore::channel::from_url. - Box::new(GRPCDirectoryService::from_client( - DirectoryServiceClient::with_interceptor( - crate::tonic::channel_from_url(&url).await?, - tvix_tracing::propagate::tonic::send_trace, - ), - )) - } - scheme if scheme.starts_with("objectstore+") => { - // We need to convert the URL to string, strip the prefix there, and then - // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do. - let trimmed_url = { - let s = url.to_string(); - let mut url = Url::parse(s.strip_prefix("objectstore+").unwrap()).unwrap(); - // trim the query pairs, they might contain credentials or local settings we don't want to send as-is. - url.set_query(None); - url - }; - Box::new( - ObjectStoreDirectoryService::parse_url_opts(&trimmed_url, url.query_pairs()) - .map_err(|e| Error::StorageError(e.to_string()))?, - ) - } - #[cfg(feature = "cloud")] - "bigtable" => { - use super::bigtable::BigtableParameters; - use super::BigtableDirectoryService; - - // parse the instance name from the hostname. - let instance_name = url - .host_str() - .ok_or_else(|| Error::StorageError("instance name missing".into()))? - .to_string(); - - // … but add it to the query string now, so we just need to parse that. - url.query_pairs_mut() - .append_pair("instance_name", &instance_name); - - let params: BigtableParameters = serde_qs::from_str(url.query().unwrap_or_default()) - .map_err(|e| Error::InvalidRequest(format!("failed to parse parameters: {}", e)))?; + let directory_service_config = with_registry(®, || { + <DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>>>::try_from( + url, + ) + })? + .0; + let directory_service = directory_service_config + .build("anonymous", &CompositionContext::blank()) + .await?; - Box::new( - BigtableDirectoryService::connect(params) - .await - .map_err(|e| Error::StorageError(e.to_string()))?, - ) - } - _ => { - return Err(crate::Error::StorageError(format!( - "unknown scheme: {}", - url.scheme() - ))) - } - }; Ok(directory_service) } diff --git a/tvix/castore/src/directoryservice/grpc.rs b/tvix/castore/src/directoryservice/grpc.rs index e2ca3954c112..415796fa52cc 100644 --- a/tvix/castore/src/directoryservice/grpc.rs +++ b/tvix/castore/src/directoryservice/grpc.rs @@ -224,6 +224,19 @@ pub struct GRPCDirectoryServiceConfig { url: String, } +impl TryFrom<url::Url> for GRPCDirectoryServiceConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(url: url::Url) -> Result<Self, Self::Error> { + // This is normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts. + // - In the case of unix sockets, there must be a path, but may not be a host. + // - In the case of non-unix sockets, there must be a host, but no path. + // Constructing the channel is handled by tvix_castore::channel::from_url. + Ok(GRPCDirectoryServiceConfig { + url: url.to_string(), + }) + } +} + #[async_trait] impl ServiceBuilder for GRPCDirectoryServiceConfig { type Output = dyn DirectoryService; diff --git a/tvix/castore/src/directoryservice/memory.rs b/tvix/castore/src/directoryservice/memory.rs index f12ef6e977d0..c1fc361f0d59 100644 --- a/tvix/castore/src/directoryservice/memory.rs +++ b/tvix/castore/src/directoryservice/memory.rs @@ -91,6 +91,17 @@ impl DirectoryService for MemoryDirectoryService { #[serde(deny_unknown_fields)] pub struct MemoryDirectoryServiceConfig {} +impl TryFrom<url::Url> for MemoryDirectoryServiceConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(url: url::Url) -> Result<Self, Self::Error> { + // memory doesn't support host or path in the URL. + if url.has_host() || !url.path().is_empty() { + return Err(Error::StorageError("invalid url".to_string()).into()); + } + Ok(MemoryDirectoryServiceConfig {}) + } +} + #[async_trait] impl ServiceBuilder for MemoryDirectoryServiceConfig { type Output = dyn DirectoryService; diff --git a/tvix/castore/src/directoryservice/object_store.rs b/tvix/castore/src/directoryservice/object_store.rs index 1977de18fbec..0f0423a49e5b 100644 --- a/tvix/castore/src/directoryservice/object_store.rs +++ b/tvix/castore/src/directoryservice/object_store.rs @@ -179,6 +179,32 @@ pub struct ObjectStoreDirectoryServiceConfig { object_store_options: HashMap<String, String>, } +impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(url: url::Url) -> Result<Self, Self::Error> { + // We need to convert the URL to string, strip the prefix there, and then + // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do. + let trimmed_url = { + let s = url.to_string(); + let mut url = Url::parse( + s.strip_prefix("objectstore+") + .ok_or(Error::StorageError("Missing objectstore uri".into()))?, + )?; + // trim the query pairs, they might contain credentials or local settings we don't want to send as-is. + url.set_query(None); + url + }; + Ok(ObjectStoreDirectoryServiceConfig { + object_store_url: trimmed_url.into(), + object_store_options: url + .query_pairs() + .into_iter() + .map(|(k, v)| (k.to_string(), v.to_string())) + .collect(), + }) + } +} + #[async_trait] impl ServiceBuilder for ObjectStoreDirectoryServiceConfig { type Output = dyn DirectoryService; diff --git a/tvix/castore/src/directoryservice/sled.rs b/tvix/castore/src/directoryservice/sled.rs index 8e74227b3e62..61058b392bb3 100644 --- a/tvix/castore/src/directoryservice/sled.rs +++ b/tvix/castore/src/directoryservice/sled.rs @@ -145,6 +145,31 @@ pub struct SledDirectoryServiceConfig { path: Option<String>, } +impl TryFrom<url::Url> for SledDirectoryServiceConfig { + type Error = Box<dyn std::error::Error + Send + Sync>; + fn try_from(url: url::Url) -> Result<Self, Self::Error> { + // sled doesn't support host, and a path can be provided (otherwise + // it'll live in memory only). + if url.has_host() { + return Err(Error::StorageError("no host allowed".to_string()).into()); + } + + // TODO: expose compression and other parameters as URL parameters? + + Ok(if url.path().is_empty() { + SledDirectoryServiceConfig { + is_temporary: true, + path: None, + } + } else { + SledDirectoryServiceConfig { + is_temporary: false, + path: Some(url.path().to_string()), + } + }) + } +} + #[async_trait] impl ServiceBuilder for SledDirectoryServiceConfig { type Output = dyn DirectoryService; diff --git a/tvix/castore/src/tests/import.rs b/tvix/castore/src/tests/import.rs index 8b3bd5ce0ffc..72fb06aea877 100644 --- a/tvix/castore/src/tests/import.rs +++ b/tvix/castore/src/tests/import.rs @@ -4,7 +4,6 @@ use crate::fixtures::*; use crate::import::fs::ingest_path; use crate::proto; -use std::sync::Arc; use tempfile::TempDir; #[cfg(target_family = "unix")] @@ -26,7 +25,7 @@ async fn symlink() { .unwrap(); let root_node = ingest_path( - Arc::from(blob_service), + blob_service, directory_service, tmpdir.path().join("doesntmatter"), ) @@ -44,8 +43,7 @@ async fn symlink() { #[tokio::test] async fn single_file() { - let blob_service = - Arc::from(blobservice::from_addr("memory://").await.unwrap()) as Arc<dyn BlobService>; + let blob_service = blobservice::from_addr("memory://").await.unwrap(); let directory_service = directoryservice::from_addr("memory://").await.unwrap(); let tmpdir = TempDir::new().unwrap(); @@ -77,8 +75,7 @@ async fn single_file() { #[cfg(target_family = "unix")] #[tokio::test] async fn complicated() { - let blob_service = - Arc::from(blobservice::from_addr("memory://").await.unwrap()) as Arc<dyn BlobService>; + let blob_service = blobservice::from_addr("memory://").await.unwrap(); let directory_service = directoryservice::from_addr("memory://").await.unwrap(); let tmpdir = TempDir::new().unwrap(); |