about summary refs log tree commit diff
path: root/tvix/castore
diff options
context:
space:
mode:
authorYureka <tvl@yuka.dev>2024-07-18T17·09+0200
committeryuka <tvl@yuka.dev>2024-07-18T19·19+0000
commit168e4fda5909e535f33051051ef426e221ef20d4 (patch)
treee23b8ad4ced3f4232bdb0ad186f3b63f693c57e5 /tvix/castore
parent79317be214ce2f1e3347438319d3482bb773a649 (diff)
refactor(tvix): use composition & registry for from_addr r/8368
Change-Id: I3c94ecb5958294b5973c6fcdf5ee9c0d37fa54ad
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11976
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Autosubmit: yuka <yuka@yuka.dev>
Diffstat (limited to 'tvix/castore')
-rw-r--r--tvix/castore/src/blobservice/combinator.rs12
-rw-r--r--tvix/castore/src/blobservice/from_addr.rs62
-rw-r--r--tvix/castore/src/blobservice/grpc.rs13
-rw-r--r--tvix/castore/src/blobservice/memory.rs13
-rw-r--r--tvix/castore/src/blobservice/object_store.rs91
-rw-r--r--tvix/castore/src/composition.rs79
-rw-r--r--tvix/castore/src/directoryservice/bigtable.rs20
-rw-r--r--tvix/castore/src/directoryservice/combinators.rs10
-rw-r--r--tvix/castore/src/directoryservice/from_addr.rs113
-rw-r--r--tvix/castore/src/directoryservice/grpc.rs13
-rw-r--r--tvix/castore/src/directoryservice/memory.rs11
-rw-r--r--tvix/castore/src/directoryservice/object_store.rs26
-rw-r--r--tvix/castore/src/directoryservice/sled.rs25
-rw-r--r--tvix/castore/src/tests/import.rs9
14 files changed, 295 insertions, 202 deletions
diff --git a/tvix/castore/src/blobservice/combinator.rs b/tvix/castore/src/blobservice/combinator.rs
index 0bce657e9176..8ec5a859bcda 100644
--- a/tvix/castore/src/blobservice/combinator.rs
+++ b/tvix/castore/src/blobservice/combinator.rs
@@ -6,7 +6,7 @@ use tonic::async_trait;
 use tracing::{instrument, warn};
 
 use crate::composition::{CompositionContext, ServiceBuilder};
-use crate::B3Digest;
+use crate::{B3Digest, Error};
 
 use super::{naive_seeker::NaiveSeeker, BlobReader, BlobService, BlobWriter};
 
@@ -103,6 +103,16 @@ pub struct CombinedBlobServiceConfig {
     remote: String,
 }
 
+impl TryFrom<url::Url> for CombinedBlobServiceConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(_url: url::Url) -> Result<Self, Self::Error> {
+        Err(Error::StorageError(
+            "Instantiating a CombinedBlobService from a url is not supported".into(),
+        )
+        .into())
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for CombinedBlobServiceConfig {
     type Output = dyn BlobService;
diff --git a/tvix/castore/src/blobservice/from_addr.rs b/tvix/castore/src/blobservice/from_addr.rs
index b7e266c4eaec..c5cabaa9d945 100644
--- a/tvix/castore/src/blobservice/from_addr.rs
+++ b/tvix/castore/src/blobservice/from_addr.rs
@@ -1,8 +1,12 @@
+use std::sync::Arc;
+
 use url::Url;
 
-use crate::{proto::blob_service_client::BlobServiceClient, Error};
+use crate::composition::{
+    with_registry, CompositionContext, DeserializeWithRegistry, ServiceBuilder, REG,
+};
 
-use super::{BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobService};
+use super::BlobService;
 
 /// Constructs a new instance of a [BlobService] from an URI.
 ///
@@ -12,53 +16,19 @@ use super::{BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobServ
 /// - `objectstore+*://` ([ObjectStoreBlobService])
 ///
 /// See their `from_url` methods for more details about their syntax.
-pub async fn from_addr(uri: &str) -> Result<Box<dyn BlobService>, crate::Error> {
+pub async fn from_addr(
+    uri: &str,
+) -> Result<Arc<dyn BlobService>, Box<dyn std::error::Error + Send + Sync>> {
     let url = Url::parse(uri)
         .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;
 
-    let blob_service: Box<dyn BlobService> = match url.scheme() {
-        "memory" => {
-            // memory doesn't support host or path in the URL.
-            if url.has_host() || !url.path().is_empty() {
-                return Err(Error::StorageError("invalid url".to_string()));
-            }
-            Box::<MemoryBlobService>::default()
-        }
-        scheme if scheme.starts_with("grpc+") => {
-            // schemes starting with grpc+ go to the GRPCPathInfoService.
-            //   That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
-            // - In the case of unix sockets, there must be a path, but may not be a host.
-            // - In the case of non-unix sockets, there must be a host, but no path.
-            // Constructing the channel is handled by tvix_castore::channel::from_url.
-            Box::new(GRPCBlobService::from_client(
-                BlobServiceClient::with_interceptor(
-                    crate::tonic::channel_from_url(&url).await?,
-                    tvix_tracing::propagate::tonic::send_trace,
-                ),
-            ))
-        }
-        scheme if scheme.starts_with("objectstore+") => {
-            // We need to convert the URL to string, strip the prefix there, and then
-            // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
-            let trimmed_url = {
-                let s = url.to_string();
-                let mut url = Url::parse(s.strip_prefix("objectstore+").unwrap()).unwrap();
-                // trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
-                url.set_query(None);
-                url
-            };
-            Box::new(
-                ObjectStoreBlobService::parse_url_opts(&trimmed_url, url.query_pairs())
-                    .map_err(|e| Error::StorageError(e.to_string()))?,
-            )
-        }
-        scheme => {
-            return Err(crate::Error::StorageError(format!(
-                "unknown scheme: {}",
-                scheme
-            )))
-        }
-    };
+    let blob_service_config = with_registry(&REG, || {
+        <DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = dyn BlobService>>>>::try_from(url)
+    })?
+    .0;
+    let blob_service = blob_service_config
+        .build("anonymous", &CompositionContext::blank())
+        .await?;
 
     Ok(blob_service)
 }
diff --git a/tvix/castore/src/blobservice/grpc.rs b/tvix/castore/src/blobservice/grpc.rs
index 56a2ebf00038..f5705adbf432 100644
--- a/tvix/castore/src/blobservice/grpc.rs
+++ b/tvix/castore/src/blobservice/grpc.rs
@@ -187,6 +187,19 @@ pub struct GRPCBlobServiceConfig {
     url: String,
 }
 
+impl TryFrom<url::Url> for GRPCBlobServiceConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+        //   normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
+        // - In the case of unix sockets, there must be a path, but may not be a host.
+        // - In the case of non-unix sockets, there must be a host, but no path.
+        // Constructing the channel is handled by tvix_castore::channel::from_url.
+        Ok(GRPCBlobServiceConfig {
+            url: url.to_string(),
+        })
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for GRPCBlobServiceConfig {
     type Output = dyn BlobService;
diff --git a/tvix/castore/src/blobservice/memory.rs b/tvix/castore/src/blobservice/memory.rs
index 0205dcf7bd70..83b37edb1c89 100644
--- a/tvix/castore/src/blobservice/memory.rs
+++ b/tvix/castore/src/blobservice/memory.rs
@@ -7,7 +7,7 @@ use tracing::instrument;
 
 use super::{BlobReader, BlobService, BlobWriter};
 use crate::composition::{CompositionContext, ServiceBuilder};
-use crate::B3Digest;
+use crate::{B3Digest, Error};
 
 #[derive(Clone, Default)]
 pub struct MemoryBlobService {
@@ -42,6 +42,17 @@ impl BlobService for MemoryBlobService {
 #[serde(deny_unknown_fields)]
 pub struct MemoryBlobServiceConfig {}
 
+impl TryFrom<url::Url> for MemoryBlobServiceConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+        // memory doesn't support host or path in the URL.
+        if url.has_host() || !url.path().is_empty() {
+            return Err(Error::StorageError("invalid url".to_string()).into());
+        }
+        Ok(MemoryBlobServiceConfig {})
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for MemoryBlobServiceConfig {
     type Output = dyn BlobService;
diff --git a/tvix/castore/src/blobservice/object_store.rs b/tvix/castore/src/blobservice/object_store.rs
index bd8c2bd747fb..224ebae9e2c6 100644
--- a/tvix/castore/src/blobservice/object_store.rs
+++ b/tvix/castore/src/blobservice/object_store.rs
@@ -21,21 +21,11 @@ use url::Url;
 use crate::{
     composition::{CompositionContext, ServiceBuilder},
     proto::{stat_blob_response::ChunkMeta, StatBlobResponse},
-    B3Digest, B3HashingReader,
+    B3Digest, B3HashingReader, Error,
 };
 
 use super::{BlobReader, BlobService, BlobWriter, ChunkedReader};
 
-#[derive(Clone)]
-pub struct ObjectStoreBlobService {
-    object_store: Arc<dyn ObjectStore>,
-    base_path: Path,
-
-    /// Average chunk size for FastCDC, in bytes.
-    /// min value is half, max value double of that number.
-    avg_chunk_size: u32,
-}
-
 /// Uses any object storage supported by the [object_store] crate to provide a
 /// tvix-castore [BlobService].
 ///
@@ -72,31 +62,14 @@ pub struct ObjectStoreBlobService {
 /// It also allows signalling any compression of chunks in the content-type.
 /// Migration *should* be possible by simply adding the right content-types to
 /// all keys stored so far, but no promises ;-)
-impl ObjectStoreBlobService {
-    /// Constructs a new [ObjectStoreBlobService] from a [Url] supported by
-    /// [object_store].
-    /// Any path suffix becomes the base path of the object store.
-    /// additional options, the same as in [object_store::parse_url_opts] can
-    /// be passed.
-    pub fn parse_url_opts<I, K, V>(url: &Url, options: I) -> Result<Self, object_store::Error>
-    where
-        I: IntoIterator<Item = (K, V)>,
-        K: AsRef<str>,
-        V: Into<String>,
-    {
-        let (object_store, path) = object_store::parse_url_opts(url, options)?;
-
-        Ok(Self {
-            object_store: Arc::new(object_store),
-            base_path: path,
-            avg_chunk_size: 256 * 1024,
-        })
-    }
+#[derive(Clone)]
+pub struct ObjectStoreBlobService {
+    object_store: Arc<dyn ObjectStore>,
+    base_path: Path,
 
-    /// Like [Self::parse_url_opts], except without the options.
-    pub fn parse_url(url: &Url) -> Result<Self, object_store::Error> {
-        Self::parse_url_opts(url, Vec::<(String, String)>::new())
-    }
+    /// Average chunk size for FastCDC, in bytes.
+    /// min value is half, max value double of that number.
+    avg_chunk_size: u32,
 }
 
 #[instrument(level=Level::TRACE, skip_all,fields(base_path=%base_path,blob.digest=%digest),ret(Display))]
@@ -281,10 +254,41 @@ pub struct ObjectStoreBlobServiceConfig {
     object_store_url: String,
     #[serde(default = "default_avg_chunk_size")]
     avg_chunk_size: u32,
-    #[serde(default)]
     object_store_options: HashMap<String, String>,
 }
 
+impl TryFrom<url::Url> for ObjectStoreBlobServiceConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    /// Constructs a new [ObjectStoreBlobService] from a [Url] supported by
+    /// [object_store].
+    /// Any path suffix becomes the base path of the object store.
+    /// additional options, the same as in [object_store::parse_url_opts] can
+    /// be passed.
+    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+        // We need to convert the URL to string, strip the prefix there, and then
+        // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
+        let trimmed_url = {
+            let s = url.to_string();
+            let mut url = Url::parse(
+                s.strip_prefix("objectstore+")
+                    .ok_or(Error::StorageError("Missing objectstore uri".into()))?,
+            )?;
+            // trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
+            url.set_query(None);
+            url
+        };
+        Ok(ObjectStoreBlobServiceConfig {
+            object_store_url: trimmed_url.into(),
+            object_store_options: url
+                .query_pairs()
+                .into_iter()
+                .map(|(k, v)| (k.to_string(), v.to_string()))
+                .collect(),
+            avg_chunk_size: 256 * 1024,
+        })
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for ObjectStoreBlobServiceConfig {
     type Output = dyn BlobService;
@@ -548,7 +552,7 @@ where
 
 #[cfg(test)]
 mod test {
-    use super::chunk_and_upload;
+    use super::{chunk_and_upload, default_avg_chunk_size};
     use crate::{
         blobservice::{BlobService, ObjectStoreBlobService},
         fixtures::{BLOB_A, BLOB_A_DIGEST},
@@ -559,13 +563,18 @@ mod test {
     /// Tests chunk_and_upload directly, bypassing the BlobWriter at open_write().
     #[tokio::test]
     async fn test_chunk_and_upload() {
-        let blobsvc = Arc::new(
-            ObjectStoreBlobService::parse_url(&Url::parse("memory:///").unwrap()).unwrap(),
-        );
+        let (object_store, base_path) =
+            object_store::parse_url(&Url::parse("memory:///").unwrap()).unwrap();
+        let object_store: Arc<dyn object_store::ObjectStore> = Arc::from(object_store);
+        let blobsvc = Arc::new(ObjectStoreBlobService {
+            object_store: object_store.clone(),
+            avg_chunk_size: default_avg_chunk_size(),
+            base_path,
+        });
 
         let blob_digest = chunk_and_upload(
             &mut Cursor::new(BLOB_A.to_vec()),
-            blobsvc.object_store.clone(),
+            object_store,
             object_store::path::Path::from("/"),
             1024 / 2,
             1024,
diff --git a/tvix/castore/src/composition.rs b/tvix/castore/src/composition.rs
index cd4064af9a3e..9e7b3712fb7a 100644
--- a/tvix/castore/src/composition.rs
+++ b/tvix/castore/src/composition.rs
@@ -31,6 +31,13 @@
 //!     }
 //! }
 //!
+//! impl TryFrom<url::Url> for MyBlobServiceConfig {
+//!     type Error = Box<dyn std::error::Error + Send + Sync>;
+//!     fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+//!         todo!()
+//!     }
+//! }
+//!
 //! pub fn add_my_service(reg: &mut Registry) {
 //!     reg.register::<Box<dyn ServiceBuilder<Output = dyn BlobService>>, MyBlobServiceConfig>("myblobservicetype");
 //! }
@@ -100,7 +107,7 @@ use tonic::async_trait;
 // This is really ugly. Really we would want to store this as a generic static field:
 //
 // ```
-// struct Registry<T>(BTreeMap<(&'static str), BoxSeedFn<T>);
+// struct Registry<T>(BTreeMap<(&'static str), RegistryEntry<T>);
 // static REG<T>: Registry<T>;
 // ```
 //
@@ -116,6 +123,12 @@ use tonic::async_trait;
 // I said it was ugly...
 #[derive(Default)]
 pub struct Registry(BTreeMap<(TypeId, &'static str), Box<dyn Any + Sync>>);
+pub type FromUrlSeed<T> =
+    Box<dyn Fn(url::Url) -> Result<T, Box<dyn std::error::Error + Send + Sync>> + Sync>;
+pub struct RegistryEntry<T> {
+    serde_deserialize_seed: BoxFnSeed<DeserializeWithRegistry<T>>,
+    from_url_seed: FromUrlSeed<DeserializeWithRegistry<T>>,
+}
 
 struct RegistryWithFakeType<'r, T>(&'r Registry, PhantomData<T>);
 
@@ -137,7 +150,9 @@ impl<'r, 'de: 'r, T: 'static> SeedFactory<'de, TagString<'de>> for RegistryWithF
             .ok_or_else(|| serde::de::Error::custom("Unknown tag"))?
             .1;
 
-        Ok(<dyn Any>::downcast_ref(&**seed).unwrap())
+        let entry: &RegistryEntry<T> = <dyn Any>::downcast_ref(&**seed).unwrap();
+
+        Ok(&entry.serde_deserialize_seed)
     }
 }
 
@@ -146,7 +161,7 @@ impl<'r, 'de: 'r, T: 'static> SeedFactory<'de, TagString<'de>> for RegistryWithF
 /// Wrap your type in this in order to deserialize it using a registry, e.g.
 /// `RegistryWithFakeType<Box<dyn MyTrait>>`, then the types registered for `Box<dyn MyTrait>`
 /// will be used.
-pub struct DeserializeWithRegistry<T>(T);
+pub struct DeserializeWithRegistry<T>(pub T);
 
 impl Registry {
     /// Registers a mapping from type tag to a concrete type into the registry.
@@ -156,14 +171,30 @@ impl Registry {
     /// deserializes into an input with the type tag "myblobservicetype" into a
     /// `Box<dyn FooTrait>`, it will first call the Deserialize imple of `FooStruct` and
     /// then convert it into a `Box<dyn FooTrait>` using From::from.
-    pub fn register<T: 'static, C: DeserializeOwned + Into<T>>(&mut self, type_name: &'static str) {
-        let seed = BoxFnSeed::new(|x| {
-            deserialize::<C>(x)
-                .map(Into::into)
-                .map(DeserializeWithRegistry)
-        });
-        self.0
-            .insert((TypeId::of::<T>(), type_name), Box::new(seed));
+    pub fn register<
+        T: 'static,
+        C: DeserializeOwned
+            + TryFrom<url::Url, Error = Box<dyn std::error::Error + Send + Sync>>
+            + Into<T>,
+    >(
+        &mut self,
+        type_name: &'static str,
+    ) {
+        self.0.insert(
+            (TypeId::of::<T>(), type_name),
+            Box::new(RegistryEntry {
+                serde_deserialize_seed: BoxFnSeed::new(|x| {
+                    deserialize::<C>(x)
+                        .map(Into::into)
+                        .map(DeserializeWithRegistry)
+                }),
+                from_url_seed: Box::new(|url| {
+                    C::try_from(url)
+                        .map(Into::into)
+                        .map(DeserializeWithRegistry)
+                }),
+            }),
+        );
     }
 }
 
@@ -180,6 +211,30 @@ impl<'de, T: 'static> serde::Deserialize<'de> for DeserializeWithRegistry<T> {
     }
 }
 
+#[derive(Debug, thiserror::Error)]
+enum TryFromUrlError {
+    #[error("Unknown tag: {0}")]
+    UnknownTag(String),
+}
+
+impl<T: 'static> TryFrom<url::Url> for DeserializeWithRegistry<T> {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+        let tag = url.scheme().split('+').next().unwrap();
+        // same as in the SeedFactory impl: using find() and not get() because of https://github.com/rust-lang/rust/issues/80389
+        let seed = ACTIVE_REG
+            .get()
+            .unwrap()
+            .0
+            .iter()
+            .find(|(k, _)| *k == &(TypeId::of::<T>(), tag))
+            .ok_or_else(|| Box::new(TryFromUrlError::UnknownTag(tag.into())))?
+            .1;
+        let entry: &RegistryEntry<T> = <dyn Any>::downcast_ref(&**seed).unwrap();
+        (entry.from_url_seed)(url)
+    }
+}
+
 thread_local! {
     /// The active Registry is global state, because there is no convenient and universal way to pass state
     /// into the functions usually used for deserialization, e.g. `serde_json::from_str`, `toml::from_str`,
@@ -200,7 +255,7 @@ pub fn with_registry<R>(reg: &'static Registry, f: impl FnOnce() -> R) -> R {
 lazy_static! {
     /// The provided registry of tvix_castore, with all builtin BlobStore/DirectoryStore implementations
     pub static ref REG: Registry = {
-        let mut reg = Registry(Default::default());
+        let mut reg = Default::default();
         add_default_services(&mut reg);
         reg
     };
diff --git a/tvix/castore/src/directoryservice/bigtable.rs b/tvix/castore/src/directoryservice/bigtable.rs
index 69edc05d787d..596094930614 100644
--- a/tvix/castore/src/directoryservice/bigtable.rs
+++ b/tvix/castore/src/directoryservice/bigtable.rs
@@ -361,6 +361,26 @@ impl ServiceBuilder for BigtableParameters {
     }
 }
 
+impl TryFrom<url::Url> for BigtableParameters {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(mut url: url::Url) -> Result<Self, Self::Error> {
+        // parse the instance name from the hostname.
+        let instance_name = url
+            .host_str()
+            .ok_or_else(|| Error::StorageError("instance name missing".into()))?
+            .to_string();
+
+        // … but add it to the query string now, so we just need to parse that.
+        url.query_pairs_mut()
+            .append_pair("instance_name", &instance_name);
+
+        let params: BigtableParameters = serde_qs::from_str(url.query().unwrap_or_default())
+            .map_err(|e| Error::InvalidRequest(format!("failed to parse parameters: {}", e)))?;
+
+        Ok(params)
+    }
+}
+
 fn default_app_profile_id() -> String {
     "default".to_owned()
 }
diff --git a/tvix/castore/src/directoryservice/combinators.rs b/tvix/castore/src/directoryservice/combinators.rs
index 2364a313d5f4..74d02f1ad2b9 100644
--- a/tvix/castore/src/directoryservice/combinators.rs
+++ b/tvix/castore/src/directoryservice/combinators.rs
@@ -151,6 +151,16 @@ pub struct CacheConfig {
     far: String,
 }
 
+impl TryFrom<url::Url> for CacheConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(_url: url::Url) -> Result<Self, Self::Error> {
+        Err(Error::StorageError(
+            "Instantiating a CombinedDirectoryService from a url is not supported".into(),
+        )
+        .into())
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for CacheConfig {
     type Output = dyn DirectoryService;
diff --git a/tvix/castore/src/directoryservice/from_addr.rs b/tvix/castore/src/directoryservice/from_addr.rs
index 999170dcd13f..bc63f129fe9e 100644
--- a/tvix/castore/src/directoryservice/from_addr.rs
+++ b/tvix/castore/src/directoryservice/from_addr.rs
@@ -1,12 +1,13 @@
-use url::Url;
+use std::sync::Arc;
 
-use crate::{proto::directory_service_client::DirectoryServiceClient, Error};
+use url::Url;
 
-use super::{
-    DirectoryService, GRPCDirectoryService, MemoryDirectoryService, ObjectStoreDirectoryService,
-    SledDirectoryService,
+use crate::composition::{
+    with_registry, CompositionContext, DeserializeWithRegistry, ServiceBuilder, REG,
 };
 
+use super::DirectoryService;
+
 /// Constructs a new instance of a [DirectoryService] from an URI.
 ///
 /// The following URIs are supported:
@@ -21,101 +22,23 @@ use super::{
 ///   Connects to a local tvix-store gRPC service via Unix socket.
 /// - `grpc+http://host:port`, `grpc+https://host:port`
 ///    Connects to a (remote) tvix-store gRPC service.
-pub async fn from_addr(uri: &str) -> Result<Box<dyn DirectoryService>, crate::Error> {
+pub async fn from_addr(
+    uri: &str,
+) -> Result<Arc<dyn DirectoryService>, Box<dyn std::error::Error + Send + Sync>> {
     #[allow(unused_mut)]
     let mut url = Url::parse(uri)
         .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;
 
-    let directory_service: Box<dyn DirectoryService> = match url.scheme() {
-        "memory" => {
-            // memory doesn't support host or path in the URL.
-            if url.has_host() || !url.path().is_empty() {
-                return Err(Error::StorageError("invalid url".to_string()));
-            }
-            Box::<MemoryDirectoryService>::default()
-        }
-        "sled" => {
-            // sled doesn't support host, and a path can be provided (otherwise
-            // it'll live in memory only).
-            if url.has_host() {
-                return Err(Error::StorageError("no host allowed".to_string()));
-            }
-
-            if url.path() == "/" {
-                return Err(Error::StorageError(
-                    "cowardly refusing to open / with sled".to_string(),
-                ));
-            }
-
-            // TODO: expose compression and other parameters as URL parameters?
-
-            Box::new(if url.path().is_empty() {
-                SledDirectoryService::new_temporary()
-                    .map_err(|e| Error::StorageError(e.to_string()))?
-            } else {
-                SledDirectoryService::new(url.path())
-                    .map_err(|e| Error::StorageError(e.to_string()))?
-            })
-        }
-        scheme if scheme.starts_with("grpc+") => {
-            // schemes starting with grpc+ go to the GRPCPathInfoService.
-            //   That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
-            // - In the case of unix sockets, there must be a path, but may not be a host.
-            // - In the case of non-unix sockets, there must be a host, but no path.
-            // Constructing the channel is handled by tvix_castore::channel::from_url.
-            Box::new(GRPCDirectoryService::from_client(
-                DirectoryServiceClient::with_interceptor(
-                    crate::tonic::channel_from_url(&url).await?,
-                    tvix_tracing::propagate::tonic::send_trace,
-                ),
-            ))
-        }
-        scheme if scheme.starts_with("objectstore+") => {
-            // We need to convert the URL to string, strip the prefix there, and then
-            // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
-            let trimmed_url = {
-                let s = url.to_string();
-                let mut url = Url::parse(s.strip_prefix("objectstore+").unwrap()).unwrap();
-                // trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
-                url.set_query(None);
-                url
-            };
-            Box::new(
-                ObjectStoreDirectoryService::parse_url_opts(&trimmed_url, url.query_pairs())
-                    .map_err(|e| Error::StorageError(e.to_string()))?,
-            )
-        }
-        #[cfg(feature = "cloud")]
-        "bigtable" => {
-            use super::bigtable::BigtableParameters;
-            use super::BigtableDirectoryService;
-
-            // parse the instance name from the hostname.
-            let instance_name = url
-                .host_str()
-                .ok_or_else(|| Error::StorageError("instance name missing".into()))?
-                .to_string();
-
-            // … but add it to the query string now, so we just need to parse that.
-            url.query_pairs_mut()
-                .append_pair("instance_name", &instance_name);
-
-            let params: BigtableParameters = serde_qs::from_str(url.query().unwrap_or_default())
-                .map_err(|e| Error::InvalidRequest(format!("failed to parse parameters: {}", e)))?;
+    let directory_service_config = with_registry(&REG, || {
+        <DeserializeWithRegistry<Box<dyn ServiceBuilder<Output = dyn DirectoryService>>>>::try_from(
+            url,
+        )
+    })?
+    .0;
+    let directory_service = directory_service_config
+        .build("anonymous", &CompositionContext::blank())
+        .await?;
 
-            Box::new(
-                BigtableDirectoryService::connect(params)
-                    .await
-                    .map_err(|e| Error::StorageError(e.to_string()))?,
-            )
-        }
-        _ => {
-            return Err(crate::Error::StorageError(format!(
-                "unknown scheme: {}",
-                url.scheme()
-            )))
-        }
-    };
     Ok(directory_service)
 }
 
diff --git a/tvix/castore/src/directoryservice/grpc.rs b/tvix/castore/src/directoryservice/grpc.rs
index e2ca3954c112..415796fa52cc 100644
--- a/tvix/castore/src/directoryservice/grpc.rs
+++ b/tvix/castore/src/directoryservice/grpc.rs
@@ -224,6 +224,19 @@ pub struct GRPCDirectoryServiceConfig {
     url: String,
 }
 
+impl TryFrom<url::Url> for GRPCDirectoryServiceConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+        //   This is normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
+        // - In the case of unix sockets, there must be a path, but may not be a host.
+        // - In the case of non-unix sockets, there must be a host, but no path.
+        // Constructing the channel is handled by tvix_castore::channel::from_url.
+        Ok(GRPCDirectoryServiceConfig {
+            url: url.to_string(),
+        })
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for GRPCDirectoryServiceConfig {
     type Output = dyn DirectoryService;
diff --git a/tvix/castore/src/directoryservice/memory.rs b/tvix/castore/src/directoryservice/memory.rs
index f12ef6e977d0..c1fc361f0d59 100644
--- a/tvix/castore/src/directoryservice/memory.rs
+++ b/tvix/castore/src/directoryservice/memory.rs
@@ -91,6 +91,17 @@ impl DirectoryService for MemoryDirectoryService {
 #[serde(deny_unknown_fields)]
 pub struct MemoryDirectoryServiceConfig {}
 
+impl TryFrom<url::Url> for MemoryDirectoryServiceConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+        // memory doesn't support host or path in the URL.
+        if url.has_host() || !url.path().is_empty() {
+            return Err(Error::StorageError("invalid url".to_string()).into());
+        }
+        Ok(MemoryDirectoryServiceConfig {})
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for MemoryDirectoryServiceConfig {
     type Output = dyn DirectoryService;
diff --git a/tvix/castore/src/directoryservice/object_store.rs b/tvix/castore/src/directoryservice/object_store.rs
index 1977de18fbec..0f0423a49e5b 100644
--- a/tvix/castore/src/directoryservice/object_store.rs
+++ b/tvix/castore/src/directoryservice/object_store.rs
@@ -179,6 +179,32 @@ pub struct ObjectStoreDirectoryServiceConfig {
     object_store_options: HashMap<String, String>,
 }
 
+impl TryFrom<url::Url> for ObjectStoreDirectoryServiceConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+        // We need to convert the URL to string, strip the prefix there, and then
+        // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
+        let trimmed_url = {
+            let s = url.to_string();
+            let mut url = Url::parse(
+                s.strip_prefix("objectstore+")
+                    .ok_or(Error::StorageError("Missing objectstore uri".into()))?,
+            )?;
+            // trim the query pairs, they might contain credentials or local settings we don't want to send as-is.
+            url.set_query(None);
+            url
+        };
+        Ok(ObjectStoreDirectoryServiceConfig {
+            object_store_url: trimmed_url.into(),
+            object_store_options: url
+                .query_pairs()
+                .into_iter()
+                .map(|(k, v)| (k.to_string(), v.to_string()))
+                .collect(),
+        })
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for ObjectStoreDirectoryServiceConfig {
     type Output = dyn DirectoryService;
diff --git a/tvix/castore/src/directoryservice/sled.rs b/tvix/castore/src/directoryservice/sled.rs
index 8e74227b3e62..61058b392bb3 100644
--- a/tvix/castore/src/directoryservice/sled.rs
+++ b/tvix/castore/src/directoryservice/sled.rs
@@ -145,6 +145,31 @@ pub struct SledDirectoryServiceConfig {
     path: Option<String>,
 }
 
+impl TryFrom<url::Url> for SledDirectoryServiceConfig {
+    type Error = Box<dyn std::error::Error + Send + Sync>;
+    fn try_from(url: url::Url) -> Result<Self, Self::Error> {
+        // sled doesn't support host, and a path can be provided (otherwise
+        // it'll live in memory only).
+        if url.has_host() {
+            return Err(Error::StorageError("no host allowed".to_string()).into());
+        }
+
+        // TODO: expose compression and other parameters as URL parameters?
+
+        Ok(if url.path().is_empty() {
+            SledDirectoryServiceConfig {
+                is_temporary: true,
+                path: None,
+            }
+        } else {
+            SledDirectoryServiceConfig {
+                is_temporary: false,
+                path: Some(url.path().to_string()),
+            }
+        })
+    }
+}
+
 #[async_trait]
 impl ServiceBuilder for SledDirectoryServiceConfig {
     type Output = dyn DirectoryService;
diff --git a/tvix/castore/src/tests/import.rs b/tvix/castore/src/tests/import.rs
index 8b3bd5ce0ffc..72fb06aea877 100644
--- a/tvix/castore/src/tests/import.rs
+++ b/tvix/castore/src/tests/import.rs
@@ -4,7 +4,6 @@ use crate::fixtures::*;
 use crate::import::fs::ingest_path;
 use crate::proto;
 
-use std::sync::Arc;
 use tempfile::TempDir;
 
 #[cfg(target_family = "unix")]
@@ -26,7 +25,7 @@ async fn symlink() {
     .unwrap();
 
     let root_node = ingest_path(
-        Arc::from(blob_service),
+        blob_service,
         directory_service,
         tmpdir.path().join("doesntmatter"),
     )
@@ -44,8 +43,7 @@ async fn symlink() {
 
 #[tokio::test]
 async fn single_file() {
-    let blob_service =
-        Arc::from(blobservice::from_addr("memory://").await.unwrap()) as Arc<dyn BlobService>;
+    let blob_service = blobservice::from_addr("memory://").await.unwrap();
     let directory_service = directoryservice::from_addr("memory://").await.unwrap();
 
     let tmpdir = TempDir::new().unwrap();
@@ -77,8 +75,7 @@ async fn single_file() {
 #[cfg(target_family = "unix")]
 #[tokio::test]
 async fn complicated() {
-    let blob_service =
-        Arc::from(blobservice::from_addr("memory://").await.unwrap()) as Arc<dyn BlobService>;
+    let blob_service = blobservice::from_addr("memory://").await.unwrap();
     let directory_service = directoryservice::from_addr("memory://").await.unwrap();
 
     let tmpdir = TempDir::new().unwrap();