about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-06-09T20·35+0300
committerclbot <clbot@tvl.fyi>2023-06-14T23·15+0000
commitbb7c76739a30d2f312693799d8237eb0eb2da28d (patch)
tree99a849fe4c8d2fee7e8ba49504c192607ad1128f
parenta1acb5bcb301bd599d97909abebcda6235421dc4 (diff)
feat(tvix/store/directorysvc): add from_addr r/6303
Add --directory-service-addr arg to tvix-store CLI.

Change-Id: Iea1e6f08f27f7157b21ccf397297c68358bd78a0
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8743
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: tazjin <tazjin@tvl.su>
-rw-r--r--tvix/store/src/bin/tvix-store.rs42
-rw-r--r--tvix/store/src/directoryservice/from_addr.rs37
-rw-r--r--tvix/store/src/directoryservice/grpc.rs56
-rw-r--r--tvix/store/src/directoryservice/memory.rs61
-rw-r--r--tvix/store/src/directoryservice/mod.rs8
-rw-r--r--tvix/store/src/directoryservice/sled.rs104
6 files changed, 287 insertions, 21 deletions
diff --git a/tvix/store/src/bin/tvix-store.rs b/tvix/store/src/bin/tvix-store.rs
index 6e2424a7e5..c8d361212e 100644
--- a/tvix/store/src/bin/tvix-store.rs
+++ b/tvix/store/src/bin/tvix-store.rs
@@ -7,14 +7,11 @@ use std::path::PathBuf;
 use std::sync::Arc;
 use tracing_subscriber::prelude::*;
 use tvix_store::blobservice;
-use tvix_store::directoryservice::DirectoryService;
-use tvix_store::directoryservice::GRPCDirectoryService;
-use tvix_store::directoryservice::SledDirectoryService;
+use tvix_store::directoryservice;
 use tvix_store::pathinfoservice::GRPCPathInfoService;
 use tvix_store::pathinfoservice::PathInfoService;
 use tvix_store::pathinfoservice::SledPathInfoService;
 use tvix_store::proto::blob_service_server::BlobServiceServer;
-use tvix_store::proto::directory_service_client::DirectoryServiceClient;
 use tvix_store::proto::directory_service_server::DirectoryServiceServer;
 use tvix_store::proto::node::Node;
 use tvix_store::proto::path_info_service_client::PathInfoServiceClient;
@@ -55,6 +52,13 @@ enum Commands {
 
         #[arg(long, env, default_value = "sled:///var/lib/tvix-store/blobs.sled")]
         blob_service_addr: String,
+
+        #[arg(
+            long,
+            env,
+            default_value = "sled:///var/lib/tvix-store/directories.sled"
+        )]
+        directory_service_addr: String,
     },
     /// Imports a list of paths into the store (not using the daemon)
     Import {
@@ -63,6 +67,9 @@ enum Commands {
 
         #[arg(long, env, default_value = "grpc+http://[::1]:8000")]
         blob_service_addr: String,
+
+        #[arg(long, env, default_value = "grpc+http://[::1]:8000")]
+        directory_service_addr: String,
     },
     /// Mounts a tvix-store at the given mountpoint
     #[cfg(feature = "fuse")]
@@ -72,6 +79,9 @@ enum Commands {
 
         #[arg(long, env, default_value = "grpc+http://[::1]:8000")]
         blob_service_addr: String,
+
+        #[arg(long, env, default_value = "grpc+http://[::1]:8000")]
+        directory_service_addr: String,
     },
 }
 
@@ -108,11 +118,11 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         Commands::Daemon {
             listen_address,
             blob_service_addr,
+            directory_service_addr,
         } => {
             // initialize stores
             let blob_service = blobservice::from_addr(&blob_service_addr).await?;
-            let directory_service: Arc<dyn DirectoryService> =
-                Arc::new(SledDirectoryService::new("directories.sled".into())?);
+            let directory_service = directoryservice::from_addr(&directory_service_addr)?;
             let path_info_service: Arc<dyn PathInfoService> = Arc::new(SledPathInfoService::new(
                 "pathinfo.sled".into(),
                 blob_service.clone(),
@@ -153,12 +163,10 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         Commands::Import {
             paths,
             blob_service_addr,
+            directory_service_addr,
         } => {
             let blob_service = blobservice::from_addr(&blob_service_addr).await?;
-
-            let directory_service = GRPCDirectoryService::from_client(
-                DirectoryServiceClient::connect("http://[::1]:8000").await?,
-            );
+            let directory_service = directoryservice::from_addr(&directory_service_addr)?;
             let path_info_service_client =
                 PathInfoServiceClient::connect("http://[::1]:8000").await?;
             let path_info_service =
@@ -166,7 +174,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
 
             let io = Arc::new(TvixStoreIO::new(
                 blob_service,
-                Arc::new(directory_service),
+                directory_service,
                 Arc::new(path_info_service),
             ));
 
@@ -191,23 +199,17 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
         Commands::Mount {
             dest,
             blob_service_addr,
+            directory_service_addr,
         } => {
             let blob_service = blobservice::from_addr(&blob_service_addr).await?;
-
-            let directory_service = GRPCDirectoryService::from_client(
-                DirectoryServiceClient::connect("http://[::1]:8000").await?,
-            );
+            let directory_service = directoryservice::from_addr(&directory_service_addr)?;
             let path_info_service_client =
                 PathInfoServiceClient::connect("http://[::1]:8000").await?;
             let path_info_service =
                 GRPCPathInfoService::from_client(path_info_service_client.clone());
 
             tokio::task::spawn_blocking(move || {
-                let f = FUSE::new(
-                    blob_service,
-                    Arc::new(directory_service),
-                    Arc::new(path_info_service),
-                );
+                let f = FUSE::new(blob_service, directory_service, Arc::new(path_info_service));
                 fuser::mount2(f, &dest, &[])
             })
             .await??
diff --git a/tvix/store/src/directoryservice/from_addr.rs b/tvix/store/src/directoryservice/from_addr.rs
new file mode 100644
index 0000000000..a3e2521e98
--- /dev/null
+++ b/tvix/store/src/directoryservice/from_addr.rs
@@ -0,0 +1,37 @@
+use std::sync::Arc;
+use url::Url;
+
+use super::{DirectoryService, GRPCDirectoryService, MemoryDirectoryService, SledDirectoryService};
+
+/// Constructs a new instance of a [DirectoryService] from an URI.
+///
+/// The following URIs are supported:
+/// - `memory:`
+///   Uses a in-memory implementation.
+/// - `sled:`
+///   Uses a in-memory sled implementation.
+/// - `sled:///absolute/path/to/somewhere`
+///   Uses sled, using a path on the disk for persistency. Can be only opened
+///   from one process at the same time.
+/// - `grpc+unix:///absolute/path/to/somewhere`
+///   Connects to a local tvix-store gRPC service via Unix socket.
+/// - `grpc+http://host:port`, `grpc+https://host:port`
+///    Connects to a (remote) tvix-store gRPC service.
+pub fn from_addr(uri: &str) -> Result<Arc<dyn DirectoryService>, crate::Error> {
+    let url = Url::parse(uri).map_err(|e| {
+        crate::Error::StorageError(format!("unable to parse url: {}", e.to_string()))
+    })?;
+
+    Ok(if url.scheme() == "memory" {
+        Arc::new(MemoryDirectoryService::from_url(&url)?)
+    } else if url.scheme() == "sled" {
+        Arc::new(SledDirectoryService::from_url(&url)?)
+    } else if url.scheme().starts_with("grpc+") {
+        Arc::new(GRPCDirectoryService::from_url(&url)?)
+    } else {
+        Err(crate::Error::StorageError(format!(
+            "unknown scheme: {}",
+            url.scheme()
+        )))?
+    })
+}
diff --git a/tvix/store/src/directoryservice/grpc.rs b/tvix/store/src/directoryservice/grpc.rs
index 913dd5bad9..cff3077c7e 100644
--- a/tvix/store/src/directoryservice/grpc.rs
+++ b/tvix/store/src/directoryservice/grpc.rs
@@ -3,6 +3,7 @@ use std::collections::HashSet;
 use super::{DirectoryPutter, DirectoryService};
 use crate::proto::{self, get_directory_request::ByWhat};
 use crate::{B3Digest, Error};
+use tokio::net::UnixStream;
 use tokio::sync::mpsc::UnboundedSender;
 use tokio_stream::wrappers::UnboundedReceiverStream;
 use tonic::{transport::Channel, Status};
@@ -21,7 +22,7 @@ pub struct GRPCDirectoryService {
 }
 
 impl GRPCDirectoryService {
-    /// construct a [GRPCDirectoryService] from a [proto::blob_service_client::BlobServiceClient<Channel>].
+    /// construct a [GRPCDirectoryService] from a [proto::directory_service_client::DirectoryServiceClient<Channel>].
     /// panics if called outside the context of a tokio runtime.
     pub fn from_client(
         grpc_client: proto::directory_service_client::DirectoryServiceClient<Channel>,
@@ -34,6 +35,59 @@ impl GRPCDirectoryService {
 }
 
 impl DirectoryService for GRPCDirectoryService {
+    /// Constructs a [GRPCDirectoryService] from the passed [url::Url]:
+    /// - scheme has to match `grpc+*://`.
+    ///   That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
+    /// - In the case of unix sockets, there must be a path, but may not be a host.
+    /// - In the case of non-unix sockets, there must be a host, but no path.
+    fn from_url(url: &url::Url) -> Result<Self, crate::Error> {
+        // Start checking for the scheme to start with grpc+.
+        match url.scheme().strip_prefix("grpc+") {
+            None => Err(crate::Error::StorageError("invalid scheme".to_string())),
+            Some(rest) => {
+                if rest == "unix" {
+                    if url.host_str().is_some() {
+                        return Err(crate::Error::StorageError(
+                            "host may not be set".to_string(),
+                        ));
+                    }
+                    let path = url.path().to_string();
+                    let channel = tonic::transport::Endpoint::try_from("http://[::]:50051") // doesn't matter
+                        .unwrap()
+                        .connect_with_connector_lazy(tower::service_fn(
+                            move |_: tonic::transport::Uri| UnixStream::connect(path.clone()),
+                        ));
+                    let grpc_client =
+                        proto::directory_service_client::DirectoryServiceClient::new(channel);
+                    Ok(Self::from_client(grpc_client))
+                } else {
+                    // ensure path is empty, not supported with gRPC.
+                    if !url.path().is_empty() {
+                        return Err(crate::Error::StorageError(
+                            "path may not be set".to_string(),
+                        ));
+                    }
+
+                    // clone the uri, and drop the grpc+ from the scheme.
+                    // Recreate a new uri with the `grpc+` prefix dropped from the scheme.
+                    // We can't use `url.set_scheme(rest)`, as it disallows
+                    // setting something http(s) that previously wasn't.
+                    let url = {
+                        let url_str = url.to_string();
+                        let s_stripped = url_str.strip_prefix("grpc+").unwrap();
+                        url::Url::parse(s_stripped).unwrap()
+                    };
+                    let channel = tonic::transport::Endpoint::try_from(url.to_string())
+                        .unwrap()
+                        .connect_lazy();
+
+                    let grpc_client =
+                        proto::directory_service_client::DirectoryServiceClient::new(channel);
+                    Ok(Self::from_client(grpc_client))
+                }
+            }
+        }
+    }
     fn get(&self, digest: &B3Digest) -> Result<Option<crate::proto::Directory>, crate::Error> {
         // Get a new handle to the gRPC client, and copy the digest.
         let mut grpc_client = self.grpc_client.clone();
diff --git a/tvix/store/src/directoryservice/memory.rs b/tvix/store/src/directoryservice/memory.rs
index 1f203834a7..634dbf9922 100644
--- a/tvix/store/src/directoryservice/memory.rs
+++ b/tvix/store/src/directoryservice/memory.rs
@@ -12,6 +12,21 @@ pub struct MemoryDirectoryService {
 }
 
 impl DirectoryService for MemoryDirectoryService {
+    /// Constructs a [MemoryDirectoryService] from the passed [url::Url]:
+    /// - scheme has to be `memory://`
+    /// - there may not be a host.
+    /// - there may not be a path.
+    fn from_url(url: &url::Url) -> Result<Self, Error> {
+        if url.scheme() != "memory" {
+            return Err(crate::Error::StorageError("invalid scheme".to_string()));
+        }
+
+        if url.has_host() || !url.path().is_empty() {
+            return Err(crate::Error::StorageError("invalid url".to_string()));
+        }
+
+        Ok(Self::default())
+    }
     #[instrument(skip(self, digest), fields(directory.digest = %digest))]
     fn get(&self, digest: &B3Digest) -> Result<Option<proto::Directory>, Error> {
         let db = self.db.read()?;
@@ -84,3 +99,49 @@ impl DirectoryService for MemoryDirectoryService {
         Box::new(SimplePutter::new(self.clone()))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use super::DirectoryService;
+    use super::MemoryDirectoryService;
+
+    /// This uses a wrong scheme.
+    #[test]
+    fn test_invalid_scheme() {
+        let url = url::Url::parse("http://foo.example/test").expect("must parse");
+
+        assert!(MemoryDirectoryService::from_url(&url).is_err());
+    }
+
+    /// This correctly sets the scheme, and doesn't set a path.
+    #[test]
+    fn test_valid_scheme() {
+        let url = url::Url::parse("memory://").expect("must parse");
+
+        assert!(MemoryDirectoryService::from_url(&url).is_ok());
+    }
+
+    /// This sets the host to `foo`
+    #[test]
+    fn test_invalid_host() {
+        let url = url::Url::parse("memory://foo").expect("must parse");
+
+        assert!(MemoryDirectoryService::from_url(&url).is_err());
+    }
+
+    /// This has the path "/", which is invalid.
+    #[test]
+    fn test_invalid_has_path() {
+        let url = url::Url::parse("memory:///").expect("must parse");
+
+        assert!(MemoryDirectoryService::from_url(&url).is_err());
+    }
+
+    /// This has the path "/foo", which is invalid.
+    #[test]
+    fn test_invalid_path2() {
+        let url = url::Url::parse("memory:///foo").expect("must parse");
+
+        assert!(MemoryDirectoryService::from_url(&url).is_err());
+    }
+}
diff --git a/tvix/store/src/directoryservice/mod.rs b/tvix/store/src/directoryservice/mod.rs
index 6589a5b625..c99848c551 100644
--- a/tvix/store/src/directoryservice/mod.rs
+++ b/tvix/store/src/directoryservice/mod.rs
@@ -1,10 +1,13 @@
 use crate::{proto, B3Digest, Error};
+
+mod from_addr;
 mod grpc;
 mod memory;
 mod sled;
 mod traverse;
 mod utils;
 
+pub use self::from_addr::from_addr;
 pub use self::grpc::GRPCDirectoryService;
 pub use self::memory::MemoryDirectoryService;
 pub use self::sled::SledDirectoryService;
@@ -15,6 +18,11 @@ pub use self::utils::DirectoryTraverser;
 /// This is a simple get and put of [crate::proto::Directory], returning their
 /// digest.
 pub trait DirectoryService: Send + Sync {
+    /// Create a new instance by passing in a connection URL.
+    fn from_url(url: &url::Url) -> Result<Self, Error>
+    where
+        Self: Sized;
+
     /// Get looks up a single Directory message by its digest.
     /// In case the directory is not found, Ok(None) is returned.
     fn get(&self, digest: &B3Digest) -> Result<Option<proto::Directory>, Error>;
diff --git a/tvix/store/src/directoryservice/sled.rs b/tvix/store/src/directoryservice/sled.rs
index 8ed2c59c2d..6dc09ed646 100644
--- a/tvix/store/src/directoryservice/sled.rs
+++ b/tvix/store/src/directoryservice/sled.rs
@@ -30,6 +30,36 @@ impl SledDirectoryService {
 }
 
 impl DirectoryService for SledDirectoryService {
+    /// Constructs a [SledDirectoryService] from the passed [url::Url]:
+    /// - scheme has to be `sled://`
+    /// - there may not be a host.
+    /// - a path to the sled needs to be provided (which may not be `/`).
+    fn from_url(url: &url::Url) -> Result<Self, Error> {
+        if url.scheme() != "sled" {
+            return Err(crate::Error::StorageError("invalid scheme".to_string()));
+        }
+
+        if url.has_host() {
+            return Err(crate::Error::StorageError(format!(
+                "invalid host: {}",
+                url.host().unwrap()
+            )));
+        }
+
+        // TODO: expose compression and other parameters as URL parameters, drop new and new_temporary?
+        if url.path().is_empty() {
+            Self::new_temporary().map_err(|e| Error::StorageError(e.to_string()))
+        } else {
+            if url.path() == "/" {
+                Err(crate::Error::StorageError(
+                    "cowardly refusing to open / with sled".to_string(),
+                ))
+            } else {
+                Self::new(url.path().into()).map_err(|e| Error::StorageError(e.to_string()))
+            }
+        }
+    }
+
     #[instrument(skip(self, digest), fields(directory.digest = %digest))]
     fn get(&self, digest: &B3Digest) -> Result<Option<proto::Directory>, Error> {
         match self.db.get(digest.to_vec()) {
@@ -109,3 +139,77 @@ impl DirectoryService for SledDirectoryService {
         Box::new(SimplePutter::new(self.clone()))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use tempfile::TempDir;
+
+    use super::DirectoryService;
+    use super::SledDirectoryService;
+
+    /// This uses a wrong scheme.
+    #[test]
+    fn test_invalid_scheme() {
+        let url = url::Url::parse("http://foo.example/test").expect("must parse");
+
+        assert!(SledDirectoryService::from_url(&url).is_err());
+    }
+
+    /// This uses the correct scheme, and doesn't specify a path (temporary sled).
+    #[test]
+    fn test_valid_scheme_temporary() {
+        let url = url::Url::parse("sled://").expect("must parse");
+
+        assert!(SledDirectoryService::from_url(&url).is_ok());
+    }
+
+    /// This sets the path to a location that doesn't exist, which should fail (as sled doesn't mkdir -p)
+    #[test]
+    fn test_nonexistent_path() {
+        let tmpdir = TempDir::new().unwrap();
+
+        let mut url = url::Url::parse("sled://foo.example").expect("must parse");
+        url.set_path(tmpdir.path().join("foo").join("bar").to_str().unwrap());
+
+        assert!(SledDirectoryService::from_url(&url).is_err());
+    }
+
+    /// This uses the correct scheme, and specifies / as path (which should fail
+    // for obvious reasons)
+    #[test]
+    fn test_invalid_path_root() {
+        let url = url::Url::parse("sled:///").expect("must parse");
+
+        assert!(SledDirectoryService::from_url(&url).is_err());
+    }
+
+    /// This uses the correct scheme, and sets a tempdir as location.
+    #[test]
+    fn test_valid_scheme_path() {
+        let tmpdir = TempDir::new().unwrap();
+
+        let mut url = url::Url::parse("sled://").expect("must parse");
+        url.set_path(tmpdir.path().to_str().unwrap());
+
+        assert!(SledDirectoryService::from_url(&url).is_ok());
+    }
+
+    /// This sets a host, rather than a path, which should fail.
+    #[test]
+    fn test_invalid_host() {
+        let url = url::Url::parse("sled://foo.example").expect("must parse");
+
+        assert!(SledDirectoryService::from_url(&url).is_err());
+    }
+
+    /// This sets a host AND a valid path, which should fail
+    #[test]
+    fn test_invalid_host_and_path() {
+        let tmpdir = TempDir::new().unwrap();
+
+        let mut url = url::Url::parse("sled://foo.example").expect("must parse");
+        url.set_path(tmpdir.path().to_str().unwrap());
+
+        assert!(SledDirectoryService::from_url(&url).is_err());
+    }
+}