about summary refs log tree commit diff
path: root/tvix/store/src/pathinfoservice
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-06-11T21·04+0300
committerclbot <clbot@tvl.fyi>2023-06-14T23·15+0000
commit35bff2bda69d5189d9a439cd2032b86ebb4e6e41 (patch)
treea5b22053e83463e8d87ff6edca7e29a31c2606d3 /tvix/store/src/pathinfoservice
parentbb7c76739a30d2f312693799d8237eb0eb2da28d (diff)
refactor(tvix/store/pathinfosvc): add from_addr r/6304
Change-Id: I24e822351a837fce2aed568a647d009099ef32ec
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8747
Reviewed-by: tazjin <tazjin@tvl.su>
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/store/src/pathinfoservice')
-rw-r--r--tvix/store/src/pathinfoservice/from_addr.rs58
-rw-r--r--tvix/store/src/pathinfoservice/grpc.rs232
-rw-r--r--tvix/store/src/pathinfoservice/memory.rs84
-rw-r--r--tvix/store/src/pathinfoservice/mod.rs17
-rw-r--r--tvix/store/src/pathinfoservice/sled.rs134
5 files changed, 524 insertions, 1 deletions
diff --git a/tvix/store/src/pathinfoservice/from_addr.rs b/tvix/store/src/pathinfoservice/from_addr.rs
new file mode 100644
index 0000000000..2f712f4514
--- /dev/null
+++ b/tvix/store/src/pathinfoservice/from_addr.rs
@@ -0,0 +1,58 @@
+use std::sync::Arc;
+use url::Url;
+
+use crate::{blobservice::BlobService, directoryservice::DirectoryService};
+
+use super::{GRPCPathInfoService, MemoryPathInfoService, PathInfoService, SledPathInfoService};
+
+/// Constructs a new instance of a [PathInfoService] from an URI.
+///
+/// The following URIs are supported:
+/// - `memory:`
+///   Uses a in-memory implementation.
+/// - `sled:`
+///   Uses a in-memory sled implementation.
+/// - `sled:///absolute/path/to/somewhere`
+///   Uses sled, using a path on the disk for persistency. Can be only opened
+///   from one process at the same time.
+/// - `grpc+unix:///absolute/path/to/somewhere`
+///   Connects to a local tvix-store gRPC service via Unix socket.
+/// - `grpc+http://host:port`, `grpc+https://host:port`
+///    Connects to a (remote) tvix-store gRPC service.
+///
+/// As the [PathInfoService] needs to talk to [BlobService] and [DirectoryService],
+/// these also need to be passed in.
+pub fn from_addr(
+    uri: &str,
+    blob_service: Arc<dyn BlobService>,
+    directory_service: Arc<dyn DirectoryService>,
+) -> Result<Arc<dyn PathInfoService>, crate::Error> {
+    let url = Url::parse(uri).map_err(|e| {
+        crate::Error::StorageError(format!("unable to parse url: {}", e.to_string()))
+    })?;
+
+    Ok(if url.scheme() == "memory" {
+        Arc::new(MemoryPathInfoService::from_url(
+            &url,
+            blob_service,
+            directory_service,
+        )?)
+    } else if url.scheme() == "sled" {
+        Arc::new(SledPathInfoService::from_url(
+            &url,
+            blob_service,
+            directory_service,
+        )?)
+    } else if url.scheme().starts_with("grpc+") {
+        Arc::new(GRPCPathInfoService::from_url(
+            &url,
+            blob_service,
+            directory_service,
+        )?)
+    } else {
+        Err(crate::Error::StorageError(format!(
+            "unknown scheme: {}",
+            url.scheme()
+        )))?
+    })
+}
diff --git a/tvix/store/src/pathinfoservice/grpc.rs b/tvix/store/src/pathinfoservice/grpc.rs
index 230d630cf4..a14f51c9f8 100644
--- a/tvix/store/src/pathinfoservice/grpc.rs
+++ b/tvix/store/src/pathinfoservice/grpc.rs
@@ -1,5 +1,7 @@
 use super::PathInfoService;
-use crate::proto;
+use crate::{blobservice::BlobService, directoryservice::DirectoryService, proto};
+use std::sync::Arc;
+use tokio::net::UnixStream;
 use tonic::{transport::Channel, Code, Status};
 
 /// Connects to a (remote) tvix-store PathInfoService over gRPC.
@@ -27,6 +29,65 @@ impl GRPCPathInfoService {
 }
 
 impl PathInfoService for GRPCPathInfoService {
+    /// Constructs a [GRPCPathInfoService] from the passed [url::Url]:
+    /// - scheme has to match `grpc+*://`.
+    ///   That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
+    /// - In the case of unix sockets, there must be a path, but may not be a host.
+    /// - In the case of non-unix sockets, there must be a host, but no path.
+    /// The blob_service and directory_service arguments are ignored, because the gRPC service already provides answers to these questions.
+    fn from_url(
+        url: &url::Url,
+        _blob_service: Arc<dyn BlobService>,
+        _directory_service: Arc<dyn DirectoryService>,
+    ) -> Result<Self, crate::Error> {
+        // Start checking for the scheme to start with grpc+.
+        match url.scheme().strip_prefix("grpc+") {
+            None => Err(crate::Error::StorageError("invalid scheme".to_string())),
+            Some(rest) => {
+                if rest == "unix" {
+                    if url.host_str().is_some() {
+                        return Err(crate::Error::StorageError(
+                            "host may not be set".to_string(),
+                        ));
+                    }
+                    let path = url.path().to_string();
+                    let channel = tonic::transport::Endpoint::try_from("http://[::]:50051") // doesn't matter
+                        .unwrap()
+                        .connect_with_connector_lazy(tower::service_fn(
+                            move |_: tonic::transport::Uri| UnixStream::connect(path.clone()),
+                        ));
+                    let grpc_client =
+                        proto::path_info_service_client::PathInfoServiceClient::new(channel);
+                    Ok(Self::from_client(grpc_client))
+                } else {
+                    // ensure path is empty, not supported with gRPC.
+                    if !url.path().is_empty() {
+                        return Err(crate::Error::StorageError(
+                            "path may not be set".to_string(),
+                        ));
+                    }
+
+                    // clone the uri, and drop the grpc+ from the scheme.
+                    // Recreate a new uri with the `grpc+` prefix dropped from the scheme.
+                    // We can't use `url.set_scheme(rest)`, as it disallows
+                    // setting something http(s) that previously wasn't.
+                    let url = {
+                        let url_str = url.to_string();
+                        let s_stripped = url_str.strip_prefix("grpc+").unwrap();
+                        url::Url::parse(s_stripped).unwrap()
+                    };
+                    let channel = tonic::transport::Endpoint::try_from(url.to_string())
+                        .unwrap()
+                        .connect_lazy();
+
+                    let grpc_client =
+                        proto::path_info_service_client::PathInfoServiceClient::new(channel);
+                    Ok(Self::from_client(grpc_client))
+                }
+            }
+        }
+    }
+
     fn get(&self, digest: [u8; 20]) -> Result<Option<proto::PathInfo>, crate::Error> {
         // Get a new handle to the gRPC client.
         let mut grpc_client = self.grpc_client.clone();
@@ -99,3 +160,172 @@ impl PathInfoService for GRPCPathInfoService {
         Ok((resp.nar_size, nar_sha256))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use std::sync::Arc;
+    use std::thread;
+
+    use tempfile::TempDir;
+    use tokio::net::UnixListener;
+    use tokio::task;
+    use tokio::time;
+    use tokio_stream::wrappers::UnixListenerStream;
+
+    use crate::pathinfoservice::MemoryPathInfoService;
+    use crate::proto::GRPCPathInfoServiceWrapper;
+    use crate::tests::fixtures;
+    use crate::tests::utils::gen_blob_service;
+    use crate::tests::utils::gen_directory_service;
+
+    use super::GRPCPathInfoService;
+    use super::PathInfoService;
+
+    /// This uses the wrong scheme
+    #[test]
+    fn test_invalid_scheme() {
+        let url = url::Url::parse("http://foo.example/test").expect("must parse");
+
+        assert!(
+            GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This uses the correct scheme for a unix socket.
+    /// The fact that /path/to/somewhere doesn't exist yet is no problem, because we connect lazily.
+    #[tokio::test]
+    async fn test_valid_unix_path() {
+        let url = url::Url::parse("grpc+unix:///path/to/somewhere").expect("must parse");
+
+        assert!(
+            GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_ok()
+        );
+    }
+
+    /// This uses the correct scheme for a unix socket,
+    /// but sets a host, which is unsupported.
+    #[tokio::test]
+    async fn test_invalid_unix_path_with_domain() {
+        let url =
+            url::Url::parse("grpc+unix://host.example/path/to/somewhere").expect("must parse");
+
+        assert!(
+            GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This uses the correct scheme for a HTTP server.
+    /// The fact that nothing is listening there is no problem, because we connect lazily.
+    #[tokio::test]
+    async fn test_valid_http() {
+        let url = url::Url::parse("grpc+http://localhost").expect("must parse");
+
+        assert!(
+            GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_ok()
+        );
+    }
+
+    /// This uses the correct scheme for a HTTPS server.
+    /// The fact that nothing is listening there is no problem, because we connect lazily.
+    #[tokio::test]
+    async fn test_valid_https() {
+        let url = url::Url::parse("grpc+https://localhost").expect("must parse");
+
+        assert!(
+            GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_ok()
+        );
+    }
+
+    /// This uses the correct scheme, but also specifies
+    /// an additional path, which is not supported for gRPC.
+    /// The fact that nothing is listening there is no problem, because we connect lazily.
+    #[tokio::test]
+    async fn test_invalid_http_with_path() {
+        let url = url::Url::parse("grpc+https://localhost/some-path").expect("must parse");
+
+        assert!(
+            GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This uses the correct scheme for a unix socket, and provides a server on the other side.
+    #[tokio::test]
+    async fn test_valid_unix_path_ping_pong() {
+        let tmpdir = TempDir::new().unwrap();
+        let path = tmpdir.path().join("daemon");
+
+        // let mut join_set = JoinSet::new();
+
+        // prepare a client
+        let client = {
+            let mut url = url::Url::parse("grpc+unix:///path/to/somewhere").expect("must parse");
+            url.set_path(path.to_str().unwrap());
+            GRPCPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .expect("must succeed")
+        };
+
+        let path_copy = path.clone();
+
+        // Spin up a server, in a thread far away, which spawns its own tokio runtime,
+        // and blocks on the task.
+        thread::spawn(move || {
+            // Create the runtime
+            let rt = tokio::runtime::Runtime::new().unwrap();
+            // Get a handle from this runtime
+            let handle = rt.handle();
+
+            let task = handle.spawn(async {
+                let uds = UnixListener::bind(path_copy).unwrap();
+                let uds_stream = UnixListenerStream::new(uds);
+
+                // spin up a new server
+                let mut server = tonic::transport::Server::builder();
+                let router = server.add_service(
+                    crate::proto::path_info_service_server::PathInfoServiceServer::new(
+                        GRPCPathInfoServiceWrapper::from(Arc::new(MemoryPathInfoService::new(
+                            gen_blob_service(),
+                            gen_directory_service(),
+                        ))
+                            as Arc<dyn PathInfoService>),
+                    ),
+                );
+                router.serve_with_incoming(uds_stream).await
+            });
+
+            handle.block_on(task)
+        });
+
+        // wait for the socket to be created
+        {
+            let mut socket_created = false;
+            for _try in 1..20 {
+                if path.exists() {
+                    socket_created = true;
+                    break;
+                }
+                tokio::time::sleep(time::Duration::from_millis(20)).await;
+            }
+
+            assert!(
+                socket_created,
+                "expected socket path to eventually get created, but never happened"
+            );
+        }
+
+        let pi = task::spawn_blocking(move || {
+            client
+                .get(fixtures::DUMMY_OUTPUT_HASH.to_vec().try_into().unwrap())
+                .expect("must not be error")
+        })
+        .await
+        .expect("must not be err");
+
+        assert!(pi.is_none());
+    }
+}
diff --git a/tvix/store/src/pathinfoservice/memory.rs b/tvix/store/src/pathinfoservice/memory.rs
index 35455313cb..f7abb2180e 100644
--- a/tvix/store/src/pathinfoservice/memory.rs
+++ b/tvix/store/src/pathinfoservice/memory.rs
@@ -29,6 +29,26 @@ impl MemoryPathInfoService {
 }
 
 impl PathInfoService for MemoryPathInfoService {
+    /// Constructs a [MemoryPathInfoService] from the passed [url::Url]:
+    /// - scheme has to be `memory://`
+    /// - there may not be a host.
+    /// - there may not be a path.
+    fn from_url(
+        url: &url::Url,
+        blob_service: Arc<dyn BlobService>,
+        directory_service: Arc<dyn DirectoryService>,
+    ) -> Result<Self, Error> {
+        if url.scheme() != "memory" {
+            return Err(crate::Error::StorageError("invalid scheme".to_string()));
+        }
+
+        if url.has_host() || !url.path().is_empty() {
+            return Err(crate::Error::StorageError("invalid url".to_string()));
+        }
+
+        Ok(Self::new(blob_service, directory_service))
+    }
+
     fn get(&self, digest: [u8; 20]) -> Result<Option<proto::PathInfo>, Error> {
         let db = self.db.read().unwrap();
 
@@ -66,3 +86,67 @@ impl PathInfoService for MemoryPathInfoService {
         .map_err(|e| Error::StorageError(e.to_string()))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use crate::tests::utils::gen_blob_service;
+    use crate::tests::utils::gen_directory_service;
+
+    use super::MemoryPathInfoService;
+    use super::PathInfoService;
+
+    /// This uses a wrong scheme.
+    #[test]
+    fn test_invalid_scheme() {
+        let url = url::Url::parse("http://foo.example/test").expect("must parse");
+
+        assert!(
+            MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This correctly sets the scheme, and doesn't set a path.
+    #[test]
+    fn test_valid_scheme() {
+        let url = url::Url::parse("memory://").expect("must parse");
+
+        assert!(
+            MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_ok()
+        );
+    }
+
+    /// This sets the host to `foo`
+    #[test]
+    fn test_invalid_host() {
+        let url = url::Url::parse("memory://foo").expect("must parse");
+
+        assert!(
+            MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This has the path "/", which is invalid.
+    #[test]
+    fn test_invalid_has_path() {
+        let url = url::Url::parse("memory:///").expect("must parse");
+
+        assert!(
+            MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This has the path "/foo", which is invalid.
+    #[test]
+    fn test_invalid_path2() {
+        let url = url::Url::parse("memory:///foo").expect("must parse");
+
+        assert!(
+            MemoryPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+}
diff --git a/tvix/store/src/pathinfoservice/mod.rs b/tvix/store/src/pathinfoservice/mod.rs
index 937d8f2a11..191e8dbd60 100644
--- a/tvix/store/src/pathinfoservice/mod.rs
+++ b/tvix/store/src/pathinfoservice/mod.rs
@@ -1,9 +1,15 @@
+mod from_addr;
 mod grpc;
 mod memory;
 mod sled;
 
+use std::sync::Arc;
+
+use crate::blobservice::BlobService;
+use crate::directoryservice::DirectoryService;
 use crate::{proto, Error};
 
+pub use self::from_addr::from_addr;
 pub use self::grpc::GRPCPathInfoService;
 pub use self::memory::MemoryPathInfoService;
 pub use self::sled::SledPathInfoService;
@@ -11,6 +17,17 @@ pub use self::sled::SledPathInfoService;
 /// The base trait all PathInfo services need to implement.
 /// This is a simple get and put of [proto::Directory], returning their digest.
 pub trait PathInfoService: Send + Sync {
+    /// Create a new instance by passing in a connection URL, as well
+    /// as instances of a [PathInfoService] and [DirectoryService] (as the
+    /// [PathInfoService] needs to talk to them).
+    fn from_url(
+        url: &url::Url,
+        blob_service: Arc<dyn BlobService>,
+        directory_service: Arc<dyn DirectoryService>,
+    ) -> Result<Self, Error>
+    where
+        Self: Sized;
+
     /// Retrieve a PathInfo message by the output digest.
     fn get(&self, digest: [u8; 20]) -> Result<Option<proto::PathInfo>, Error>;
 
diff --git a/tvix/store/src/pathinfoservice/sled.rs b/tvix/store/src/pathinfoservice/sled.rs
index f06a905cef..48db6b8b5c 100644
--- a/tvix/store/src/pathinfoservice/sled.rs
+++ b/tvix/store/src/pathinfoservice/sled.rs
@@ -50,6 +50,42 @@ impl SledPathInfoService {
 }
 
 impl PathInfoService for SledPathInfoService {
+    /// Constructs a [SledBlobService] from the passed [url::Url]:
+    /// - scheme has to be `sled://`
+    /// - there may not be a host.
+    /// - a path to the sled needs to be provided (which may not be `/`).
+    fn from_url(
+        url: &url::Url,
+        blob_service: Arc<dyn BlobService>,
+        directory_service: Arc<dyn DirectoryService>,
+    ) -> Result<Self, Error> {
+        if url.scheme() != "sled" {
+            return Err(crate::Error::StorageError("invalid scheme".to_string()));
+        }
+
+        if url.has_host() {
+            return Err(crate::Error::StorageError(format!(
+                "invalid host: {}",
+                url.host().unwrap()
+            )));
+        }
+
+        // TODO: expose compression and other parameters as URL parameters, drop new and new_temporary?
+        if url.path().is_empty() {
+            Self::new_temporary(blob_service, directory_service)
+                .map_err(|e| Error::StorageError(e.to_string()))
+        } else {
+            if url.path() == "/" {
+                Err(crate::Error::StorageError(
+                    "cowardly refusing to open / with sled".to_string(),
+                ))
+            } else {
+                Self::new(url.path().into(), blob_service, directory_service)
+                    .map_err(|e| Error::StorageError(e.to_string()))
+            }
+        }
+    }
+
     fn get(&self, digest: [u8; 20]) -> Result<Option<proto::PathInfo>, Error> {
         match self.db.get(digest) {
             Ok(None) => Ok(None),
@@ -103,3 +139,101 @@ impl PathInfoService for SledPathInfoService {
         .map_err(|e| Error::StorageError(e.to_string()))
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use tempfile::TempDir;
+
+    use crate::tests::utils::gen_blob_service;
+    use crate::tests::utils::gen_directory_service;
+
+    use super::PathInfoService;
+    use super::SledPathInfoService;
+
+    /// This uses a wrong scheme.
+    #[test]
+    fn test_invalid_scheme() {
+        let url = url::Url::parse("http://foo.example/test").expect("must parse");
+
+        assert!(
+            SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This uses the correct scheme, and doesn't specify a path (temporary sled).
+    #[test]
+    fn test_valid_scheme_temporary() {
+        let url = url::Url::parse("sled://").expect("must parse");
+
+        assert!(
+            SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_ok()
+        );
+    }
+
+    /// This sets the path to a location that doesn't exist, which should fail (as sled doesn't mkdir -p)
+    #[test]
+    fn test_nonexistent_path() {
+        let tmpdir = TempDir::new().unwrap();
+
+        let mut url = url::Url::parse("sled://foo.example").expect("must parse");
+        url.set_path(tmpdir.path().join("foo").join("bar").to_str().unwrap());
+
+        assert!(
+            SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This uses the correct scheme, and specifies / as path (which should fail
+    // for obvious reasons)
+    #[test]
+    fn test_invalid_path_root() {
+        let url = url::Url::parse("sled:///").expect("must parse");
+
+        assert!(
+            SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This uses the correct scheme, and sets a tempdir as location.
+    #[test]
+    fn test_valid_scheme_path() {
+        let tmpdir = TempDir::new().unwrap();
+
+        let mut url = url::Url::parse("sled://").expect("must parse");
+        url.set_path(tmpdir.path().to_str().unwrap());
+
+        assert!(
+            SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_ok()
+        );
+    }
+
+    /// This sets a host, rather than a path, which should fail.
+    #[test]
+    fn test_invalid_host() {
+        let url = url::Url::parse("sled://foo.example").expect("must parse");
+
+        assert!(
+            SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+
+    /// This sets a host AND a valid path, which should fail
+    #[test]
+    fn test_invalid_host_and_path() {
+        let tmpdir = TempDir::new().unwrap();
+
+        let mut url = url::Url::parse("sled://foo.example").expect("must parse");
+        url.set_path(tmpdir.path().to_str().unwrap());
+
+        assert!(
+            SledPathInfoService::from_url(&url, gen_blob_service(), gen_directory_service())
+                .is_err()
+        );
+    }
+}