about summary refs log tree commit diff
path: root/tvix/castore/src/blobservice/from_addr.rs
blob: 3e3f943e59313d97e9002b07731999631e530a98 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
use url::Url;

use crate::{proto::blob_service_client::BlobServiceClient, Error};

use super::{
    BlobService, GRPCBlobService, MemoryBlobService, ObjectStoreBlobService, SledBlobService,
};

/// Constructs a new instance of a [BlobService] from an URI.
///
/// The following schemes are supported by the following services:
/// - `memory://` ([MemoryBlobService])
/// - `sled://` ([SledBlobService])
/// - `grpc+*://` ([GRPCBlobService])
/// - `objectstore+*://` ([ObjectStoreBlobService])
///
/// See their `from_url` methods for more details about their syntax.
pub async fn from_addr(uri: &str) -> Result<Box<dyn BlobService>, crate::Error> {
    let url = Url::parse(uri)
        .map_err(|e| crate::Error::StorageError(format!("unable to parse url: {}", e)))?;

    let blob_service: Box<dyn BlobService> = match url.scheme() {
        "memory" => {
            // memory doesn't support host or path in the URL.
            if url.has_host() || !url.path().is_empty() {
                return Err(Error::StorageError("invalid url".to_string()));
            }
            Box::<MemoryBlobService>::default()
        }
        "sled" => {
            // sled doesn't support host, and a path can be provided (otherwise
            // it'll live in memory only).
            if url.has_host() {
                return Err(Error::StorageError("no host allowed".to_string()));
            }

            if url.path() == "/" {
                return Err(Error::StorageError(
                    "cowardly refusing to open / with sled".to_string(),
                ));
            }

            // TODO: expose other parameters as URL parameters?

            Box::new(if url.path().is_empty() {
                SledBlobService::new_temporary().map_err(|e| Error::StorageError(e.to_string()))?
            } else {
                SledBlobService::new(url.path()).map_err(|e| Error::StorageError(e.to_string()))?
            })
        }
        scheme if scheme.starts_with("grpc+") => {
            // schemes starting with grpc+ go to the GRPCPathInfoService.
            //   That's normally grpc+unix for unix sockets, and grpc+http(s) for the HTTP counterparts.
            // - In the case of unix sockets, there must be a path, but may not be a host.
            // - In the case of non-unix sockets, there must be a host, but no path.
            // Constructing the channel is handled by tvix_castore::channel::from_url.
            let client = BlobServiceClient::new(crate::tonic::channel_from_url(&url).await?);
            Box::new(GRPCBlobService::from_client(client))
        }
        scheme if scheme.starts_with("objectstore+") => {
            // We need to convert the URL to string, strip the prefix there, and then
            // parse it back as url, as Url::set_scheme() rejects some of the transitions we want to do.
            let trimmed_url = {
                let s = url.to_string();
                Url::parse(s.strip_prefix("objectstore+").unwrap()).unwrap()
            };
            Box::new(
                ObjectStoreBlobService::parse_url(&trimmed_url)
                    .map_err(|e| Error::StorageError(e.to_string()))?,
            )
        }
        scheme => {
            return Err(crate::Error::StorageError(format!(
                "unknown scheme: {}",
                scheme
            )))
        }
    };

    Ok(blob_service)
}

#[cfg(test)]
mod tests {
    use super::from_addr;
    use lazy_static::lazy_static;
    use rstest::rstest;
    use tempfile::TempDir;

    lazy_static! {
        static ref TMPDIR_SLED_1: TempDir = TempDir::new().unwrap();
        static ref TMPDIR_SLED_2: TempDir = TempDir::new().unwrap();
    }

    #[rstest]
    /// This uses an unsupported scheme.
    #[case::unsupported_scheme("http://foo.example/test", false)]
    /// This configures sled in temporary mode.
    #[case::sled_temporary("sled://", true)]
    /// This configures sled with /, which should fail.
    #[case::sled_invalid_root("sled:///", false)]
    /// This configures sled with a host, not path, which should fail.
    #[case::sled_invalid_host("sled://foo.example", false)]
    /// This configures sled with a valid path path, which should succeed.
    #[case::sled_valid_path(&format!("sled://{}", &TMPDIR_SLED_1.path().to_str().unwrap()), true)]
    /// This configures sled with a host, and a valid path path, which should fail.
    #[case::sled_invalid_host_with_valid_path(&format!("sled://foo.example{}", &TMPDIR_SLED_2.path().to_str().unwrap()), false)]
    /// This correctly sets the scheme, and doesn't set a path.
    #[case::memory_valid("memory://", true)]
    /// This sets a memory url host to `foo`
    #[case::memory_invalid_host("memory://foo", false)]
    /// This sets a memory url path to "/", which is invalid.
    #[case::memory_invalid_root_path("memory:///", false)]
    /// This sets a memory url path to "/foo", which is invalid.
    #[case::memory_invalid_root_path_foo("memory:///foo", false)]
    /// Correct scheme to connect to a unix socket.
    #[case::grpc_valid_unix_socket("grpc+unix:///path/to/somewhere", true)]
    /// Correct scheme for unix socket, but setting a host too, which is invalid.
    #[case::grpc_invalid_unix_socket_and_host("grpc+unix://host.example/path/to/somewhere", false)]
    /// Correct scheme to connect to localhost, with port 12345
    #[case::grpc_valid_ipv6_localhost_port_12345("grpc+http://[::1]:12345", true)]
    /// Correct scheme to connect to localhost over http, without specifying a port.
    #[case::grpc_valid_http_host_without_port("grpc+http://localhost", true)]
    /// Correct scheme to connect to localhost over http, without specifying a port.
    #[case::grpc_valid_https_host_without_port("grpc+https://localhost", true)]
    /// Correct scheme to connect to localhost over http, but with additional path, which is invalid.
    #[case::grpc_invalid_has_path("grpc+http://localhost/some-path", false)]
    /// An example for object store (InMemory)
    #[case::objectstore_valid_memory("objectstore+memory:///", true)]
    /// An example for object store (LocalFileSystem)
    #[case::objectstore_valid_file("objectstore+file:///foo/bar", true)]
    // An example for object store (HTTP / WebDAV)
    #[case::objectstore_valid_http_url("objectstore+https://localhost:8080/some-path", true)]
    /// An example for object store (S3)
    #[cfg_attr(
        feature = "cloud",
        case::objectstore_valid_s3_url("objectstore+s3://bucket/path", true)
    )]
    /// An example for object store (GCS)
    #[cfg_attr(
        feature = "cloud",
        case::objectstore_valid_gcs_url("objectstore+gs://bucket/path", true)
    )]
    #[tokio::test]
    async fn test_from_addr_tokio(#[case] uri_str: &str, #[case] exp_succeed: bool) {
        if exp_succeed {
            from_addr(uri_str).await.expect("should succeed");
        } else {
            assert!(from_addr(uri_str).await.is_err(), "should fail");
        }
    }
}