about summary refs log tree commit diff
diff options
context:
space:
mode:
authorRyan Lahfa <tvl@lahfa.xyz>2024-01-08T08·50+0100
committerclbot <clbot@tvl.fyi>2024-01-20T18·26+0000
commit4c3ba46ba36b2fa2d9079fcc92ef27875f26418b (patch)
tree0e1115eabc25445a8567661e0fc8866008d8892f
parent7275288f0e0a0f29c3f023a7d8a4c38157fb637b (diff)
refactor(tvix/store): `import_path` → `import_path_as_nar_ca` r/7432
Add multiple additional helpers such as:

- `path_to_name`: derive the basename of a given path
- `derive_nar_ca_path_info`: derive the `PathInfo` for a content
  addressed NAR

which further isolates the tree walking feature and the ingestion feature.

Additionally, we no longer `expect` on ingestion and instead properly propagate ingestion errors up.

Change-Id: I60edb5b633911c58ade7e19f5002e6f75f90e262
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10574
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Autosubmit: raitobezarius <tvl@lahfa.xyz>
-rw-r--r--tvix/glue/src/tvix_store_io.rs2
-rw-r--r--tvix/store/src/bin/tvix-store.rs2
-rw-r--r--tvix/store/src/import.rs156
-rw-r--r--tvix/store/src/lib.rs1
-rw-r--r--tvix/store/src/utils.rs118
5 files changed, 161 insertions, 118 deletions
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
index c72b5d108504..45bcf0df3ae8 100644
--- a/tvix/glue/src/tvix_store_io.rs
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -274,7 +274,7 @@ impl EvalIO for TvixStoreIO {
     #[instrument(skip(self), ret, err)]
     fn import_path(&self, path: &Path) -> io::Result<PathBuf> {
         let output_path = self.tokio_handle.block_on(async {
-            tvix_store::utils::import_path(
+            tvix_store::import::import_path_as_nar_ca(
                 path,
                 &self.blob_service,
                 &self.directory_service,
diff --git a/tvix/store/src/bin/tvix-store.rs b/tvix/store/src/bin/tvix-store.rs
index 1a5c379b89af..9b07117d71ff 100644
--- a/tvix/store/src/bin/tvix-store.rs
+++ b/tvix/store/src/bin/tvix-store.rs
@@ -305,7 +305,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
                         let path_info_service = path_info_service.clone();
 
                         async move {
-                            let resp = tvix_store::utils::import_path(
+                            let resp = tvix_store::import::import_path_as_nar_ca(
                                 path,
                                 blob_service,
                                 directory_service,
diff --git a/tvix/store/src/import.rs b/tvix/store/src/import.rs
new file mode 100644
index 000000000000..84c73e15544c
--- /dev/null
+++ b/tvix/store/src/import.rs
@@ -0,0 +1,156 @@
+use std::path::Path;
+
+use data_encoding::BASE64;
+use tracing::{debug, instrument};
+use tvix_castore::{
+    blobservice::BlobService, directoryservice::DirectoryService, proto::node::Node,
+};
+
+use nix_compat::store_path::{self, StorePath};
+
+use crate::{
+    pathinfoservice::PathInfoService,
+    proto::{nar_info, NarInfo, PathInfo},
+};
+
+fn log_node(node: &Node, path: &Path) {
+    match node {
+        Node::Directory(directory_node) => {
+            debug!(
+                path = ?path,
+                name = ?directory_node.name,
+                digest = BASE64.encode(&directory_node.digest),
+                "import successful",
+            )
+        }
+        Node::File(file_node) => {
+            debug!(
+                path = ?path,
+                name = ?file_node.name,
+                digest = BASE64.encode(&file_node.digest),
+                "import successful"
+            )
+        }
+        Node::Symlink(symlink_node) => {
+            debug!(
+                path = ?path,
+                name = ?symlink_node.name,
+                target = ?symlink_node.target,
+                "import successful"
+            )
+        }
+    }
+}
+
+/// Transforms a path into its base name, returning an [`std::io::Error`] if the path ends in
+/// `..` or if the basename is not valid unicode.
+#[inline]
+pub fn path_to_name(path: &Path) -> std::io::Result<&str> {
+    path.file_name()
+        .and_then(|file_name| file_name.to_str())
+        .ok_or_else(|| {
+            std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                "path must not be .. and the basename valid unicode",
+            )
+        })
+}
+
+/// Takes the NAR size, SHA-256 of the NAR representation and the root node.
+/// Returns the path information object for a content addressed NAR-style (recursive) object.
+///
+/// This [`PathInfo`] can be further filled for signatures, deriver or verified for the expected
+/// hashes.
+#[inline]
+pub fn derive_nar_ca_path_info(nar_size: u64, nar_sha256: [u8; 32], root_node: Node) -> PathInfo {
+    // assemble the [crate::proto::PathInfo] object.
+    PathInfo {
+        node: Some(tvix_castore::proto::Node {
+            node: Some(root_node),
+        }),
+        // There's no reference scanning on path contents ingested like this.
+        references: vec![],
+        narinfo: Some(NarInfo {
+            nar_size,
+            nar_sha256: nar_sha256.to_vec().into(),
+            signatures: vec![],
+            reference_names: vec![],
+            deriver: None,
+            ca: Some(nar_info::Ca {
+                r#type: nar_info::ca::Hash::NarSha256.into(),
+                digest: nar_sha256.to_vec().into(),
+            }),
+        }),
+    }
+}
+
+/// Ingest the given `path` and register the resulting output path in the
+/// [`PathInfoService`] as a recursive fixed output NAR.
+#[instrument(skip_all, fields(path=?path), err)]
+pub async fn import_path_as_nar_ca<BS, DS, PS, P>(
+    path: P,
+    blob_service: BS,
+    directory_service: DS,
+    path_info_service: PS,
+) -> Result<StorePath, std::io::Error>
+where
+    P: AsRef<Path> + std::fmt::Debug,
+    BS: AsRef<dyn BlobService> + Clone,
+    DS: AsRef<dyn DirectoryService>,
+    PS: AsRef<dyn PathInfoService>,
+{
+    let root_node =
+        tvix_castore::import::ingest_path(blob_service, directory_service, &path).await?;
+
+    // Ask the PathInfoService for the NAR size and sha256
+    let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?;
+
+    // Calculate the output path. This might still fail, as some names are illegal.
+    // FUTUREWORK: take `name` as a parameter here and enforce the validity of the name
+    // at the type level.
+    let name = path_to_name(path.as_ref())?;
+    let output_path = store_path::build_nar_based_store_path(&nar_sha256, name).map_err(|_| {
+        std::io::Error::new(
+            std::io::ErrorKind::InvalidData,
+            format!("invalid name: {}", name),
+        )
+    })?;
+
+    // assemble a new root_node with a name that is derived from the nar hash.
+    let root_node = root_node.rename(output_path.to_string().into_bytes().into());
+    log_node(&root_node, path.as_ref());
+
+    let path_info = derive_nar_ca_path_info(nar_size, nar_sha256, root_node);
+
+    // This new [`PathInfo`] that we get back from there might contain additional signatures or
+    // information set by the service itself. In this function, we silently swallow it because
+    // callers don't really need it.
+    let _path_info = path_info_service.as_ref().put(path_info).await?;
+
+    Ok(output_path.to_owned())
+}
+
+#[cfg(test)]
+mod tests {
+    use std::{ffi::OsStr, path::PathBuf};
+
+    use crate::import::path_to_name;
+    use test_case::test_case;
+
+    #[test_case("a/b/c", "c"; "simple path")]
+    #[test_case("a/b/../c", "c"; "simple path containing ..")]
+    #[test_case("a/b/../c/d/../e", "e"; "path containing multiple ..")]
+
+    fn test_path_to_name(path: &str, expected_name: &str) {
+        let path: PathBuf = path.into();
+        assert_eq!(path_to_name(&path).expect("must succeed"), expected_name);
+    }
+
+    #[test_case(b"a/b/.."; "path ending in ..")]
+    #[test_case(b"\xf8\xa1\xa1\xa1\xa1"; "non unicode path")]
+
+    fn test_invalid_path_to_name(invalid_path: &[u8]) {
+        let path: PathBuf = unsafe { OsStr::from_encoded_bytes_unchecked(invalid_path) }.into();
+        path_to_name(&path).expect_err("must fail");
+    }
+}
diff --git a/tvix/store/src/lib.rs b/tvix/store/src/lib.rs
index 4bc7b5e02d57..2fa86ff6a468 100644
--- a/tvix/store/src/lib.rs
+++ b/tvix/store/src/lib.rs
@@ -1,3 +1,4 @@
+pub mod import;
 pub mod nar;
 pub mod pathinfoservice;
 pub mod proto;
diff --git a/tvix/store/src/utils.rs b/tvix/store/src/utils.rs
index e7e4b7c79fad..041a9e683d59 100644
--- a/tvix/store/src/utils.rs
+++ b/tvix/store/src/utils.rs
@@ -1,18 +1,11 @@
-use std::{path::Path, sync::Arc};
+use std::sync::Arc;
 
-use data_encoding::BASE64;
-use nix_compat::store_path::{self, StorePath};
-use tracing::{debug, instrument};
 use tvix_castore::{
     blobservice::{self, BlobService},
     directoryservice::{self, DirectoryService},
-    proto::node::Node,
 };
 
-use crate::{
-    pathinfoservice::{self, PathInfoService},
-    proto::{nar_info, NarInfo, PathInfo},
-};
+use crate::pathinfoservice::{self, PathInfoService};
 
 /// Construct the three store handles from their addrs.
 pub async fn construct_services(
@@ -40,110 +33,3 @@ pub async fn construct_services(
 
     Ok((blob_service, directory_service, path_info_service))
 }
-
-/// Imports a given path on the filesystem into the store, and returns the
-/// [PathInfo] describing the path, that was sent to
-/// [PathInfoService].
-#[instrument(skip_all, fields(path=?path), err)]
-pub async fn import_path<BS, DS, PS, P>(
-    path: P,
-    blob_service: BS,
-    directory_service: DS,
-    path_info_service: PS,
-) -> Result<StorePath, std::io::Error>
-where
-    P: AsRef<Path> + std::fmt::Debug,
-    BS: AsRef<dyn BlobService> + Clone,
-    DS: AsRef<dyn DirectoryService>,
-    PS: AsRef<dyn PathInfoService>,
-{
-    // calculate the name
-    // TODO: make a path_to_name helper function?
-    let name = path
-        .as_ref()
-        .file_name()
-        .and_then(|file_name| file_name.to_str())
-        .ok_or_else(|| {
-            std::io::Error::new(
-                std::io::ErrorKind::InvalidInput,
-                "path must not be .. and the basename valid unicode",
-            )
-        })?;
-
-    // Ingest the path into blob and directory service.
-    let root_node = tvix_castore::import::ingest_path(blob_service, &directory_service, &path)
-        .await
-        .expect("failed to ingest path");
-
-    debug!(root_node =?root_node, "import successful");
-
-    // Ask the PathInfoService for the NAR size and sha256
-    let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?;
-
-    // Calculate the output path. This might still fail, as some names are illegal.
-    let output_path = store_path::build_nar_based_store_path(&nar_sha256, name).map_err(|_| {
-        std::io::Error::new(
-            std::io::ErrorKind::InvalidData,
-            format!("invalid name: {}", name),
-        )
-    })?;
-
-    // assemble a new root_node with a name that is derived from the nar hash.
-    let root_node = root_node.rename(output_path.to_string().into_bytes().into());
-    log_node(&root_node, path.as_ref());
-
-    // assemble the [crate::proto::PathInfo] object.
-    let path_info = PathInfo {
-        node: Some(tvix_castore::proto::Node {
-            node: Some(root_node),
-        }),
-        // There's no reference scanning on path contents ingested like this.
-        references: vec![],
-        narinfo: Some(NarInfo {
-            nar_size,
-            nar_sha256: nar_sha256.to_vec().into(),
-            signatures: vec![],
-            reference_names: vec![],
-            deriver: None,
-            ca: Some(nar_info::Ca {
-                r#type: nar_info::ca::Hash::NarSha256.into(),
-                digest: nar_sha256.to_vec().into(),
-            }),
-        }),
-    };
-
-    // put into [PathInfoService], and return the PathInfo that we get back
-    // from there (it might contain additional signatures).
-    let _path_info = path_info_service.as_ref().put(path_info).await?;
-
-    Ok(output_path.to_owned())
-}
-
-fn log_node(node: &Node, path: &Path) {
-    match node {
-        Node::Directory(directory_node) => {
-            debug!(
-                path = ?path,
-                name = ?directory_node.name,
-                digest = BASE64.encode(&directory_node.digest),
-                "import successful",
-            )
-        }
-        Node::File(file_node) => {
-            debug!(
-                path = ?path,
-                name = ?file_node.name,
-                digest = BASE64.encode(&file_node.digest),
-                "import successful"
-            )
-        }
-        Node::Symlink(symlink_node) => {
-            debug!(
-                path = ?path,
-                name = ?symlink_node.name,
-                target = ?symlink_node.target,
-                "import successful"
-            )
-        }
-    }
-}