From 4c3ba46ba36b2fa2d9079fcc92ef27875f26418b Mon Sep 17 00:00:00 2001 From: Ryan Lahfa Date: Mon, 8 Jan 2024 09:50:13 +0100 Subject: refactor(tvix/store): `import_path` → `import_path_as_nar_ca` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add multiple additional helpers such as: - `path_to_name`: derive the basename of a given path - `derive_nar_ca_path_info`: derive the `PathInfo` for a content addressed NAR which isolates further the tree walking feature and the ingestion feature. Additionally, we don't `expect` anymore and propagate properly ingestion errors up. Change-Id: I60edb5b633911c58ade7e19f5002e6f75f90e262 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10574 Reviewed-by: flokli Tested-by: BuildkiteCI Autosubmit: raitobezarius --- tvix/glue/src/tvix_store_io.rs | 2 +- tvix/store/src/bin/tvix-store.rs | 2 +- tvix/store/src/import.rs | 156 +++++++++++++++++++++++++++++++++++++++ tvix/store/src/lib.rs | 1 + tvix/store/src/utils.rs | 118 +---------------------------- 5 files changed, 161 insertions(+), 118 deletions(-) create mode 100644 tvix/store/src/import.rs diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index c72b5d108504..45bcf0df3ae8 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -274,7 +274,7 @@ impl EvalIO for TvixStoreIO { #[instrument(skip(self), ret, err)] fn import_path(&self, path: &Path) -> io::Result { let output_path = self.tokio_handle.block_on(async { - tvix_store::utils::import_path( + tvix_store::import::import_path_as_nar_ca( path, &self.blob_service, &self.directory_service, diff --git a/tvix/store/src/bin/tvix-store.rs b/tvix/store/src/bin/tvix-store.rs index 1a5c379b89af..9b07117d71ff 100644 --- a/tvix/store/src/bin/tvix-store.rs +++ b/tvix/store/src/bin/tvix-store.rs @@ -305,7 +305,7 @@ async fn main() -> Result<(), Box> { let path_info_service = path_info_service.clone(); async move { - let resp = 
tvix_store::utils::import_path( + let resp = tvix_store::import::import_path_as_nar_ca( path, blob_service, directory_service, diff --git a/tvix/store/src/import.rs b/tvix/store/src/import.rs new file mode 100644 index 000000000000..84c73e15544c --- /dev/null +++ b/tvix/store/src/import.rs @@ -0,0 +1,156 @@ +use std::path::Path; + +use data_encoding::BASE64; +use tracing::{debug, instrument}; +use tvix_castore::{ + blobservice::BlobService, directoryservice::DirectoryService, proto::node::Node, +}; + +use nix_compat::store_path::{self, StorePath}; + +use crate::{ + pathinfoservice::PathInfoService, + proto::{nar_info, NarInfo, PathInfo}, +}; + +fn log_node(node: &Node, path: &Path) { + match node { + Node::Directory(directory_node) => { + debug!( + path = ?path, + name = ?directory_node.name, + digest = BASE64.encode(&directory_node.digest), + "import successful", + ) + } + Node::File(file_node) => { + debug!( + path = ?path, + name = ?file_node.name, + digest = BASE64.encode(&file_node.digest), + "import successful" + ) + } + Node::Symlink(symlink_node) => { + debug!( + path = ?path, + name = ?symlink_node.name, + target = ?symlink_node.target, + "import successful" + ) + } + } +} + +/// Transform a path into its base name and returns an [`std::io::Error`] if it is `..` or if the +/// basename is not valid unicode. +#[inline] +pub fn path_to_name(path: &Path) -> std::io::Result<&str> { + path.file_name() + .and_then(|file_name| file_name.to_str()) + .ok_or_else(|| { + std::io::Error::new( + std::io::ErrorKind::InvalidInput, + "path must not be .. and the basename valid unicode", + ) + }) +} + +/// Takes the NAR size, SHA-256 of the NAR representation and the root node. +/// Returns the path information object for a content addressed NAR-style (recursive) object. +/// +/// This [`PathInfo`] can be further filled for signatures, deriver or verified for the expected +/// hashes. 
+#[inline] +pub fn derive_nar_ca_path_info(nar_size: u64, nar_sha256: [u8; 32], root_node: Node) -> PathInfo { + // assemble the [crate::proto::PathInfo] object. + PathInfo { + node: Some(tvix_castore::proto::Node { + node: Some(root_node), + }), + // There's no reference scanning on path contents ingested like this. + references: vec![], + narinfo: Some(NarInfo { + nar_size, + nar_sha256: nar_sha256.to_vec().into(), + signatures: vec![], + reference_names: vec![], + deriver: None, + ca: Some(nar_info::Ca { + r#type: nar_info::ca::Hash::NarSha256.into(), + digest: nar_sha256.to_vec().into(), + }), + }), + } +} + +/// Ingest the given `path` and register the resulting output path in the +/// [`PathInfoService`] as a recursive fixed output NAR. +#[instrument(skip_all, fields(path=?path), err)] +pub async fn import_path_as_nar_ca<BS, DS, PS, P>( + path: P, + blob_service: BS, + directory_service: DS, + path_info_service: PS, +) -> Result<StorePath, std::io::Error> +where + P: AsRef<Path> + std::fmt::Debug, + BS: AsRef<dyn BlobService> + Clone, + DS: AsRef<dyn DirectoryService>, + PS: AsRef<dyn PathInfoService>, +{ + let root_node = + tvix_castore::import::ingest_path(blob_service, directory_service, &path).await?; + + // Ask the PathInfoService for the NAR size and sha256 + let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?; + + // Calculate the output path. This might still fail, as some names are illegal. + // FUTUREWORK: take `name` as a parameter here and enforce the validity of the name + // at the type level. + let name = path_to_name(path.as_ref())?; + let output_path = store_path::build_nar_based_store_path(&nar_sha256, name).map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("invalid name: {}", name), + ) + })?; + + // assemble a new root_node with a name that is derived from the nar hash. 
+ let root_node = root_node.rename(output_path.to_string().into_bytes().into()); + log_node(&root_node, path.as_ref()); + + let path_info = derive_nar_ca_path_info(nar_size, nar_sha256, root_node); + + // This new [`PathInfo`] that we get back from there might contain additional signatures or + // information set by the service itself. In this function, we silently swallow it because + // callers don't really need it. + let _path_info = path_info_service.as_ref().put(path_info).await?; + + Ok(output_path.to_owned()) +} + +#[cfg(test)] +mod tests { + use std::{ffi::OsStr, path::PathBuf}; + + use crate::import::path_to_name; + use test_case::test_case; + + #[test_case("a/b/c", "c"; "simple path")] + #[test_case("a/b/../c", "c"; "simple path containing ..")] + #[test_case("a/b/../c/d/../e", "e"; "path containing multiple ..")] + + fn test_path_to_name(path: &str, expected_name: &str) { + let path: PathBuf = path.into(); + assert_eq!(path_to_name(&path).expect("must succeed"), expected_name); + } + + #[test_case(b"a/b/.."; "path ending in ..")] + #[test_case(b"\xf8\xa1\xa1\xa1\xa1"; "non unicode path")] + + fn test_invalid_path_to_name(invalid_path: &[u8]) { + let path: PathBuf = unsafe { OsStr::from_encoded_bytes_unchecked(invalid_path) }.into(); + path_to_name(&path).expect_err("must fail"); + } +} diff --git a/tvix/store/src/lib.rs b/tvix/store/src/lib.rs index 4bc7b5e02d57..2fa86ff6a468 100644 --- a/tvix/store/src/lib.rs +++ b/tvix/store/src/lib.rs @@ -1,3 +1,4 @@ +pub mod import; pub mod nar; pub mod pathinfoservice; pub mod proto; diff --git a/tvix/store/src/utils.rs b/tvix/store/src/utils.rs index e7e4b7c79fad..041a9e683d59 100644 --- a/tvix/store/src/utils.rs +++ b/tvix/store/src/utils.rs @@ -1,18 +1,11 @@ -use std::{path::Path, sync::Arc}; +use std::sync::Arc; -use data_encoding::BASE64; -use nix_compat::store_path::{self, StorePath}; -use tracing::{debug, instrument}; use tvix_castore::{ blobservice::{self, BlobService}, directoryservice::{self, 
DirectoryService}, - proto::node::Node, }; -use crate::{ - pathinfoservice::{self, PathInfoService}, - proto::{nar_info, NarInfo, PathInfo}, -}; +use crate::pathinfoservice::{self, PathInfoService}; /// Construct the three store handles from their addrs. pub async fn construct_services( @@ -40,110 +33,3 @@ pub async fn construct_services( Ok((blob_service, directory_service, path_info_service)) } - -/// Imports a given path on the filesystem into the store, and returns the -/// [PathInfo] describing the path, that was sent to -/// [PathInfoService]. -#[instrument(skip_all, fields(path=?path), err)] -pub async fn import_path<BS, DS, PS, P>( - path: P, - blob_service: BS, - directory_service: DS, - path_info_service: PS, -) -> Result<StorePath, std::io::Error> -where - P: AsRef<Path> + std::fmt::Debug, - BS: AsRef<dyn BlobService> + Clone, - DS: AsRef<dyn DirectoryService>, - PS: AsRef<dyn PathInfoService>, -{ - // calculate the name - // TODO: make a path_to_name helper function? - let name = path - .as_ref() - .file_name() - .and_then(|file_name| file_name.to_str()) - .ok_or_else(|| { - std::io::Error::new( - std::io::ErrorKind::InvalidInput, - "path must not be .. and the basename valid unicode", - ) - })?; - - // Ingest the path into blob and directory service. - let root_node = tvix_castore::import::ingest_path(blob_service, &directory_service, &path) - .await - .expect("failed to ingest path"); - - debug!(root_node =?root_node, "import successful"); - - // Ask the PathInfoService for the NAR size and sha256 - let (nar_size, nar_sha256) = path_info_service.as_ref().calculate_nar(&root_node).await?; - - // Calculate the output path. This might still fail, as some names are illegal. - let output_path = store_path::build_nar_based_store_path(&nar_sha256, name).map_err(|_| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("invalid name: {}", name), - ) - })?; - - // assemble a new root_node with a name that is derived from the nar hash. 
- let root_node = root_node.rename(output_path.to_string().into_bytes().into()); - log_node(&root_node, path.as_ref()); - - // assemble the [crate::proto::PathInfo] object. - let path_info = PathInfo { - node: Some(tvix_castore::proto::Node { - node: Some(root_node), - }), - // There's no reference scanning on path contents ingested like this. - references: vec![], - narinfo: Some(NarInfo { - nar_size, - nar_sha256: nar_sha256.to_vec().into(), - signatures: vec![], - reference_names: vec![], - deriver: None, - ca: Some(nar_info::Ca { - r#type: nar_info::ca::Hash::NarSha256.into(), - digest: nar_sha256.to_vec().into(), - }), - }), - }; - - // put into [PathInfoService], and return the PathInfo that we get back - // from there (it might contain additional signatures). - let _path_info = path_info_service.as_ref().put(path_info).await?; - - Ok(output_path.to_owned()) -} - -fn log_node(node: &Node, path: &Path) { - match node { - Node::Directory(directory_node) => { - debug!( - path = ?path, - name = ?directory_node.name, - digest = BASE64.encode(&directory_node.digest), - "import successful", - ) - } - Node::File(file_node) => { - debug!( - path = ?path, - name = ?file_node.name, - digest = BASE64.encode(&file_node.digest), - "import successful" - ) - } - Node::Symlink(symlink_node) => { - debug!( - path = ?path, - name = ?symlink_node.name, - target = ?symlink_node.target, - "import successful" - ) - } - } -} -- cgit 1.4.1