diff options
author | Florian Klink <flokli@flokli.de> | 2024-10-15T13·11+0300 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-10-15T23·26+0000 |
commit | ca1e628c8573fe4f054af510918b81a378d7eb9a (patch) | |
tree | 74ffa5ada0387ddaff4b5cfb88dfbfcea485cacb /tvix/glue | |
parent | baebe29bab5d02607b7811dbc2542f708aee2665 (diff) |
refactor(tvix/glue/builtins/import): refactor r/8811
This removes all the intermediate helper functions and reorganizes the import code to only do the calculations where/when needed, and hopefully makes things easier to understand as well. Change-Id: I7e4c89c742bf8569b45e303523f7f801da7127ea Reviewed-on: https://cl.tvl.fyi/c/depot/+/12627 Autosubmit: flokli <flokli@flokli.de> Tested-by: BuildkiteCI Reviewed-by: Jörg Thalheim <joerg@thalheim.io> Reviewed-by: edef <edef@edef.eu>
Diffstat (limited to 'tvix/glue')
-rw-r--r-- | tvix/glue/src/builtins/errors.rs | 4 | ||||
-rw-r--r-- | tvix/glue/src/builtins/import.rs | 276 | ||||
-rw-r--r-- | tvix/glue/src/tvix_store_io.rs | 56 |
3 files changed, 175 insertions, 161 deletions
diff --git a/tvix/glue/src/builtins/errors.rs b/tvix/glue/src/builtins/errors.rs index d41c7d8e4998..ec85942bb1ee 100644 --- a/tvix/glue/src/builtins/errors.rs +++ b/tvix/glue/src/builtins/errors.rs @@ -64,10 +64,10 @@ pub enum FetcherError { #[derive(Debug, Error)] pub enum ImportError { #[error("non-file '{0}' cannot be imported in 'flat' mode")] - FlatImportOfNonFile(String), + FlatImportOfNonFile(PathBuf), #[error("hash mismatch at ingestion of '{0}', expected: '{1}', got: '{2}'")] - HashMismatch(String, NixHash, NixHash), + HashMismatch(PathBuf, NixHash, NixHash), #[error("path '{}' is not absolute or invalid", .0.display())] PathNotAbsoluteOrInvalid(PathBuf), diff --git a/tvix/glue/src/builtins/import.rs b/tvix/glue/src/builtins/import.rs index d2e2e6c5bd7d..db60f44de183 100644 --- a/tvix/glue/src/builtins/import.rs +++ b/tvix/glue/src/builtins/import.rs @@ -112,7 +112,7 @@ mod import_builtins { use crate::tvix_store_io::TvixStoreIO; use bstr::ByteSlice; use nix_compat::nixhash::{CAHash, NixHash}; - use nix_compat::store_path::StorePathRef; + use nix_compat::store_path::{build_ca_path, StorePathRef}; use sha2::Digest; use std::rc::Rc; use tokio::io::AsyncWriteExt; @@ -120,6 +120,7 @@ mod import_builtins { use tvix_eval::generators::Gen; use tvix_eval::{generators::GenCo, ErrorKind, Value}; use tvix_eval::{FileType, NixContextElement, NixString}; + use tvix_store::path_info::PathInfo; #[builtin("path")] async fn builtin_path( @@ -147,124 +148,128 @@ mod import_builtins { .expect("Failed to derive the default name out of the path") .to_string() }; + let filter = args.select("filter"); + + // Construct a sha256 hasher, which is needed for flat ingestion. let recursive_ingestion = args .select("recursive") .map(|r| r.as_bool()) .transpose()? .unwrap_or(true); // Yes, yes, Nix, by default, puts `recursive = true;`. + let expected_sha256 = args .select("sha256") .map(|h| { h.to_str().and_then(|expected| { - let expected = expected.into_bstring().to_string(); - // TODO: ensure that we fail if this is not a valid str. - nix_compat::nixhash::from_str(&expected, None).map_err(|_err| { - // TODO: a better error would be nice, we use - // DerivationError::InvalidOutputHash usually for derivation construction. - // This is not a derivation construction, should we move it outside and - // generalize? - ErrorKind::TypeError { - expected: "sha256", - actual: "not a sha256", + match nix_compat::nixhash::from_str( + expected.into_bstring().to_str()?, + Some("sha256"), + ) { + Ok(NixHash::Sha256(digest)) => Ok(digest), + Ok(_) => unreachable!(), + Err(_e) => { + // TODO: a better error would be nice, we use + // DerivationError::InvalidOutputHash usually for derivation construction. + // This is not a derivation construction, should we move it outside and + // generalize? + Err(ErrorKind::TypeError { + expected: "sha256", + actual: "not a sha256", + }) } - }) + } }) }) .transpose()?; - // Check if the path points to a regular file. - // If it does, the filter function is never executed. - // TODO: follow symlinks and check their type instead - let (root_node, ca_hash) = match state.file_type(path.as_ref())? { + // As a first step, we ingest the contents, and get back a root node, + // and optionally the sha256 a flat file. + let (root_node, ca) = match state.file_type(path.as_ref())? { + // Check if the path points to a regular file. + // If it does, the filter function is never executed, and we copy to the blobservice directly. + // If recursive is false, we need to calculate the sha256 digest of the raw contents, + // as that affects the output path calculation. FileType::Regular => { let mut file = state.open(path.as_ref())?; - // This is a single file, copy it to the blobservice directly. - let mut hash = sha2::Sha256::new(); + + let mut flat_sha256 = (!recursive_ingestion).then(sha2::Sha256::new); let mut blob_size = 0; + let mut blob_writer = state .tokio_handle .block_on(async { state.blob_service.open_write().await }); - let mut buf = [0u8; 4096]; + // read piece by piece and write to blob_writer. + // This is a bit manual due to EvalIO being sync, while everything else async. + { + let mut buf = [0u8; 4096]; - loop { - // read bytes into buffer, break out if EOF - let len = file.read(&mut buf)?; - if len == 0 { - break; - } - blob_size += len as u64; + loop { + // read bytes into buffer, break out if EOF + let len = file.read(&mut buf)?; + if len == 0 { + break; + } + blob_size += len as u64; - let data = &buf[0..len]; + let data = &buf[0..len]; - // add to blobwriter - state - .tokio_handle - .block_on(async { blob_writer.write_all(data).await })?; + // add to blobwriter + state + .tokio_handle + .block_on(async { blob_writer.write_all(data).await })?; - // update the sha256 hash function. We can skip that if we're not using it. - if !recursive_ingestion { - hash.update(data); + // update blob_sha256 if needed. + if let Some(h) = flat_sha256.as_mut() { + h.update(data) + } } } - // close the blob writer, get back the b3 digest. - let blob_digest = state - .tokio_handle - .block_on(async { blob_writer.close().await })?; - - let root_node = Node::File { - digest: blob_digest, - size: blob_size, - executable: false, - }; - - let ca_hash = if recursive_ingestion { - let (_nar_size, nar_sha256) = state - .tokio_handle - .block_on(async { - state - .nar_calculation_service - .as_ref() - .calculate_nar(&root_node) - .await - }) - .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; - CAHash::Nar(NixHash::Sha256(nar_sha256)) - } else { - CAHash::Flat(NixHash::Sha256(hash.finalize().into())) - }; - - (root_node, ca_hash) + // close the blob writer, construct the root node and the blob_sha256 (later used for output path calculation) + ( + Node::File { + digest: state + .tokio_handle + .block_on(async { blob_writer.close().await })?, + size: blob_size, + executable: false, + }, + { + // If non-recursive ingestion is requested… + if let Some(flat_sha256) = flat_sha256 { + let actual_sha256 = flat_sha256.finalize().into(); + + // compare the recorded flat hash with an upfront one if provided. + if let Some(expected_sha256) = expected_sha256 { + if actual_sha256 != expected_sha256 { + return Err(ImportError::HashMismatch( + path, + NixHash::Sha256(expected_sha256), + NixHash::Sha256(actual_sha256), + ) + .into()); + } + } + + Some(CAHash::Flat(NixHash::Sha256(actual_sha256))) + } else { + None + } + }, + ) } - FileType::Directory => { - if !recursive_ingestion { - return Err(ImportError::FlatImportOfNonFile( - path.to_string_lossy().to_string(), - ))?; - } - - // do the filtered ingest - let root_node = filtered_ingest(state.clone(), co, path.as_ref(), filter).await?; - - // calculate the NAR sha256 - let (_nar_size, nar_sha256) = state - .tokio_handle - .block_on(async { - state - .nar_calculation_service - .as_ref() - .calculate_nar(&root_node) - .await - }) - .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; - - let ca_hash = CAHash::Nar(NixHash::Sha256(nar_sha256)); - - (root_node, ca_hash) + FileType::Directory if !recursive_ingestion => { + return Err(ImportError::FlatImportOfNonFile(path))? } + + // do the filtered ingest + FileType::Directory => ( + filtered_ingest(state.clone(), co, path.as_ref(), filter).await?, + None, + ), FileType::Symlink => { // FUTUREWORK: Nix follows a symlink if it's at the root, // except if it's not resolve-able (NixOS/nix#7761).i @@ -287,26 +292,63 @@ mod import_builtins { } }; - if let Some(expected_sha256) = expected_sha256 { - if *ca_hash.hash() != expected_sha256 { - Err(ImportError::HashMismatch( - path.to_string_lossy().to_string(), - expected_sha256, - ca_hash.hash().into_owned(), - ))?; + // Calculate the NAR sha256. + let (nar_size, nar_sha256) = state + .tokio_handle + .block_on(async { + state + .nar_calculation_service + .as_ref() + .calculate_nar(&root_node) + .await + }) + .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; + + // Calculate the CA hash for the recursive cases, this is only already + // `Some(_)` for flat ingestion. + let ca = match ca { + None => { + // If an upfront-expected NAR hash was specified, compare. + if let Some(expected_nar_sha256) = expected_sha256 { + if expected_nar_sha256 != nar_sha256 { + return Err(ImportError::HashMismatch( + path, + NixHash::Sha256(expected_nar_sha256), + NixHash::Sha256(nar_sha256), + ) + .into()); + } + } + CAHash::Nar(NixHash::Sha256(nar_sha256)) } - } + Some(ca) => ca, + }; + + let store_path = build_ca_path(&name, &ca, Vec::<&str>::new(), false) + .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?; let path_info = state .tokio_handle .block_on(async { state - .register_in_path_info_service(name.as_ref(), path.as_ref(), ca_hash, root_node) + .path_info_service + .as_ref() + .put(PathInfo { + store_path, + node: root_node, + // There's no reference scanning on path contents ingested like this. + references: vec![], + nar_size, + nar_sha256, + signatures: vec![], + deriver: None, + ca: Some(ca), + }) .await }) .map_err(|e| tvix_eval::ErrorKind::IO { path: Some(path.to_path_buf()), - error: Rc::new(e), + error: Rc::new(e.into()), })?; // We need to attach context to the final output path. @@ -332,24 +374,46 @@ mod import_builtins { let path_info = state .tokio_handle .block_on(async { - let (_, nar_sha256) = state + // Ask the PathInfoService for the NAR size and sha256 + // We always need it no matter what is the actual hash mode + // because the [PathInfo] needs to contain nar_{sha256,size}. + let (nar_size, nar_sha256) = state .nar_calculation_service .as_ref() .calculate_nar(&root_node) .await?; + let ca = CAHash::Nar(NixHash::Sha256(nar_sha256)); + + // Calculate the output path. This might still fail, as some names are illegal. + let output_path = + nix_compat::store_path::build_ca_path(name, &ca, Vec::<&str>::new(), false) + .map_err(|_| { + std::io::Error::new( + std::io::ErrorKind::InvalidData, + format!("invalid name: {}", name), + ) + })?; + state - .register_in_path_info_service( - name, - &p, - CAHash::Nar(NixHash::Sha256(nar_sha256)), - root_node, - ) + .path_info_service + .as_ref() + .put(PathInfo { + store_path: output_path, + node: root_node, + // There's no reference scanning on path contents ingested like this. + references: vec![], + nar_size, + nar_sha256, + signatures: vec![], + deriver: None, + ca: Some(ca), + }) .await }) - .map_err(|err| ErrorKind::IO { + .map_err(|e| ErrorKind::IO { path: Some(p.to_path_buf()), - error: err.into(), + error: Rc::new(e.into()), })?; // We need to attach context to the final output path. diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index b5877566d524..839d2ae85845 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -384,56 +384,6 @@ impl TvixStoreIO { .await .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e)) } - - async fn node_to_path_info<'a>( - &self, - name: &'a str, - path: &Path, - ca: CAHash, - root_node: Node, - ) -> io::Result<PathInfo> { - // Ask the PathInfoService for the NAR size and sha256 - // We always need it no matter what is the actual hash mode - // because the [PathInfo] needs to contain nar_{sha256,size}. - let (nar_size, nar_sha256) = self - .nar_calculation_service - .as_ref() - .calculate_nar(&root_node) - .await?; - - // Calculate the output path. This might still fail, as some names are illegal. - let output_path = - nix_compat::store_path::build_ca_path(name, &ca, Vec::<&str>::new(), false).map_err( - |_| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - format!("invalid name: {}", name), - ) - }, - )?; - - tvix_store::import::log_node(name.as_bytes(), &root_node, path); - - // construct a PathInfo - Ok(tvix_store::import::derive_nar_ca_path_info( - nar_size, - nar_sha256, - Some(ca), - output_path, - root_node, - )) - } - - pub(crate) async fn register_in_path_info_service<'a>( - &self, - name: &'a str, - path: &Path, - ca: CAHash, - root_node: Node, - ) -> io::Result<PathInfo> { - let path_info = self.node_to_path_info(name, path, ca, root_node).await?; - Ok(self.path_info_service.as_ref().put(path_info).await?) - } } impl EvalIO for TvixStoreIO { @@ -589,7 +539,7 @@ impl EvalIO for TvixStoreIO { #[instrument(skip(self), ret(level = Level::TRACE), err)] fn import_path(&self, path: &Path) -> io::Result<PathBuf> { - let output_path = self.tokio_handle.block_on(async { + let path_info = self.tokio_handle.block_on({ tvix_store::import::import_path_as_nar_ca( path, tvix_store::import::path_to_name(path)?, @@ -598,10 +548,10 @@ impl EvalIO for TvixStoreIO { &self.path_info_service, &self.nar_calculation_service, ) - .await })?; - Ok(output_path.to_absolute_path().into()) + // From the returned PathInfo, extract the store path and return it. + Ok(path_info.store_path.to_absolute_path().into()) } #[instrument(skip(self), ret(level = Level::TRACE))] |