about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-06-25T16·40+0300
committerflokli <flokli@flokli.de>2024-06-26T04·51+0000
commit78eb22c54d304715f527fa0e350f8056a3a354cf (patch)
tree89d227ec0918e567576b67159a99a17a6a679fff
parent080654aaf9bd2f94d634008afd1dc26c74752eec (diff)
feat(tvix/glue): handle regular file at builtins.path import r/8305
If builtins.path is passed a regular file, no filtering is applied.
We use the just-introduced file_type function in the EvalIO trait for
that.

This means, we don't need to pass through filtered_ingest, and can
assemble the FileNode directly in that specific match case.

This also means, we can explicitly calculate the sha256 flat digest,
and avoid having to pipe through the file contents again (via
blob_to_sha256_hash) to construct the sha256 digest.

Change-Id: I500b19dd9e4b7cc897d88b44547e7851559e5a4e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11872
Tested-by: BuildkiteCI
Reviewed-by: Ilan Joselevich <personal@ilanjoselevich.com>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
-rw-r--r--tvix/docs/src/TODO.md9
-rw-r--r--tvix/glue/src/builtins/import.rs155
-rw-r--r--tvix/glue/src/tvix_store_io.rs22
3 files changed, 114 insertions, 72 deletions
diff --git a/tvix/docs/src/TODO.md b/tvix/docs/src/TODO.md
index 92d7c4cace81..127fb6f4d0c1 100644
--- a/tvix/docs/src/TODO.md
+++ b/tvix/docs/src/TODO.md
@@ -127,15 +127,6 @@ Some more fetcher-related builtins need work:
  - `fetchTree` (hairy, seems there's no proper spec and the URL syntax seems
    subject to change/underdocumented)
 
-### `builtins.path` roundtrip for flat
-`builtins.path` currently uses `filtered_ingest` also for the non-recursive
-case, then reads through the blob contents again to get the sha256.
-
-We should take care of assembling the root node on our own, and pipe the data
-through sha256 too (via `InspectReader`, see `glue/fetcher` for an example).
-
-This avoids some roundtrips, and is probably faster.
-
 ### Derivation -> Build
 While we have some support for `structuredAttrs` and `fetchClosure` (at least
 enough to calculate output hashes, aka produce identical ATerm), the code
diff --git a/tvix/glue/src/builtins/import.rs b/tvix/glue/src/builtins/import.rs
index 4a8a29b417df..9257975a09d6 100644
--- a/tvix/glue/src/builtins/import.rs
+++ b/tvix/glue/src/builtins/import.rs
@@ -109,15 +109,16 @@ mod import_builtins {
 
     use super::*;
 
+    use crate::tvix_store_io::TvixStoreIO;
     use nix_compat::nixhash::{CAHash, NixHash};
     use nix_compat::store_path::StorePath;
+    use sha2::Digest;
+    use tokio::io::AsyncWriteExt;
+    use tvix_castore::proto::node::Node;
+    use tvix_castore::proto::FileNode;
     use tvix_eval::generators::Gen;
     use tvix_eval::{generators::GenCo, ErrorKind, Value};
-    use tvix_eval::{NixContextElement, NixString};
-
-    use tvix_castore::B3Digest;
-
-    use crate::tvix_store_io::TvixStoreIO;
+    use tvix_eval::{FileType, NixContextElement, NixString};
 
     #[builtin("path")]
     async fn builtin_path(
@@ -167,54 +168,126 @@ mod import_builtins {
             })
             .transpose()?;
 
-        // FUTUREWORK(performance): this opens the file instead of using a stat-like
-        // system call to the file.
-        if !recursive_ingestion && state.open(path.as_ref()).is_err() {
-            Err(ImportError::FlatImportOfNonFile(
-                path.to_string_lossy().to_string(),
-            ))?;
-        }
+        // Check if the path points to a regular file.
+        // If it does, the filter function is never executed.
+        // TODO: follow symlinks and check their type instead
+        let (root_node, ca_hash) = match state.file_type(path.as_ref())? {
+            FileType::Regular => {
+                let mut file = state.open(path.as_ref())?;
+                // This is a single file, copy it to the blobservice directly.
+                let mut hash = sha2::Sha256::new();
+                let mut blob_size = 0;
+                let mut blob_writer = state
+                    .tokio_handle
+                    .block_on(async { state.blob_service.open_write().await });
+
+                let mut buf = [0u8; 4096];
 
-        let root_node = filtered_ingest(state.clone(), co, path.as_ref(), filter).await?;
-        let ca: CAHash = if recursive_ingestion {
-            CAHash::Nar(NixHash::Sha256(state.tokio_handle.block_on(async {
-                Ok::<_, tvix_eval::ErrorKind>(
+                loop {
+                    // read bytes into buffer, break out if EOF
+                    let len = file.read(&mut buf)?;
+                    if len == 0 {
+                        break;
+                    }
+                    blob_size += len as u64;
+
+                    let data = &buf[0..len];
+
+                    // add to blobwriter
                     state
-                        .nar_calculation_service
-                        .as_ref()
-                        .calculate_nar(&root_node)
-                        .await
-                        .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?
-                        .1,
-                )
-            })?))
-        } else {
-            let digest: B3Digest = match root_node {
-                tvix_castore::proto::node::Node::File(ref fnode) => {
-                    // It's already validated.
-                    fnode.digest.clone().try_into().unwrap()
+                        .tokio_handle
+                        .block_on(async { blob_writer.write_all(data).await })?;
+
+                    // update the sha256 hash function. We can skip that if we're not using it.
+                    if !recursive_ingestion {
+                        hash.update(data);
+                    }
                 }
-                // We cannot hash anything else than file in flat import mode.
-                _ => {
+
+                // close the blob writer, get back the b3 digest.
+                let blob_digest = state
+                    .tokio_handle
+                    .block_on(async { blob_writer.close().await })?;
+
+                let root_node = Node::File(FileNode {
+                    // The name gets set further down, while constructing the PathInfo.
+                    name: "".into(),
+                    digest: blob_digest.into(),
+                    size: blob_size,
+                    executable: false,
+                });
+
+                let ca_hash = if recursive_ingestion {
+                    let (_nar_size, nar_sha256) = state
+                        .tokio_handle
+                        .block_on(async {
+                            state
+                                .nar_calculation_service
+                                .as_ref()
+                                .calculate_nar(&root_node)
+                                .await
+                        })
+                        .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?;
+                    CAHash::Nar(NixHash::Sha256(nar_sha256))
+                } else {
+                    CAHash::Flat(NixHash::Sha256(hash.finalize().into()))
+                };
+
+                (root_node, ca_hash)
+            }
+
+            FileType::Directory => {
+                if !recursive_ingestion {
                     return Err(ImportError::FlatImportOfNonFile(
                         path.to_string_lossy().to_string(),
-                    )
-                    .into())
+                    ))?;
                 }
-            };
 
-            // FUTUREWORK: avoid hashing again.
-            CAHash::Flat(NixHash::Sha256(
-                state
+                // do the filtered ingest
+                let root_node = filtered_ingest(state.clone(), co, path.as_ref(), filter).await?;
+
+                // calculate the NAR sha256
+                let (_nar_size, nar_sha256) = state
                     .tokio_handle
-                    .block_on(async { state.blob_to_sha256_hash(digest).await })?,
-            ))
+                    .block_on(async {
+                        state
+                            .nar_calculation_service
+                            .as_ref()
+                            .calculate_nar(&root_node)
+                            .await
+                    })
+                    .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?;
+
+                let ca_hash = CAHash::Nar(NixHash::Sha256(nar_sha256));
+
+                (root_node, ca_hash)
+            }
+            FileType::Symlink => {
+                // FUTUREWORK: Nix follows a symlink if it's at the root,
+                // except if it's not resolve-able (NixOS/nix#7761).i
+                return Err(tvix_eval::ErrorKind::IO {
+                    path: Some(path.to_path_buf()),
+                    error: Rc::new(std::io::Error::new(
+                        std::io::ErrorKind::Unsupported,
+                        "builtins.path pointing to a symlink is ill-defined.",
+                    )),
+                });
+            }
+            FileType::Unknown => {
+                return Err(tvix_eval::ErrorKind::IO {
+                    path: Some(path.to_path_buf()),
+                    error: Rc::new(std::io::Error::new(
+                        std::io::ErrorKind::Unsupported,
+                        "unsupported file type",
+                    )),
+                })
+            }
         };
 
-        let obtained_hash = ca.hash().clone().into_owned();
+        let obtained_hash = ca_hash.hash().clone().into_owned();
         let (path_info, _hash, output_path) = state.tokio_handle.block_on(async {
             state
-                .node_to_path_info(name.as_ref(), path.as_ref(), ca, root_node)
+                .node_to_path_info(name.as_ref(), path.as_ref(), ca_hash, root_node)
                 .await
         })?;
 
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
index dd034d74bf3e..b0367f60aac7 100644
--- a/tvix/glue/src/tvix_store_io.rs
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -4,7 +4,6 @@ use futures::{StreamExt, TryStreamExt};
 use nix_compat::nixhash::NixHash;
 use nix_compat::store_path::StorePathRef;
 use nix_compat::{nixhash::CAHash, store_path::StorePath};
-use sha2::{Digest, Sha256};
 use std::{
     cell::RefCell,
     collections::BTreeSet,
@@ -19,7 +18,6 @@ use tvix_build::buildservice::BuildService;
 use tvix_castore::proto::node::Node;
 use tvix_eval::{EvalIO, FileType, StdIO};
 use tvix_store::nar::NarCalculationService;
-use tvix_store::utils::AsyncIoBridge;
 
 use tvix_castore::{
     blobservice::BlobService,
@@ -410,26 +408,6 @@ impl TvixStoreIO {
         Ok(output_path)
     }
 
-    /// Transforms a BLAKE-3 digest into a SHA256 digest
-    /// by re-hashing the whole file.
-    pub(crate) async fn blob_to_sha256_hash(&self, blob_digest: B3Digest) -> io::Result<[u8; 32]> {
-        let mut reader = self
-            .blob_service
-            .open_read(&blob_digest)
-            .await?
-            .ok_or_else(|| {
-                io::Error::new(
-                    io::ErrorKind::NotFound,
-                    format!("blob represented by digest: '{}' not found", blob_digest),
-                )
-            })?;
-        // It is fine to use `AsyncIoBridge` here because hashing is not actually I/O.
-        let mut hasher = AsyncIoBridge(Sha256::new());
-
-        tokio::io::copy(&mut reader, &mut hasher).await?;
-        Ok(hasher.0.finalize().into())
-    }
-
     pub async fn store_path_exists<'a>(&'a self, store_path: StorePathRef<'a>) -> io::Result<bool> {
         Ok(self
             .path_info_service