about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- tvix/docs/src/TODO.md            |   9
-rw-r--r-- tvix/glue/src/builtins/import.rs | 155
-rw-r--r-- tvix/glue/src/tvix_store_io.rs   |  22
3 files changed, 114 insertions, 72 deletions
diff --git a/tvix/docs/src/TODO.md b/tvix/docs/src/TODO.md
index 92d7c4cace81..127fb6f4d0c1 100644
--- a/tvix/docs/src/TODO.md
+++ b/tvix/docs/src/TODO.md
@@ -127,15 +127,6 @@ Some more fetcher-related builtins need work:
  - `fetchTree` (hairy, seems there's no proper spec and the URL syntax seems
    subject to change/underdocumented)
 
-### `builtins.path` roundtrip for flat
-`builtins.path` currently uses `filtered_ingest` also for the non-recursive
-case, then reads through the blob contents again to get the sha256.
-
-We should take care of assembling the root node on our own, and pipe the data
-through sha256 too (via `InspectReader`, see `glue/fetcher` for an example).
-
-This avoids some roundtrips, and is probably faster.
-
 ### Derivation -> Build
 While we have some support for `structuredAttrs` and `fetchClosure` (at least
 enough to calculate output hashes, aka produce identical ATerm), the code
diff --git a/tvix/glue/src/builtins/import.rs b/tvix/glue/src/builtins/import.rs
index 4a8a29b417df..9257975a09d6 100644
--- a/tvix/glue/src/builtins/import.rs
+++ b/tvix/glue/src/builtins/import.rs
@@ -109,15 +109,16 @@ mod import_builtins {
 
     use super::*;
 
+    use crate::tvix_store_io::TvixStoreIO;
     use nix_compat::nixhash::{CAHash, NixHash};
     use nix_compat::store_path::StorePath;
+    use sha2::Digest;
+    use tokio::io::AsyncWriteExt;
+    use tvix_castore::proto::node::Node;
+    use tvix_castore::proto::FileNode;
     use tvix_eval::generators::Gen;
     use tvix_eval::{generators::GenCo, ErrorKind, Value};
-    use tvix_eval::{NixContextElement, NixString};
-
-    use tvix_castore::B3Digest;
-
-    use crate::tvix_store_io::TvixStoreIO;
+    use tvix_eval::{FileType, NixContextElement, NixString};
 
     #[builtin("path")]
     async fn builtin_path(
@@ -167,54 +168,126 @@ mod import_builtins {
             })
             .transpose()?;
 
-        // FUTUREWORK(performance): this opens the file instead of using a stat-like
-        // system call to the file.
-        if !recursive_ingestion && state.open(path.as_ref()).is_err() {
-            Err(ImportError::FlatImportOfNonFile(
-                path.to_string_lossy().to_string(),
-            ))?;
-        }
+        // Check if the path points to a regular file.
+        // If it does, the filter function is never executed.
+        // TODO: follow symlinks and check their type instead
+        let (root_node, ca_hash) = match state.file_type(path.as_ref())? {
+            FileType::Regular => {
+                let mut file = state.open(path.as_ref())?;
+                // This is a single file, copy it to the blobservice directly.
+                let mut hash = sha2::Sha256::new();
+                let mut blob_size = 0;
+                let mut blob_writer = state
+                    .tokio_handle
+                    .block_on(async { state.blob_service.open_write().await });
+
+                let mut buf = [0u8; 4096];
 
-        let root_node = filtered_ingest(state.clone(), co, path.as_ref(), filter).await?;
-        let ca: CAHash = if recursive_ingestion {
-            CAHash::Nar(NixHash::Sha256(state.tokio_handle.block_on(async {
-                Ok::<_, tvix_eval::ErrorKind>(
+                loop {
+                    // read bytes into buffer, break out if EOF
+                    let len = file.read(&mut buf)?;
+                    if len == 0 {
+                        break;
+                    }
+                    blob_size += len as u64;
+
+                    let data = &buf[0..len];
+
+                    // add to blobwriter
                     state
-                        .nar_calculation_service
-                        .as_ref()
-                        .calculate_nar(&root_node)
-                        .await
-                        .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?
-                        .1,
-                )
-            })?))
-        } else {
-            let digest: B3Digest = match root_node {
-                tvix_castore::proto::node::Node::File(ref fnode) => {
-                    // It's already validated.
-                    fnode.digest.clone().try_into().unwrap()
+                        .tokio_handle
+                        .block_on(async { blob_writer.write_all(data).await })?;
+
+                    // update the sha256 hash function. We can skip that if we're not using it.
+                    if !recursive_ingestion {
+                        hash.update(data);
+                    }
                 }
-                // We cannot hash anything else than file in flat import mode.
-                _ => {
+
+                // close the blob writer, get back the b3 digest.
+                let blob_digest = state
+                    .tokio_handle
+                    .block_on(async { blob_writer.close().await })?;
+
+                let root_node = Node::File(FileNode {
+                    // The name gets set further down, while constructing the PathInfo.
+                    name: "".into(),
+                    digest: blob_digest.into(),
+                    size: blob_size,
+                    executable: false,
+                });
+
+                let ca_hash = if recursive_ingestion {
+                    let (_nar_size, nar_sha256) = state
+                        .tokio_handle
+                        .block_on(async {
+                            state
+                                .nar_calculation_service
+                                .as_ref()
+                                .calculate_nar(&root_node)
+                                .await
+                        })
+                        .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?;
+                    CAHash::Nar(NixHash::Sha256(nar_sha256))
+                } else {
+                    CAHash::Flat(NixHash::Sha256(hash.finalize().into()))
+                };
+
+                (root_node, ca_hash)
+            }
+
+            FileType::Directory => {
+                if !recursive_ingestion {
                     return Err(ImportError::FlatImportOfNonFile(
                         path.to_string_lossy().to_string(),
-                    )
-                    .into())
+                    ))?;
                 }
-            };
 
-            // FUTUREWORK: avoid hashing again.
-            CAHash::Flat(NixHash::Sha256(
-                state
+                // do the filtered ingest
+                let root_node = filtered_ingest(state.clone(), co, path.as_ref(), filter).await?;
+
+                // calculate the NAR sha256
+                let (_nar_size, nar_sha256) = state
                     .tokio_handle
-                    .block_on(async { state.blob_to_sha256_hash(digest).await })?,
-            ))
+                    .block_on(async {
+                        state
+                            .nar_calculation_service
+                            .as_ref()
+                            .calculate_nar(&root_node)
+                            .await
+                    })
+                    .map_err(|e| tvix_eval::ErrorKind::TvixError(Rc::new(e)))?;
+
+                let ca_hash = CAHash::Nar(NixHash::Sha256(nar_sha256));
+
+                (root_node, ca_hash)
+            }
+            FileType::Symlink => {
+                // FUTUREWORK: Nix follows a symlink if it's at the root,
+                // except if it's not resolvable (NixOS/nix#7761).
+                return Err(tvix_eval::ErrorKind::IO {
+                    path: Some(path.to_path_buf()),
+                    error: Rc::new(std::io::Error::new(
+                        std::io::ErrorKind::Unsupported,
+                        "builtins.path pointing to a symlink is ill-defined.",
+                    )),
+                });
+            }
+            FileType::Unknown => {
+                return Err(tvix_eval::ErrorKind::IO {
+                    path: Some(path.to_path_buf()),
+                    error: Rc::new(std::io::Error::new(
+                        std::io::ErrorKind::Unsupported,
+                        "unsupported file type",
+                    )),
+                })
+            }
         };
 
-        let obtained_hash = ca.hash().clone().into_owned();
+        let obtained_hash = ca_hash.hash().clone().into_owned();
         let (path_info, _hash, output_path) = state.tokio_handle.block_on(async {
             state
-                .node_to_path_info(name.as_ref(), path.as_ref(), ca, root_node)
+                .node_to_path_info(name.as_ref(), path.as_ref(), ca_hash, root_node)
                 .await
         })?;
 
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
index dd034d74bf3e..b0367f60aac7 100644
--- a/tvix/glue/src/tvix_store_io.rs
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -4,7 +4,6 @@ use futures::{StreamExt, TryStreamExt};
 use nix_compat::nixhash::NixHash;
 use nix_compat::store_path::StorePathRef;
 use nix_compat::{nixhash::CAHash, store_path::StorePath};
-use sha2::{Digest, Sha256};
 use std::{
     cell::RefCell,
     collections::BTreeSet,
@@ -19,7 +18,6 @@ use tvix_build::buildservice::BuildService;
 use tvix_castore::proto::node::Node;
 use tvix_eval::{EvalIO, FileType, StdIO};
 use tvix_store::nar::NarCalculationService;
-use tvix_store::utils::AsyncIoBridge;
 
 use tvix_castore::{
     blobservice::BlobService,
@@ -410,26 +408,6 @@ impl TvixStoreIO {
         Ok(output_path)
     }
 
-    /// Transforms a BLAKE-3 digest into a SHA256 digest
-    /// by re-hashing the whole file.
-    pub(crate) async fn blob_to_sha256_hash(&self, blob_digest: B3Digest) -> io::Result<[u8; 32]> {
-        let mut reader = self
-            .blob_service
-            .open_read(&blob_digest)
-            .await?
-            .ok_or_else(|| {
-                io::Error::new(
-                    io::ErrorKind::NotFound,
-                    format!("blob represented by digest: '{}' not found", blob_digest),
-                )
-            })?;
-        // It is fine to use `AsyncIoBridge` here because hashing is not actually I/O.
-        let mut hasher = AsyncIoBridge(Sha256::new());
-
-        tokio::io::copy(&mut reader, &mut hasher).await?;
-        Ok(hasher.0.finalize().into())
-    }
-
     pub async fn store_path_exists<'a>(&'a self, store_path: StorePathRef<'a>) -> io::Result<bool> {
         Ok(self
             .path_info_service