about summary refs log tree commit diff
path: root/tvix/glue/src/tvix_store_io.rs
diff options
context:
space:
mode:
author: Connor Brewster <cbrewster@hey.com> 2024-03-22T23·52-0500
committer: Connor Brewster <cbrewster@hey.com> 2024-04-09T17·31+0000
commit: 63116d8c21afdc50725ae93d13839fe1915b06b7 (patch)
tree: 4997838251dac809c2917b35e5d32224030ba595 /tvix/glue/src/tvix_store_io.rs
parent: 17849c5c0033fa1909f0403b5d5e6a5e018b7fee (diff)
fix(tvix): Avoid buffering file into memory in builtins.hashFile r/7882
Right now `builtins.hashFile` always reads the entire file into memory
before hashing, which is not ideal for large files. This replaces
`read_to_string` with `open_file` which allows calculating the hash of
the file without buffering it entirely into memory. Other callers can
continue to buffer into memory if they choose, but they still use the
`open_file` VM request and then call `read_to_string` or `read_to_end`
on the returned `std::io::Read` reader.

Fixes b/380

Change-Id: Ifa1c8324bcee8f751604b0b449feab875c632fda
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11236
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/glue/src/tvix_store_io.rs')
-rw-r--r-- tvix/glue/src/tvix_store_io.rs | 45
1 files changed, 21 insertions, 24 deletions
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
index 7b675bfc7d..10a5902785 100644
--- a/tvix/glue/src/tvix_store_io.rs
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -17,7 +17,7 @@ use std::{
     path::{Path, PathBuf},
     sync::Arc,
 };
-use tokio::io::AsyncReadExt;
+use tokio_util::io::SyncIoBridge;
 use tracing::{error, instrument, warn, Level};
 use tvix_build::buildservice::BuildService;
 use tvix_eval::{ErrorKind, EvalIO, FileType, StdIO};
@@ -478,7 +478,7 @@ impl EvalIO for TvixStoreIO {
     }
 
     #[instrument(skip(self), err)]
-    fn read_to_end(&self, path: &Path) -> io::Result<Vec<u8>> {
+    fn open(&self, path: &Path) -> io::Result<Box<dyn io::Read>> {
         if let Ok((store_path, sub_path)) =
             StorePath::from_absolute_path_full(&path.to_string_lossy())
         {
@@ -509,27 +509,24 @@ impl EvalIO for TvixStoreIO {
                             })?;
 
                         self.tokio_handle.block_on(async {
-                            let mut reader = {
-                                let resp = self.blob_service.as_ref().open_read(&digest).await?;
-                                match resp {
-                                    Some(blob_reader) => blob_reader,
-                                    None => {
-                                        error!(
-                                            blob.digest = %digest,
-                                            "blob not found",
-                                        );
-                                        Err(io::Error::new(
-                                            io::ErrorKind::NotFound,
-                                            format!("blob {} not found", &digest),
-                                        ))?
-                                    }
+                            let resp = self.blob_service.as_ref().open_read(&digest).await?;
+                            match resp {
+                                Some(blob_reader) => {
+                                    // The VM Response needs a sync [std::io::Reader].
+                                    Ok(Box::new(SyncIoBridge::new(blob_reader))
+                                        as Box<dyn io::Read>)
                                 }
-                            };
-
-                            let mut buf = Vec::new();
-
-                            reader.read_to_end(&mut buf).await?;
-                            Ok(buf)
+                                None => {
+                                    error!(
+                                        blob.digest = %digest,
+                                        "blob not found",
+                                    );
+                                    Err(io::Error::new(
+                                        io::ErrorKind::NotFound,
+                                        format!("blob {} not found", &digest),
+                                    ))
+                                }
+                            }
                         })
                     }
                     Node::Symlink(_symlink_node) => Err(io::Error::new(
@@ -540,11 +537,11 @@ impl EvalIO for TvixStoreIO {
             } else {
                 // As tvix-store doesn't manage /nix/store on the filesystem,
                 // we still need to also ask self.std_io here.
-                self.std_io.read_to_end(path)
+                self.std_io.open(path)
             }
         } else {
             // The store path is no store path, so do regular StdIO.
-            self.std_io.read_to_end(path)
+            self.std_io.open(path)
         }
     }