about summary refs log tree commit diff
path: root/tvix/eval/src/vm
diff options
context:
space:
mode:
authorConnor Brewster <cbrewster@hey.com>2024-03-22T23·52-0500
committerConnor Brewster <cbrewster@hey.com>2024-04-09T17·31+0000
commit63116d8c21afdc50725ae93d13839fe1915b06b7 (patch)
tree4997838251dac809c2917b35e5d32224030ba595 /tvix/eval/src/vm
parent17849c5c0033fa1909f0403b5d5e6a5e018b7fee (diff)
fix(tvix): Avoid buffering file into memory in builtins.hashFile r/7882
Right now `builtins.hashFile` always reads the entire file into memory
before hashing, which is not ideal for large files. This replaces
`read_to_string` with `open_file` which allows calculating the hash of
the file without buffering it entirely into memory. Other callers can
continue to buffer into memory if they choose, but they still use the
`open_file` VM request and then call `read_to_string` or `read_to_end`
on the `std::io::Reader`.

Fixes b/380

Change-Id: Ifa1c8324bcee8f751604b0b449feab875c632fda
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11236
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/eval/src/vm')
-rw-r--r--tvix/eval/src/vm/generators.rs27
1 files changed, 16 insertions, 11 deletions
diff --git a/tvix/eval/src/vm/generators.rs b/tvix/eval/src/vm/generators.rs
index 4dacdef0dd08..d5b5f1de4979 100644
--- a/tvix/eval/src/vm/generators.rs
+++ b/tvix/eval/src/vm/generators.rs
@@ -102,8 +102,8 @@ pub enum VMRequest {
     /// Request that the VM imports the given path through its I/O interface.
     PathImport(PathBuf),
 
-    /// Request that the VM reads the given path to a string.
-    ReadToString(PathBuf),
+    /// Request that the VM opens the specified file and provides a reader.
+    OpenFile(PathBuf),
 
     /// Request that the VM checks whether the given path exists.
     PathExists(PathBuf),
@@ -170,8 +170,8 @@ impl Display for VMRequest {
                 write!(f, "import_cache_put({})", p.to_string_lossy())
             }
             VMRequest::PathImport(p) => write!(f, "path_import({})", p.to_string_lossy()),
-            VMRequest::ReadToString(p) => {
-                write!(f, "read_to_string({})", p.to_string_lossy())
+            VMRequest::OpenFile(p) => {
+                write!(f, "open_file({})", p.to_string_lossy())
             }
             VMRequest::PathExists(p) => write!(f, "path_exists({})", p.to_string_lossy()),
             VMRequest::ReadDir(p) => write!(f, "read_dir({})", p.to_string_lossy()),
@@ -199,6 +199,9 @@ pub enum VMResponse {
 
     /// VM response with a span to use at the current point.
     Span(LightSpan),
+
+    /// [std::io::Reader] produced by the VM in response to some IO operation.
+    Reader(Box<dyn std::io::Read>),
 }
 
 impl Display for VMResponse {
@@ -209,6 +212,7 @@ impl Display for VMResponse {
             VMResponse::Path(p) => write!(f, "path({})", p.to_string_lossy()),
             VMResponse::Directory(d) => write!(f, "dir(len = {})", d.len()),
             VMResponse::Span(_) => write!(f, "span"),
+            VMResponse::Reader(_) => write!(f, "reader"),
         }
     }
 }
@@ -425,18 +429,18 @@ where
                             message = VMResponse::Path(imported);
                         }
 
-                        VMRequest::ReadToString(path) => {
-                            let content = self
+                        VMRequest::OpenFile(path) => {
+                            let reader = self
                                 .io_handle
                                 .as_ref()
-                                .read_to_end(&path)
+                                .open(&path)
                                 .map_err(|e| ErrorKind::IO {
                                     path: Some(path),
                                     error: e.into(),
                                 })
                                 .with_span(&span, self)?;
 
-                            message = VMResponse::Value(content.into())
+                            message = VMResponse::Reader(reader)
                         }
 
                         VMRequest::PathExists(path) => {
@@ -730,9 +734,10 @@ pub(crate) async fn request_path_import(co: &GenCo, path: PathBuf) -> PathBuf {
     }
 }
 
-pub(crate) async fn request_read_to_string(co: &GenCo, path: PathBuf) -> Value {
-    match co.yield_(VMRequest::ReadToString(path)).await {
-        VMResponse::Value(value) => value,
+/// Request that the VM open a [std::io::Read] for the specified file.
+pub async fn request_open_file(co: &GenCo, path: PathBuf) -> Box<dyn std::io::Read> {
+    match co.yield_(VMRequest::OpenFile(path)).await {
+        VMResponse::Reader(value) => value,
         msg => panic!(
             "Tvix bug: VM responded with incorrect generator message: {}",
             msg