diff options
author | Connor Brewster <cbrewster@hey.com> | 2024-03-22T23·52-0500 |
---|---|---|
committer | Connor Brewster <cbrewster@hey.com> | 2024-04-09T17·31+0000 |
commit | 63116d8c21afdc50725ae93d13839fe1915b06b7 (patch) | |
tree | 4997838251dac809c2917b35e5d32224030ba595 /tvix/eval/src/vm | |
parent | 17849c5c0033fa1909f0403b5d5e6a5e018b7fee (diff) |
fix(tvix): Avoid buffering file into memory in builtins.hashFile r/7882
Right now `builtins.hashFile` always reads the entire file into memory before hashing, which is not ideal for large files. This replaces `read_to_string` with `open_file` which allows calculating the hash of the file without buffering it entirely into memory. Other callers can continue to buffer into memory if they choose, but they still use the `open_file` VM request and then call `read_to_string` or `read_to_end` on the `std::io::Reader`. Fixes b/380 Change-Id: Ifa1c8324bcee8f751604b0b449feab875c632fda Reviewed-on: https://cl.tvl.fyi/c/depot/+/11236 Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/eval/src/vm')
-rw-r--r-- | tvix/eval/src/vm/generators.rs | 27 |
1 files changed, 16 insertions, 11 deletions
diff --git a/tvix/eval/src/vm/generators.rs b/tvix/eval/src/vm/generators.rs index 4dacdef0dd08..d5b5f1de4979 100644 --- a/tvix/eval/src/vm/generators.rs +++ b/tvix/eval/src/vm/generators.rs @@ -102,8 +102,8 @@ pub enum VMRequest { /// Request that the VM imports the given path through its I/O interface. PathImport(PathBuf), - /// Request that the VM reads the given path to a string. - ReadToString(PathBuf), + /// Request that the VM opens the specified file and provides a reader. + OpenFile(PathBuf), /// Request that the VM checks whether the given path exists. PathExists(PathBuf), @@ -170,8 +170,8 @@ impl Display for VMRequest { write!(f, "import_cache_put({})", p.to_string_lossy()) } VMRequest::PathImport(p) => write!(f, "path_import({})", p.to_string_lossy()), - VMRequest::ReadToString(p) => { - write!(f, "read_to_string({})", p.to_string_lossy()) + VMRequest::OpenFile(p) => { + write!(f, "open_file({})", p.to_string_lossy()) } VMRequest::PathExists(p) => write!(f, "path_exists({})", p.to_string_lossy()), VMRequest::ReadDir(p) => write!(f, "read_dir({})", p.to_string_lossy()), @@ -199,6 +199,9 @@ pub enum VMResponse { /// VM response with a span to use at the current point. Span(LightSpan), + + /// [std::io::Reader] produced by the VM in response to some IO operation. + Reader(Box<dyn std::io::Read>), } impl Display for VMResponse { @@ -209,6 +212,7 @@ impl Display for VMResponse { VMResponse::Path(p) => write!(f, "path({})", p.to_string_lossy()), VMResponse::Directory(d) => write!(f, "dir(len = {})", d.len()), VMResponse::Span(_) => write!(f, "span"), + VMResponse::Reader(_) => write!(f, "reader"), } } } @@ -425,18 +429,18 @@ where message = VMResponse::Path(imported); } - VMRequest::ReadToString(path) => { - let content = self + VMRequest::OpenFile(path) => { + let reader = self .io_handle .as_ref() - .read_to_end(&path) + .open(&path) .map_err(|e| ErrorKind::IO { path: Some(path), error: e.into(), }) .with_span(&span, self)?; - message = VMResponse::Value(content.into()) + message = VMResponse::Reader(reader) } VMRequest::PathExists(path) => { @@ -730,9 +734,10 @@ pub(crate) async fn request_path_import(co: &GenCo, path: PathBuf) -> PathBuf { } } -pub(crate) async fn request_read_to_string(co: &GenCo, path: PathBuf) -> Value { - match co.yield_(VMRequest::ReadToString(path)).await { - VMResponse::Value(value) => value, +/// Request that the VM open a [std::io::Read] for the specified file. +pub async fn request_open_file(co: &GenCo, path: PathBuf) -> Box<dyn std::io::Read> { + match co.yield_(VMRequest::OpenFile(path)).await { + VMResponse::Reader(value) => value, msg => panic!( "Tvix bug: VM responded with incorrect generator message: {}", msg |