diff options
author | Connor Brewster <cbrewster@hey.com> | 2024-03-22T23·52-0500 |
---|---|---|
committer | Connor Brewster <cbrewster@hey.com> | 2024-04-09T17·31+0000 |
commit | 63116d8c21afdc50725ae93d13839fe1915b06b7 (patch) | |
tree | 4997838251dac809c2917b35e5d32224030ba595 /tvix/eval/src/builtins/hash.rs | |
parent | 17849c5c0033fa1909f0403b5d5e6a5e018b7fee (diff) |
fix(tvix): Avoid buffering file into memory in builtins.hashFile r/7882
Right now `builtins.hashFile` always reads the entire file into memory before hashing, which is not ideal for large files. This replaces `read_to_string` with `open_file`, which allows calculating the hash of the file without buffering it entirely into memory.

Other callers can continue to buffer into memory if they choose, but they still use the `open_file` VM request and then call `read_to_string` or `read_to_end` on the returned `std::io::Read` implementation.

Fixes b/380

Change-Id: Ifa1c8324bcee8f751604b0b449feab875c632fda
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11236
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/eval/src/builtins/hash.rs')
-rw-r--r-- | tvix/eval/src/builtins/hash.rs | 20 |
1 files changed, 12 insertions, 8 deletions
```diff
diff --git a/tvix/eval/src/builtins/hash.rs b/tvix/eval/src/builtins/hash.rs
index 6d07fc9b2dc8..d0145f1e7d75 100644
--- a/tvix/eval/src/builtins/hash.rs
+++ b/tvix/eval/src/builtins/hash.rs
@@ -6,18 +6,22 @@ use sha2::{digest::Output, Digest, Sha256, Sha512};
 
 use crate::ErrorKind;
 
-fn hash<D: Digest>(b: &[u8]) -> Output<D> {
+/// Reads through all data from the passed reader, and returns the resulting [Digest].
+/// The exact hash function used is left generic over all [Digest].
+fn hash<D: Digest + std::io::Write>(mut r: impl std::io::Read) -> Result<Output<D>, ErrorKind> {
     let mut hasher = D::new();
-    hasher.update(b);
-    hasher.finalize()
+    std::io::copy(&mut r, &mut hasher)?;
+    Ok(hasher.finalize())
 }
 
-pub fn hash_nix_string(algo: impl AsRef<[u8]>, s: impl AsRef<[u8]>) -> Result<String, ErrorKind> {
+/// For a given algo "string" and reader for data, calculate the digest
+/// and return it as a hexlower encoded [String].
+pub fn hash_nix_string(algo: impl AsRef<[u8]>, s: impl std::io::Read) -> Result<String, ErrorKind> {
     match algo.as_ref() {
-        b"md5" => Ok(HEXLOWER.encode(hash::<Md5>(s.as_ref()).as_bstr())),
-        b"sha1" => Ok(HEXLOWER.encode(hash::<Sha1>(s.as_ref()).as_bstr())),
-        b"sha256" => Ok(HEXLOWER.encode(hash::<Sha256>(s.as_ref()).as_bstr())),
-        b"sha512" => Ok(HEXLOWER.encode(hash::<Sha512>(s.as_ref()).as_bstr())),
+        b"md5" => Ok(HEXLOWER.encode(hash::<Md5>(s)?.as_bstr())),
+        b"sha1" => Ok(HEXLOWER.encode(hash::<Sha1>(s)?.as_bstr())),
+        b"sha256" => Ok(HEXLOWER.encode(hash::<Sha256>(s)?.as_bstr())),
+        b"sha512" => Ok(HEXLOWER.encode(hash::<Sha512>(s)?.as_bstr())),
         _ => Err(ErrorKind::UnknownHashType(
             algo.as_ref().as_bstr().to_string(),
         )),
```