From b049b88d2d7bcb9caef158ffdf9cd931c62d2511 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Fri, 10 Mar 2023 23:24:23 +0100 Subject: refactor(tvix/store): factor out hash update into function We're using this in a bunch of places. Let's move it into a helper function. Change-Id: I118fba35f6d343704520ba37280e4ca52a61da44 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8251 Autosubmit: flokli Tested-by: BuildkiteCI Reviewed-by: raitobezarius --- tvix/store/src/chunkservice/util.rs | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'tvix/store/src/chunkservice/util.rs') diff --git a/tvix/store/src/chunkservice/util.rs b/tvix/store/src/chunkservice/util.rs index fe8e4b350fe4..2897d4e58e94 100644 --- a/tvix/store/src/chunkservice/util.rs +++ b/tvix/store/src/chunkservice/util.rs @@ -11,12 +11,7 @@ pub fn upload_chunk( chunk_data: Vec, ) -> Result, Error> { let mut hasher = blake3::Hasher::new(); - // TODO: benchmark this number and factor it out - if chunk_data.len() >= 128 * 1024 { - hasher.update_rayon(&chunk_data); - } else { - hasher.update(&chunk_data); - } + update_hasher(&mut hasher, &chunk_data); let digest = hasher.finalize(); if chunk_service.has(digest.as_bytes())? { @@ -28,3 +23,24 @@ pub fn upload_chunk( Ok(digest.as_bytes().to_vec()) } + +/// updates a given hasher with more data. Uses rayon if the data is +/// sufficiently big. +/// +/// From the docs: +/// +/// To get any performance benefit from multithreading, the input buffer needs +/// to be large. As a rule of thumb on x86_64, update_rayon is slower than +/// update for inputs under 128 KiB. That threshold varies quite a lot across +/// different processors, and it’s important to benchmark your specific use +/// case. +/// +/// We didn't benchmark yet, so these numbers might need tweaking. 
+#[instrument(skip_all)]
+pub fn update_hasher(hasher: &mut blake3::Hasher, data: &[u8]) {
+    if data.len() >= 128 * 1024 { // `>=` as in the inlined check this replaces: only inputs *under* 128 KiB skip rayon
+        hasher.update_rayon(data);
+    } else {
+        hasher.update(data);
+    }
+}
-- cgit 1.4.1