diff options
Diffstat (limited to 'tvix/store/src/chunkservice/util.rs')
-rw-r--r-- | tvix/store/src/chunkservice/util.rs | 28 |
1 files changed, 22 insertions, 6 deletions
diff --git a/tvix/store/src/chunkservice/util.rs b/tvix/store/src/chunkservice/util.rs
index fe8e4b350fe4..2897d4e58e94 100644
--- a/tvix/store/src/chunkservice/util.rs
+++ b/tvix/store/src/chunkservice/util.rs
@@ -11,12 +11,7 @@ pub fn upload_chunk<CS: ChunkService>(
     chunk_data: Vec<u8>,
 ) -> Result<Vec<u8>, Error> {
     let mut hasher = blake3::Hasher::new();
-    // TODO: benchmark this number and factor it out
-    if chunk_data.len() >= 128 * 1024 {
-        hasher.update_rayon(&chunk_data);
-    } else {
-        hasher.update(&chunk_data);
-    }
+    update_hasher(&mut hasher, &chunk_data);
     let digest = hasher.finalize();
 
     if chunk_service.has(digest.as_bytes())? {
@@ -28,3 +23,24 @@ pub fn upload_chunk<CS: ChunkService>(
 
     Ok(digest.as_bytes().to_vec())
 }
+
+/// updates a given hasher with more data. Uses rayon if the data is
+/// sufficiently big.
+///
+/// From the docs:
+///
+/// To get any performance benefit from multithreading, the input buffer needs
+/// to be large. As a rule of thumb on x86_64, update_rayon is slower than
+/// update for inputs under 128 KiB. That threshold varies quite a lot across
+/// different processors, and it’s important to benchmark your specific use
+/// case.
+///
+/// We didn't benchmark yet, so these numbers might need tweaking.
+#[instrument(skip_all)]
+pub fn update_hasher(hasher: &mut blake3::Hasher, data: &[u8]) {
+    if data.len() > 128 * 1024 {
+        hasher.update_rayon(data);
+    } else {
+        hasher.update(data);
+    }
+}