1 files changed, 22 insertions, 6 deletions
diff --git a/tvix/store/src/chunkservice/util.rs b/tvix/store/src/chunkservice/util.rs
index fe8e4b350fe4..2897d4e58e94 100644
--- a/tvix/store/src/chunkservice/util.rs
+++ b/tvix/store/src/chunkservice/util.rs
@@ -11,12 +11,7 @@ pub fn upload_chunk<CS: ChunkService>(
     chunk_data: Vec<u8>,
 ) -> Result<Vec<u8>, Error> {
     let mut hasher = blake3::Hasher::new();
-    // TODO: benchmark this number and factor it out
-    if chunk_data.len() >= 128 * 1024 {
-        hasher.update_rayon(&chunk_data);
-    } else {
-        hasher.update(&chunk_data);
-    }
+    update_hasher(&mut hasher, &chunk_data);
     let digest = hasher.finalize();
 
     if chunk_service.has(digest.as_bytes())? {
@@ -28,3 +23,24 @@ pub fn upload_chunk<CS: ChunkService>(
 
     Ok(digest.as_bytes().to_vec())
 }
+
+/// Updates a given hasher with more data, using rayon if the data is
+/// sufficiently big (128 KiB or more).
+///
+/// From the blake3 docs:
+///
+/// > To get any performance benefit from multithreading, the input buffer
+/// > needs to be large. As a rule of thumb on x86_64, update_rayon is slower
+/// > than update for inputs under 128 KiB. That threshold varies quite a lot
+/// > across different processors, and it's important to benchmark your
+/// > specific use case.
+///
+/// We didn't benchmark yet, so this threshold might need tweaking.
+#[instrument(skip_all)]
+pub fn update_hasher(hasher: &mut blake3::Hasher, data: &[u8]) {
+    if data.len() >= 128 * 1024 {
+        hasher.update_rayon(data);
+    } else {
+        hasher.update(data);
+    }
+}