about summary refs log tree commit diff
diff options
context:
space:
mode:
author Florian Klink <flokli@flokli.de> 2023-02-17T17·15+0100
committer flokli <flokli@flokli.de> 2023-03-10T10·58+0000
commit a40d2dcdcd453e44d53d44b4c1471f3b503c7cd6 (patch)
tree 66d63a4ae80b107227deaef191ce148efd775109
parent fbabcf0420bcc913971d529c11d58f6f888f1002 (diff)
feat(tvix/store): bump fastcdc, use v2020 version r/5925
This switches away from the less canonical "ronomon" version to the
implementation described in the 2020
[paper](https://ieeexplore.ieee.org/document/9055082) by Wen Xia et
al.

That version uses 64-bit hash values and tends to be faster than both
the ronomon and v2016 versions, and produces the same chunking as the
2016 version.

As per https://docs.rs/fastcdc/latest/fastcdc/#implementations-1, it's
the recommended choice.

The crate also gained support for streaming versions of its chunkers
(https://docs.rs/fastcdc/latest/fastcdc/#large-data), which might be
useful.

Change-Id: Ieabec3da54eb2b73c045cb54e51f7a216f63641e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8134
Reviewed-by: raitobezarius <tvl@lahfa.xyz>
Tested-by: BuildkiteCI
-rw-r--r-- tvix/Cargo.lock 4
-rw-r--r-- tvix/Cargo.nix 4
-rw-r--r-- tvix/store/Cargo.toml 2
-rw-r--r-- tvix/store/src/proto/grpc_blobservice_wrapper.rs 2
4 files changed, 6 insertions, 6 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index 6c04176a1fca..534e04dbeeef 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -675,9 +675,9 @@ checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
 
 [[package]]
 name = "fastcdc"
-version = "2.0.0"
+version = "3.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8093ae083a5098c6ac2e898350c704b2c70d092f9a5c0ad6f43703ec89a872b1"
+checksum = "c47726595a8a071d7d8045a837d1179b1964633e256300675aa50c31284a23e2"
 
 [[package]]
 name = "fastrand"
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 4e9e42e47949..4d01c2f0de94 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -1915,9 +1915,9 @@ rec {
       };
       "fastcdc" = rec {
         crateName = "fastcdc";
-        version = "2.0.0";
+        version = "3.0.0";
         edition = "2018";
-        sha256 = "1cbjm24yq0rpykb0lp4s5w4hvixj0k3m10w95sncd62h784ax4w0";
+        sha256 = "1qi398l32355b9kh0qr57rin86cv2z8kga25h1yis1wab9cjcxy4";
         authors = [
           "Nathan Fiedler <nathanfiedler@fastmail.fm>"
         ];
diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml
index 766c44cb1775..0a1ee9ce5c57 100644
--- a/tvix/store/Cargo.toml
+++ b/tvix/store/Cargo.toml
@@ -12,7 +12,7 @@ blake3 = { version = "1.3.1", features = ["rayon", "std"] }
 clap = { version = "4.0", features = ["derive", "env"] }
 count-write = "0.1.0"
 data-encoding = "2.3.3"
-fastcdc = "2.0.0"
+fastcdc = "3.0.0"
 lazy_static = "1.4.0"
 prost = "0.11.2"
 sha2 = "0.10.6"
diff --git a/tvix/store/src/proto/grpc_blobservice_wrapper.rs b/tvix/store/src/proto/grpc_blobservice_wrapper.rs
index 6204fc14005b..2516b5d3f933 100644
--- a/tvix/store/src/proto/grpc_blobservice_wrapper.rs
+++ b/tvix/store/src/proto/grpc_blobservice_wrapper.rs
@@ -157,7 +157,7 @@ impl<
 
         // initialize a new chunker
         // TODO: play with chunking sizes
-        let chunker = fastcdc::FastCDC::new(
+        let chunker = fastcdc::v2020::FastCDC::new(
             &blob_contents,
             64 * 1024 / 4, // min
             64 * 1024,     // avg