From a40d2dcdcd453e44d53d44b4c1471f3b503c7cd6 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Fri, 17 Feb 2023 18:15:21 +0100 Subject: feat(tvix/store): bump fastcdc, use v2020 version This switches away from the less canonical "ronomon" version to the implementation as described in the [paper](https://ieeexplore.ieee.org/document/9055082) by Wen Xia, et al., in 2020. That version uses 64-bit hash values and tends to be faster than both the ronomon and v2016 versions, and produces the same chunking as the 2016 version. As per https://docs.rs/fastcdc/latest/fastcdc/#implementations-1, it's the recommended choice. The crate also gained support for streaming versions of its chunkers: https://docs.rs/fastcdc/latest/fastcdc/#large-data, which might be useful. Change-Id: Ieabec3da54eb2b73c045cb54e51f7a216f63641e Reviewed-on: https://cl.tvl.fyi/c/depot/+/8134 Reviewed-by: raitobezarius Tested-by: BuildkiteCI --- tvix/Cargo.lock | 4 ++-- tvix/Cargo.nix | 4 ++-- tvix/store/Cargo.toml | 2 +- tvix/store/src/proto/grpc_blobservice_wrapper.rs | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 6c04176a1fca..534e04dbeeef 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -675,9 +675,9 @@ checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" [[package]] name = "fastcdc" -version = "2.0.0" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8093ae083a5098c6ac2e898350c704b2c70d092f9a5c0ad6f43703ec89a872b1" +checksum = "c47726595a8a071d7d8045a837d1179b1964633e256300675aa50c31284a23e2" [[package]] name = "fastrand" diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 4e9e42e47949..4d01c2f0de94 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -1915,9 +1915,9 @@ rec { }; "fastcdc" = rec { crateName = "fastcdc"; - version = "2.0.0"; + version = "3.0.0"; edition = "2018"; - sha256 = "1cbjm24yq0rpykb0lp4s5w4hvixj0k3m10w95sncd62h784ax4w0"; + sha256 = 
"1qi398l32355b9kh0qr57rin86cv2z8kga25h1yis1wab9cjcxy4"; authors = [ "Nathan Fiedler " ]; diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml index 766c44cb1775..0a1ee9ce5c57 100644 --- a/tvix/store/Cargo.toml +++ b/tvix/store/Cargo.toml @@ -12,7 +12,7 @@ blake3 = { version = "1.3.1", features = ["rayon", "std"] } clap = { version = "4.0", features = ["derive", "env"] } count-write = "0.1.0" data-encoding = "2.3.3" -fastcdc = "2.0.0" +fastcdc = "3.0.0" lazy_static = "1.4.0" prost = "0.11.2" sha2 = "0.10.6" diff --git a/tvix/store/src/proto/grpc_blobservice_wrapper.rs b/tvix/store/src/proto/grpc_blobservice_wrapper.rs index 6204fc14005b..2516b5d3f933 100644 --- a/tvix/store/src/proto/grpc_blobservice_wrapper.rs +++ b/tvix/store/src/proto/grpc_blobservice_wrapper.rs @@ -157,7 +157,7 @@ impl< // initialize a new chunker // TODO: play with chunking sizes - let chunker = fastcdc::FastCDC::new( + let chunker = fastcdc::v2020::FastCDC::new( &blob_contents, 64 * 1024 / 4, // min 64 * 1024, // avg -- cgit 1.4.1