From d8e0fa8e5e67c7cc21a72cad545789914adcc798 Mon Sep 17 00:00:00 2001
From: Florian Klink
Date: Fri, 20 Jan 2023 19:22:41 +0100
Subject: feat(tvix/store/directory): deduplicate Directory messages

We can omit sending Directory messages to clients that have already been
sent in the same stream.

We can also omit storing a Directory message if we already have it -
they're content-addressed anyways.

Change-Id: Iba44565e07157a83a033177a2ffbdddced64ba5c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7881
Reviewed-by: tazjin
Tested-by: BuildkiteCI
---
 tvix/store/src/tests/directory_service.rs | 50 +++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tvix/store/src/tests')

diff --git a/tvix/store/src/tests/directory_service.rs b/tvix/store/src/tests/directory_service.rs
index c16feb2430..38cc0897c2 100644
--- a/tvix/store/src/tests/directory_service.rs
+++ b/tvix/store/src/tests/directory_service.rs
@@ -19,6 +19,21 @@ lazy_static! {
         }],
         ..Default::default()
     };
+    static ref DIRECTORY_C: Directory = Directory {
+        directories: vec![
+            DirectoryNode {
+                name: "a".to_string(),
+                digest: DIRECTORY_A.digest(),
+                size: DIRECTORY_A.size(),
+            },
+            DirectoryNode {
+                name: "a'".to_string(),
+                digest: DIRECTORY_A.digest(),
+                size: DIRECTORY_A.size(),
+            }
+        ],
+        ..Default::default()
+    };
 }
 
 /// Send the specified GetDirectoryRequest.
@@ -156,3 +171,38 @@ async fn put_get_multiple() -> anyhow::Result<()> {
 
     Ok(())
 }
+
+/// Put multiple Directories into the store, and omit duplicates.
+#[tokio::test]
+async fn put_get_dedup() -> anyhow::Result<()> {
+    let service = SledDirectoryService::new(TempDir::new()?.path().to_path_buf())?;
+
+    // Send "A", then "C", which refers to "A" two times
+    // Pretend we're a dumb client sending A twice.
+    let put_resp = service
+        .put(tonic_mock::streaming_request(vec![
+            DIRECTORY_A.clone(),
+            DIRECTORY_A.clone(),
+            DIRECTORY_C.clone(),
+        ]))
+        .await
+        .expect("must succeed");
+
+    assert_eq!(DIRECTORY_C.digest(), put_resp.into_inner().root_digest);
+
+    // Ask for "C" recursively. We expect to only get "A" once, as there's no point sending it twice.
+    let items = get_directories(
+        &service,
+        GetDirectoryRequest {
+            recursive: true,
+            by_what: Some(ByWhat::Digest(DIRECTORY_C.digest())),
+        },
+    )
+    .await
+    .expect("must not error");
+
+    // We expect to get C, and then A (once, as the second A has been deduplicated).
+    assert_eq!(vec![DIRECTORY_C.clone(), DIRECTORY_A.clone()], items);
+
+    Ok(())
+}
-- 
cgit 1.4.1