From fe20ba5ffc888a0b62a97b8bd29fcdd07e7851f6 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Fri, 10 May 2024 07:51:18 +0300 Subject: feat(tvix/store): add LruPathInfoService This provides an implementation of PathInfoService storing PathInfo in memory up to a certain capacity, then evicting these that have been used the least recently. Change-Id: I9d738687caf4f181a957f72245f26b92832313cd Reviewed-on: https://cl.tvl.fyi/c/depot/+/11622 Tested-by: BuildkiteCI Reviewed-by: Connor Brewster Autosubmit: flokli --- tvix/Cargo.lock | 52 +++++++++++ tvix/Cargo.nix | 162 +++++++++++++++++++++++++++++++++- tvix/store/Cargo.toml | 1 + tvix/store/src/pathinfoservice/lru.rs | 125 ++++++++++++++++++++++++++ tvix/store/src/pathinfoservice/mod.rs | 2 + 5 files changed, 341 insertions(+), 1 deletion(-) create mode 100644 tvix/store/src/pathinfoservice/lru.rs (limited to 'tvix') diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 8385528e8f..e1b18b37a2 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -17,6 +17,18 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" +[[package]] +name = "ahash" +version = "0.8.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" +dependencies = [ + "cfg-if", + "once_cell", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.2" @@ -26,6 +38,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -1506,6 +1524,10 @@ name = "hashbrown" version = "0.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "heck" @@ -1990,6 +2012,15 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "lru" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3262e75e648fce39813cb56ac41f3c3e3f65217ebf3844d818d1f9398cfb0dc" +dependencies = [ + "hashbrown 0.14.3", +] + [[package]] name = "lzma-sys" version = "0.1.20" @@ -4436,6 +4467,7 @@ dependencies = [ "data-encoding", "futures", "lazy_static", + "lru", "nix-compat", "opentelemetry", "opentelemetry-otlp", @@ -5054,6 +5086,26 @@ version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" +[[package]] +name = "zerocopy" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "zeroize" version = "1.7.0" diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 0efb86c66d..a88e7cddad 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -193,6 +193,49 @@ rec { "rustc-dep-of-std" = [ "core" "compiler_builtins" ]; }; }; + "ahash" = rec { + crateName = "ahash"; + version = "0.8.11"; + edition = "2018"; + sha256 = "04chdfkls5xmhp1d48gnjsmglbqibizs3bpbj6rsj604m10si7g8"; + authors = [ + "Tom Kaitchuck " + ]; + dependencies = [ + { + name = "cfg-if"; + packageId = "cfg-if"; + } + { + name = "once_cell"; + 
packageId = "once_cell"; + usesDefaultFeatures = false; + target = { target, features }: (!(("arm" == target."arch" or null) && ("none" == target."os" or null))); + features = [ "alloc" ]; + } + { + name = "zerocopy"; + packageId = "zerocopy"; + usesDefaultFeatures = false; + features = [ "simd" ]; + } + ]; + buildDependencies = [ + { + name = "version_check"; + packageId = "version_check"; + } + ]; + features = { + "atomic-polyfill" = [ "dep:atomic-polyfill" "once_cell/atomic-polyfill" ]; + "compile-time-rng" = [ "const-random" ]; + "const-random" = [ "dep:const-random" ]; + "default" = [ "std" "runtime-rng" ]; + "getrandom" = [ "dep:getrandom" ]; + "runtime-rng" = [ "getrandom" ]; + "serde" = [ "dep:serde" ]; + }; + }; "aho-corasick" = rec { crateName = "aho-corasick"; version = "1.1.2"; @@ -218,6 +261,21 @@ rec { }; resolvedDefaultFeatures = [ "default" "perf-literal" "std" ]; }; + "allocator-api2" = rec { + crateName = "allocator-api2"; + version = "0.2.18"; + edition = "2018"; + sha256 = "0kr6lfnxvnj164j1x38g97qjlhb7akppqzvgfs0697140ixbav2w"; + authors = [ + "Zakarum " + ]; + features = { + "default" = [ "std" ]; + "serde" = [ "dep:serde" ]; + "std" = [ "alloc" ]; + }; + resolvedDefaultFeatures = [ "alloc" ]; + }; "android-tzdata" = rec { crateName = "android-tzdata"; version = "0.1.1"; @@ -4660,6 +4718,21 @@ rec { authors = [ "Amanieu d'Antras " ]; + dependencies = [ + { + name = "ahash"; + packageId = "ahash"; + optional = true; + usesDefaultFeatures = false; + } + { + name = "allocator-api2"; + packageId = "allocator-api2"; + optional = true; + usesDefaultFeatures = false; + features = [ "alloc" ]; + } + ]; features = { "ahash" = [ "dep:ahash" ]; "alloc" = [ "dep:alloc" ]; @@ -4674,7 +4747,7 @@ rec { "rustc-dep-of-std" = [ "nightly" "core" "compiler_builtins" "alloc" "rustc-internal-api" ]; "serde" = [ "dep:serde" ]; }; - resolvedDefaultFeatures = [ "inline-more" "raw" ]; + resolvedDefaultFeatures = [ "ahash" "allocator-api2" "default" "inline-more" "raw" 
]; }; "heck" = rec { crateName = "heck"; @@ -6147,6 +6220,28 @@ rec { }; resolvedDefaultFeatures = [ "std" ]; }; + "lru" = rec { + crateName = "lru"; + version = "0.12.3"; + edition = "2015"; + sha256 = "1p5hryc967wdh56q9wzb2x9gdqy3yd0sqmnb2fcf7z28wrsjw9nk"; + authors = [ + "Jerome Froelich " + ]; + dependencies = [ + { + name = "hashbrown"; + packageId = "hashbrown 0.14.3"; + optional = true; + } + ]; + features = { + "default" = [ "hashbrown" ]; + "hashbrown" = [ "dep:hashbrown" ]; + "nightly" = [ "hashbrown" "hashbrown/nightly" ]; + }; + resolvedDefaultFeatures = [ "default" "hashbrown" ]; + }; "lzma-sys" = rec { crateName = "lzma-sys"; version = "0.1.20"; @@ -14310,6 +14405,10 @@ rec { name = "lazy_static"; packageId = "lazy_static"; } + { + name = "lru"; + packageId = "lru"; + } { name = "nix-compat"; packageId = "nix-compat"; @@ -16832,6 +16931,67 @@ rec { "Sergio Benitez " ]; + }; + "zerocopy" = rec { + crateName = "zerocopy"; + version = "0.7.34"; + edition = "2018"; + sha256 = "11xhrwixm78m6ca1jdxf584wdwvpgg7q00vg21fhwl0psvyf71xf"; + authors = [ + "Joshua Liebow-Feeser " + ]; + dependencies = [ + { + name = "zerocopy-derive"; + packageId = "zerocopy-derive"; + optional = true; + } + { + name = "zerocopy-derive"; + packageId = "zerocopy-derive"; + target = { target, features }: false; + } + ]; + devDependencies = [ + { + name = "zerocopy-derive"; + packageId = "zerocopy-derive"; + } + ]; + features = { + "__internal_use_only_features_that_work_on_stable" = [ "alloc" "derive" "simd" ]; + "byteorder" = [ "dep:byteorder" ]; + "default" = [ "byteorder" ]; + "derive" = [ "zerocopy-derive" ]; + "simd-nightly" = [ "simd" ]; + "zerocopy-derive" = [ "dep:zerocopy-derive" ]; + }; + resolvedDefaultFeatures = [ "simd" ]; + }; + "zerocopy-derive" = rec { + crateName = "zerocopy-derive"; + version = "0.7.34"; + edition = "2018"; + sha256 = "0fqvglw01w3hp7xj9gdk1800x9j7v58s9w8ijiyiz2a7krb39s8m"; + procMacro = true; + authors = [ + "Joshua Liebow-Feeser " + ]; + 
dependencies = [ + { + name = "proc-macro2"; + packageId = "proc-macro2"; + } + { + name = "quote"; + packageId = "quote"; + } + { + name = "syn"; + packageId = "syn 2.0.48"; + } + ]; + }; "zeroize" = rec { crateName = "zeroize"; diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml index f82cdef300..bf04e9a1fb 100644 --- a/tvix/store/Cargo.toml +++ b/tvix/store/Cargo.toml @@ -41,6 +41,7 @@ tvix-castore = { path = "../castore" } url = "2.4.0" walkdir = "2.4.0" reqwest = { version = "0.11.22", features = ["rustls-tls-native-roots", "stream"], default-features = false } +lru = "0.12.3" [dependencies.tonic-reflection] optional = true diff --git a/tvix/store/src/pathinfoservice/lru.rs b/tvix/store/src/pathinfoservice/lru.rs new file mode 100644 index 0000000000..f3790a9054 --- /dev/null +++ b/tvix/store/src/pathinfoservice/lru.rs @@ -0,0 +1,125 @@ +use std::num::NonZeroUsize; +use std::sync::Arc; +use tokio::sync::RwLock; + +use async_stream::try_stream; +use futures::stream::BoxStream; +use lru::LruCache; +use tonic::async_trait; + +use crate::proto::PathInfo; +use tvix_castore::Error; + +use super::PathInfoService; + +pub struct LruPathInfoService { + lru: Arc<RwLock<LruCache<[u8; 20], PathInfo>>>, +} + +impl LruPathInfoService { + pub fn with_capacity(capacity: NonZeroUsize) -> Self { + Self { + lru: Arc::new(RwLock::new(LruCache::new(capacity))), + } + } +} + +#[async_trait] +impl PathInfoService for LruPathInfoService { + async fn get(&self, digest: [u8; 20]) -> Result<Option<PathInfo>, Error> { + Ok(self.lru.write().await.get(&digest).cloned()) + } + + async fn put(&self, path_info: PathInfo) -> Result<PathInfo, Error> { + // call validate + let store_path = path_info + .validate() + .map_err(|e| Error::InvalidRequest(format!("invalid PathInfo: {}", e)))?; + + self.lru + .write() + .await + .put(*store_path.digest(), path_info.clone()); + + Ok(path_info) + } + + fn list(&self) -> BoxStream<'static, Result<PathInfo, Error>> { + let lru = self.lru.clone(); + Box::pin(try_stream! 
{ + let lru = lru.read().await; + let it = lru.iter(); + + for (_k,v) in it { + yield v.clone() + } + }) + } +} + +#[cfg(test)] +mod test { + use std::num::NonZeroUsize; + + use crate::{ + pathinfoservice::{LruPathInfoService, PathInfoService}, + proto::PathInfo, + tests::fixtures::PATH_INFO_WITH_NARINFO, + }; + use lazy_static::lazy_static; + use tvix_castore::proto as castorepb; + + lazy_static! { + static ref PATHINFO_1: PathInfo = PATH_INFO_WITH_NARINFO.clone(); + static ref PATHINFO_1_DIGEST: [u8; 20] = [0; 20]; + static ref PATHINFO_2: PathInfo = { + let mut p = PATHINFO_1.clone(); + let root_node = p.node.as_mut().unwrap(); + if let castorepb::Node { node: Some(node) } = root_node { + let n = node.to_owned(); + *node = n.rename("11111111111111111111111111111111-dummy2".into()); + } else { + unreachable!() + } + p + }; + static ref PATHINFO_2_DIGEST: [u8; 20] = *(PATHINFO_2.validate().unwrap()).digest(); + } + + #[tokio::test] + async fn evict() { + let svc = LruPathInfoService::with_capacity(NonZeroUsize::new(1).unwrap()); + + // pathinfo_1 should not be there + assert!(svc + .get(*PATHINFO_1_DIGEST) + .await + .expect("no error") + .is_none()); + + // insert it + svc.put(PATHINFO_1.clone()).await.expect("no error"); + + // now it should be there. + assert_eq!( + Some(PATHINFO_1.clone()), + svc.get(*PATHINFO_1_DIGEST).await.expect("no error") + ); + + // insert pathinfo_2. This will evict pathinfo 1 + svc.put(PATHINFO_2.clone()).await.expect("no error"); + + // now pathinfo 2 should be there. + assert_eq!( + Some(PATHINFO_2.clone()), + svc.get(*PATHINFO_2_DIGEST).await.expect("no error") + ); + + // … but pathinfo 1 not anymore. 
+ assert!(svc + .get(*PATHINFO_1_DIGEST) + .await + .expect("no error") + .is_none()); + } +} diff --git a/tvix/store/src/pathinfoservice/mod.rs b/tvix/store/src/pathinfoservice/mod.rs index 64c54c7267..5f5da2bff2 100644 --- a/tvix/store/src/pathinfoservice/mod.rs +++ b/tvix/store/src/pathinfoservice/mod.rs @@ -1,5 +1,6 @@ mod from_addr; mod grpc; +mod lru; mod memory; mod nix_http; mod sled; @@ -18,6 +19,7 @@ use crate::proto::PathInfo; pub use self::from_addr::from_addr; pub use self::grpc::GRPCPathInfoService; +pub use self::lru::LruPathInfoService; pub use self::memory::MemoryPathInfoService; pub use self::nix_http::NixHTTPPathInfoService; pub use self::sled::SledPathInfoService; -- cgit 1.4.1