diff options
author | edef <edef@edef.eu> | 2024-10-19T13·58+0000 |
---|---|---|
committer | edef <edef@edef.eu> | 2024-10-19T17·01+0000 |
commit | 201d8f0cf2dd3b959bde91012c62ea5d3d714820 (patch) | |
tree | 3584459985d9268bf9ddf020919683efadb9a9df /users/edef | |
parent | 06d2536eec88bfcfd2388e3ca153ba99815b7e97 (diff) |
feat(users/edef/weave): use FxHashSet and dedupe early r/8846
Deduping early saves a fair bit of memory, but the extra hashing is costly. We switch to FxHash, since we don't need a DoS-proof hash, but we do need it to be *fast*.

Change-Id: Ic6b7010874c417862baa9b882593208c8dd1d5e6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12648
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/edef')
-rw-r--r-- | users/edef/weave/Cargo.lock | 7 | ||||
-rw-r--r-- | users/edef/weave/Cargo.nix | 19 | ||||
-rw-r--r-- | users/edef/weave/Cargo.toml | 1 | ||||
-rw-r--r-- | users/edef/weave/src/main.rs | 10 |
4 files changed, 33 insertions, 4 deletions
diff --git a/users/edef/weave/Cargo.lock b/users/edef/weave/Cargo.lock index 191059ffd729..fb7a02fb1340 100644 --- a/users/edef/weave/Cargo.lock +++ b/users/edef/weave/Cargo.lock @@ -1557,6 +1557,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + +[[package]] name = "rustc_version" version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -2027,6 +2033,7 @@ dependencies = [ "nix-compat", "polars", "rayon", + "rustc-hash", "safer_owning_ref", ] diff --git a/users/edef/weave/Cargo.nix b/users/edef/weave/Cargo.nix index c34a03b6876b..0fd5eeed1724 100644 --- a/users/edef/weave/Cargo.nix +++ b/users/edef/weave/Cargo.nix @@ -5446,6 +5446,21 @@ rec { "rustc-dep-of-std" = [ "core" "compiler_builtins" ]; }; }; + "rustc-hash" = rec { + crateName = "rustc-hash"; + version = "2.0.0"; + edition = "2021"; + sha256 = "0lni0lf846bzrf3jvci6jaf4142n1mdqxvcpczk5ch9pfgyk8c2q"; + libName = "rustc_hash"; + authors = [ + "The Rust Project Developers" + ]; + features = { + "default" = [ "std" ]; + "rand" = [ "dep:rand" "std" ]; + }; + resolvedDefaultFeatures = [ "default" "std" ]; + }; "rustc_version" = rec { crateName = "rustc_version"; version = "0.4.1"; @@ -6767,6 +6782,10 @@ rec { packageId = "rayon"; } { + name = "rustc-hash"; + packageId = "rustc-hash"; + } + { name = "safer_owning_ref"; packageId = "safer_owning_ref"; } diff --git a/users/edef/weave/Cargo.toml b/users/edef/weave/Cargo.toml index 72a205f66914..55b4e187defe 100644 --- a/users/edef/weave/Cargo.toml +++ b/users/edef/weave/Cargo.toml @@ -13,6 +13,7 @@ hashbrown = "0.14.3" nix-compat = { version = "0.1.0", path = "../../../tvix/nix-compat" } safer_owning_ref = "0.5.0" rayon = "1.8.1" +rustc-hash = 
"2.0.0" [dependencies.polars] version = "0.36.2" diff --git a/users/edef/weave/src/main.rs b/users/edef/weave/src/main.rs index c86725003275..b86992c279d1 100644 --- a/users/edef/weave/src/main.rs +++ b/users/edef/weave/src/main.rs @@ -8,8 +8,9 @@ use anyhow::Result; use hashbrown::{hash_table, HashTable}; use rayon::prelude::*; +use rustc_hash::FxHashSet; use std::{ - collections::{BTreeMap, HashSet}, + collections::BTreeMap, fs::File, ops::Index, sync::atomic::{AtomicU32, Ordering}, @@ -54,7 +55,8 @@ fn main() -> Result<()> { eprintln!("{DONE}"); } - let mut todo = HashSet::with_capacity(roots.len()); + let mut todo = FxHashSet::default(); + todo.reserve(roots.len()); { let mut unknown_roots = 0usize; for (_, idx) in roots.table { @@ -99,14 +101,14 @@ fn main() -> Result<()> { .par_iter() .flat_map(|&parent| { if parent == INDEX_NULL { - return vec![]; + return FxHashSet::default(); } ri_array[parent as usize] .iter() .cloned() .filter(|child| !seen.contains(child)) - .collect::<Vec<u32>>() + .collect::<FxHashSet<u32>>() }) .collect(); |