about summary refs log tree commit diff
path: root/users
diff options
context:
space:
mode:
author edef <edef@edef.eu> 2024-10-19T13·58+0000
committer edef <edef@edef.eu> 2024-10-19T17·01+0000
commit 201d8f0cf2dd3b959bde91012c62ea5d3d714820 (patch)
tree 3584459985d9268bf9ddf020919683efadb9a9df /users
parent 06d2536eec88bfcfd2388e3ca153ba99815b7e97 (diff)
feat(users/edef/weave): use FxHashSet and dedupe early r/8846
Deduping early saves a fair bit of memory, but the extra hashing is
costly.

We switch to FxHash, since we don't need a DoS-proof hash, but we do
need it to be *fast*.

Change-Id: Ic6b7010874c417862baa9b882593208c8dd1d5e6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12648
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'users')
-rw-r--r-- users/edef/weave/Cargo.lock 7
-rw-r--r-- users/edef/weave/Cargo.nix 19
-rw-r--r-- users/edef/weave/Cargo.toml 1
-rw-r--r-- users/edef/weave/src/main.rs 10
4 files changed, 33 insertions, 4 deletions
diff --git a/users/edef/weave/Cargo.lock b/users/edef/weave/Cargo.lock
index 191059ffd729..fb7a02fb1340 100644
--- a/users/edef/weave/Cargo.lock
+++ b/users/edef/weave/Cargo.lock
@@ -1557,6 +1557,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
 
 [[package]]
+name = "rustc-hash"
+version = "2.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152"
+
+[[package]]
 name = "rustc_version"
 version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2027,6 +2033,7 @@ dependencies = [
  "nix-compat",
  "polars",
  "rayon",
+ "rustc-hash",
  "safer_owning_ref",
 ]
 
diff --git a/users/edef/weave/Cargo.nix b/users/edef/weave/Cargo.nix
index c34a03b6876b..0fd5eeed1724 100644
--- a/users/edef/weave/Cargo.nix
+++ b/users/edef/weave/Cargo.nix
@@ -5446,6 +5446,21 @@ rec {
           "rustc-dep-of-std" = [ "core" "compiler_builtins" ];
         };
       };
+      "rustc-hash" = rec {
+        crateName = "rustc-hash";
+        version = "2.0.0";
+        edition = "2021";
+        sha256 = "0lni0lf846bzrf3jvci6jaf4142n1mdqxvcpczk5ch9pfgyk8c2q";
+        libName = "rustc_hash";
+        authors = [
+          "The Rust Project Developers"
+        ];
+        features = {
+          "default" = [ "std" ];
+          "rand" = [ "dep:rand" "std" ];
+        };
+        resolvedDefaultFeatures = [ "default" "std" ];
+      };
       "rustc_version" = rec {
         crateName = "rustc_version";
         version = "0.4.1";
@@ -6767,6 +6782,10 @@ rec {
             packageId = "rayon";
           }
           {
+            name = "rustc-hash";
+            packageId = "rustc-hash";
+          }
+          {
             name = "safer_owning_ref";
             packageId = "safer_owning_ref";
           }
diff --git a/users/edef/weave/Cargo.toml b/users/edef/weave/Cargo.toml
index 72a205f66914..55b4e187defe 100644
--- a/users/edef/weave/Cargo.toml
+++ b/users/edef/weave/Cargo.toml
@@ -13,6 +13,7 @@ hashbrown = "0.14.3"
 nix-compat = { version = "0.1.0", path = "../../../tvix/nix-compat" }
 safer_owning_ref = "0.5.0"
 rayon = "1.8.1"
+rustc-hash = "2.0.0"
 
 [dependencies.polars]
 version = "0.36.2"
diff --git a/users/edef/weave/src/main.rs b/users/edef/weave/src/main.rs
index c86725003275..b86992c279d1 100644
--- a/users/edef/weave/src/main.rs
+++ b/users/edef/weave/src/main.rs
@@ -8,8 +8,9 @@
 use anyhow::Result;
 use hashbrown::{hash_table, HashTable};
 use rayon::prelude::*;
+use rustc_hash::FxHashSet;
 use std::{
-    collections::{BTreeMap, HashSet},
+    collections::BTreeMap,
     fs::File,
     ops::Index,
     sync::atomic::{AtomicU32, Ordering},
@@ -54,7 +55,8 @@ fn main() -> Result<()> {
         eprintln!("{DONE}");
     }
 
-    let mut todo = HashSet::with_capacity(roots.len());
+    let mut todo = FxHashSet::default();
+    todo.reserve(roots.len());
     {
         let mut unknown_roots = 0usize;
         for (_, idx) in roots.table {
@@ -99,14 +101,14 @@ fn main() -> Result<()> {
             .par_iter()
             .flat_map(|&parent| {
                 if parent == INDEX_NULL {
-                    return vec![];
+                    return FxHashSet::default();
                 }
 
                 ri_array[parent as usize]
                     .iter()
                     .cloned()
                     .filter(|child| !seen.contains(child))
-                    .collect::<Vec<u32>>()
+                    .collect::<FxHashSet<u32>>()
             })
             .collect();