about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAspen Smith <root@gws.fyi>2024-07-28T16·50-0400
committerclbot <clbot@tvl.fyi>2024-08-10T15·28+0000
commitd378111d774777f4a5f319946d26e3d481a49870 (patch)
tree50f3f40d24e00ab3f5d9e5a7ea0dc3b543ba34d3
parenta6d6fc418de0cdce921698410265fa78ca6b7f23 (diff)
feat(tvix/eval): Store hash in key of interner r/8474
Rather than storing the leaked allocation for the string as the key in
the interner, store the hash (using NoHashHashBuilder). I thought this
would improve performance, but it doesn't:

hello outpath           time:   [736.85 ms 748.42 ms 760.42 ms]
                        change: [-2.0754% +0.4798% +2.7096%] (p = 0.72 > 0.05)
                        No change in performance detected.

but it at least doesn't *hurt* performance, and it *does* avoid an
`unsafe`, so it's probably net good.

Change-Id: Ie413955bdb6f04b1f468f511e5ebce56e329fa37
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12049
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Autosubmit: aspen <root@gws.fyi>
-rw-r--r--tvix/Cargo.lock7
-rw-r--r--tvix/Cargo.nix17
-rw-r--r--tvix/eval/Cargo.toml1
-rw-r--r--tvix/eval/src/value/string.rs27
-rw-r--r--web/tvixbolt/Cargo.lock7
-rw-r--r--web/tvixbolt/Cargo.nix17
6 files changed, 69 insertions, 7 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index 9bea027e4de3..e92803e99032 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -2401,6 +2401,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "nohash-hasher"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
+
+[[package]]
 name = "nom"
 version = "7.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -4907,6 +4913,7 @@ dependencies = [
  "lexical-core",
  "md-5",
  "mimalloc",
+ "nohash-hasher",
  "os_str_bytes",
  "path-clean",
  "pretty_assertions",
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 9df3c8af8ef5..33805ced0726 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -7525,6 +7525,19 @@ rec {
         };
         resolvedDefaultFeatures = [ "async" "default" "pin-project-lite" "tokio" "wire" ];
       };
+      "nohash-hasher" = rec {
+        crateName = "nohash-hasher";
+        version = "0.2.0";
+        edition = "2018";
+        sha256 = "0lf4p6k01w4wm7zn4grnihzj8s7zd5qczjmzng7wviwxawih5x9b";
+        authors = [
+          "Parity Technologies <admin@parity.io>"
+        ];
+        features = {
+          "default" = [ "std" ];
+        };
+        resolvedDefaultFeatures = [ "default" "std" ];
+      };
       "nom" = rec {
         crateName = "nom";
         version = "7.1.3";
@@ -16043,6 +16056,10 @@ rec {
             packageId = "md-5";
           }
           {
+            name = "nohash-hasher";
+            packageId = "nohash-hasher";
+          }
+          {
             name = "os_str_bytes";
             packageId = "os_str_bytes";
             features = [ "conversions" ];
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml
index 9637165e8500..70a356376622 100644
--- a/tvix/eval/Cargo.toml
+++ b/tvix/eval/Cargo.toml
@@ -35,6 +35,7 @@ sha1 = "0.10.6"
 md-5 = "0.10.6"
 data-encoding = "2.6.0"
 rustc-hash = "2.0.0"
+nohash-hasher = "0.2.0"
 
 [dev-dependencies]
 criterion = "0.5"
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs
index 0365f5a9be77..247d23a7c895 100644
--- a/tvix/eval/src/value/string.rs
+++ b/tvix/eval/src/value/string.rs
@@ -4,18 +4,20 @@
 //! level, allowing us to shave off some memory overhead and only
 //! paying the cost when creating new strings.
 use bstr::{BStr, BString, ByteSlice, Chars};
+use nohash_hasher::BuildNoHashHasher;
 use rnix::ast;
-use rustc_hash::{FxHashMap, FxHashSet};
+use rustc_hash::FxHashSet;
+use rustc_hash::FxHasher;
 use std::alloc::dealloc;
 use std::alloc::{alloc, handle_alloc_error, Layout};
 use std::borrow::{Borrow, Cow};
 use std::cell::RefCell;
 use std::ffi::c_void;
 use std::fmt::{self, Debug, Display};
-use std::hash::Hash;
+use std::hash::{Hash, Hasher};
 use std::ops::Deref;
 use std::ptr::{self, NonNull};
-use std::{mem, slice};
+use std::slice;
 
 use serde::de::{Deserializer, Visitor};
 use serde::{Deserialize, Serialize};
@@ -399,22 +401,33 @@ impl NixStringInner {
 
 #[derive(Default)]
 struct InternerInner {
-    map: FxHashMap<&'static [u8], NonNull<c_void>>,
+    #[allow(clippy::disallowed_types)] // Not using the default hasher
+    map: std::collections::HashMap<u64, NonNull<c_void>, BuildNoHashHasher<u64>>,
     #[cfg(feature = "no_leak")]
+    #[allow(clippy::disallowed_types)] // Not using the default hasher
     interned_strings: FxHashSet<NonNull<c_void>>,
 }
 
 unsafe impl Send for InternerInner {}
 
+fn hash<T>(s: T) -> u64
+where
+    T: Hash,
+{
+    let mut hasher = FxHasher::default();
+    s.hash(&mut hasher);
+    hasher.finish()
+}
+
 impl InternerInner {
     pub fn intern(&mut self, s: &[u8]) -> NixString {
-        if let Some(s) = self.map.get(s) {
+        let hash = hash(s);
+        if let Some(s) = self.map.get(&hash) {
             return NixString(*s);
         }
 
         let string = NixString::new_inner(s, None);
-        self.map
-            .insert(unsafe { mem::transmute(string.as_bytes()) }, string.0);
+        self.map.insert(hash, string.0);
         #[cfg(feature = "no_leak")]
         self.interned_strings.insert(string.0);
         string
diff --git a/web/tvixbolt/Cargo.lock b/web/tvixbolt/Cargo.lock
index 82162469dee9..c12647b97e57 100644
--- a/web/tvixbolt/Cargo.lock
+++ b/web/tvixbolt/Cargo.lock
@@ -959,6 +959,12 @@ dependencies = [
 ]
 
 [[package]]
+name = "nohash-hasher"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451"
+
+[[package]]
 name = "nom8"
 version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1562,6 +1568,7 @@ dependencies = [
  "lazy_static",
  "lexical-core",
  "md-5",
+ "nohash-hasher",
  "os_str_bytes",
  "path-clean",
  "regex",
diff --git a/web/tvixbolt/Cargo.nix b/web/tvixbolt/Cargo.nix
index a2ad6cc33e80..68c34901a10b 100644
--- a/web/tvixbolt/Cargo.nix
+++ b/web/tvixbolt/Cargo.nix
@@ -2939,6 +2939,19 @@ rec {
           "simd-adler32" = [ "dep:simd-adler32" ];
         };
       };
+      "nohash-hasher" = rec {
+        crateName = "nohash-hasher";
+        version = "0.2.0";
+        edition = "2018";
+        sha256 = "0lf4p6k01w4wm7zn4grnihzj8s7zd5qczjmzng7wviwxawih5x9b";
+        authors = [
+          "Parity Technologies <admin@parity.io>"
+        ];
+        features = {
+          "default" = [ "std" ];
+        };
+        resolvedDefaultFeatures = [ "default" "std" ];
+      };
       "nom8" = rec {
         crateName = "nom8";
         version = "0.2.0";
@@ -4636,6 +4649,10 @@ rec {
             packageId = "md-5";
           }
           {
+            name = "nohash-hasher";
+            packageId = "nohash-hasher";
+          }
+          {
             name = "os_str_bytes";
             packageId = "os_str_bytes";
             features = [ "conversions" ];