From d378111d774777f4a5f319946d26e3d481a49870 Mon Sep 17 00:00:00 2001 From: Aspen Smith Date: Sun, 28 Jul 2024 12:50:09 -0400 Subject: feat(tvix/eval): Store hash in key of interner Rather than storing the leaked allocation for the string as the key in the interner, store the hash (using NoHashHashBuilder). I thought this would improve performance, but it doesn't: hello outpath time: [736.85 ms 748.42 ms 760.42 ms] change: [-2.0754% +0.4798% +2.7096%] (p = 0.72 > 0.05) No change in performance detected. but it at least doesn't *hurt* performance, and it *does* avoid an `unsafe`, so it's probably net good. Change-Id: Ie413955bdb6f04b1f468f511e5ebce56e329fa37 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12049 Tested-by: BuildkiteCI Reviewed-by: flokli Autosubmit: aspen --- tvix/eval/src/value/string.rs | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'tvix/eval/src/value') diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index 0365f5a9be77..247d23a7c895 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -4,18 +4,20 @@ //! level, allowing us to shave off some memory overhead and only //! paying the cost when creating new strings. use bstr::{BStr, BString, ByteSlice, Chars}; +use nohash_hasher::BuildNoHashHasher; use rnix::ast; -use rustc_hash::{FxHashMap, FxHashSet}; +use rustc_hash::FxHashSet; +use rustc_hash::FxHasher; use std::alloc::dealloc; use std::alloc::{alloc, handle_alloc_error, Layout}; use std::borrow::{Borrow, Cow}; use std::cell::RefCell; use std::ffi::c_void; use std::fmt::{self, Debug, Display}; -use std::hash::Hash; +use std::hash::{Hash, Hasher}; use std::ops::Deref; use std::ptr::{self, NonNull}; -use std::{mem, slice}; +use std::slice; use serde::de::{Deserializer, Visitor}; use serde::{Deserialize, Serialize}; @@ -399,22 +401,33 @@ impl NixStringInner { #[derive(Default)] struct InternerInner { - map: FxHashMap<&'static [u8], NonNull>, + #[allow(clippy::disallowed_types)] // Not using the default hasher + map: std::collections::HashMap, BuildNoHashHasher>, #[cfg(feature = "no_leak")] + #[allow(clippy::disallowed_types)] // Not using the default hasher interned_strings: FxHashSet>, } unsafe impl Send for InternerInner {} +fn hash(s: T) -> u64 +where + T: Hash, +{ + let mut hasher = FxHasher::default(); + s.hash(&mut hasher); + hasher.finish() +} + impl InternerInner { pub fn intern(&mut self, s: &[u8]) -> NixString { - if let Some(s) = self.map.get(s) { + let hash = hash(s); + if let Some(s) = self.map.get(&hash) { return NixString(*s); } let string = NixString::new_inner(s, None); - self.map - .insert(unsafe { mem::transmute(string.as_bytes()) }, string.0); + self.map.insert(hash, string.0); #[cfg(feature = "no_leak")] self.interned_strings.insert(string.0); string -- cgit 1.4.1