diff options
author | Aspen Smith <root@gws.fyi> | 2024-07-28T16·11-0400 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-08-07T12·38+0000 |
commit | b8f92a6d535af09c24ac887855eb230ca25af1ed (patch) | |
tree | 82cea6a4d3979e0c48e9f97285b8564a24e9ceb0 /tvix/eval | |
parent | 1d7ba89c19b231898a997f1af3c13ed8c7247793 (diff) |
feat(tvix/eval): Forbid Hash{Map,Set}, use Fx instead r/8453
Per https://nnethercote.github.io/perf-book/hashing.html, we have basically no reason to use the default hasher over a faster, non-DoS-resistant hasher. This gives a nice perf boost basically for free: hello outpath time: [704.76 ms 714.91 ms 725.63 ms] change: [-7.2391% -6.1018% -4.9189%] (p = 0.00 < 0.05) Performance has improved. Change-Id: If5587f444ed3af69f8af4eead6af3ea303b4ae68 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12046 Tested-by: BuildkiteCI Reviewed-by: flokli <flokli@flokli.de> Reviewed-by: Ilan Joselevich <personal@ilanjoselevich.com> Autosubmit: aspen <root@gws.fyi>
Diffstat (limited to 'tvix/eval')
-rw-r--r-- | tvix/eval/Cargo.toml | 1 | ||||
-rw-r--r-- | tvix/eval/clippy.toml | 3 | ||||
-rw-r--r-- | tvix/eval/src/builtins/mod.rs | 7 | ||||
-rw-r--r-- | tvix/eval/src/compiler/bindings.rs | 2 | ||||
-rw-r--r-- | tvix/eval/src/compiler/mod.rs | 13 | ||||
-rw-r--r-- | tvix/eval/src/compiler/scope.rs | 8 | ||||
-rw-r--r-- | tvix/eval/src/lib.rs | 10 | ||||
-rw-r--r-- | tvix/eval/src/value/string.rs | 18 | ||||
-rw-r--r-- | tvix/eval/src/value/thunk.rs | 4 | ||||
-rw-r--r-- | tvix/eval/src/vm/mod.rs | 5 |
10 files changed, 41 insertions, 30 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 3ebb13f23ce1..cd8d01cbb10a 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -34,6 +34,7 @@ sha2 = "0.10.8" sha1 = "0.10.6" md-5 = "0.10.6" data-encoding = "2.6.0" +rustc-hash = "2.0.0" [dev-dependencies] criterion = "0.5" diff --git a/tvix/eval/clippy.toml b/tvix/eval/clippy.toml new file mode 100644 index 000000000000..c5302112b3ae --- /dev/null +++ b/tvix/eval/clippy.toml @@ -0,0 +1,3 @@ +# See https://nnethercote.github.io/perf-book/hashing.html. Use FxHashMap and +# FxHashSet, not HashMap and HashSet +disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"] diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 88bed35d8a7e..e961738e3653 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -9,8 +9,8 @@ use genawaiter::rc::Gen; use imbl::OrdMap; use regex::Regex; use std::cmp::{self, Ordering}; +use std::collections::BTreeMap; use std::collections::VecDeque; -use std::collections::{BTreeMap, HashSet}; use std::path::PathBuf; use crate::arithmetic_op; @@ -88,6 +88,7 @@ mod pure_builtins { use imbl::Vector; use itertools::Itertools; use os_str_bytes::OsStringBytes; + use rustc_hash::FxHashSet; use crate::{value::PointerEquality, AddContext, NixContext, NixContextElement}; @@ -700,7 +701,7 @@ mod pure_builtins { // // In this implementation, we do none of that, no syntax checks, no realization. // The next `TODO` are the checks that Nix implements. - let mut ctx_elements: HashSet<NixContextElement> = HashSet::new(); + let mut ctx_elements: FxHashSet<NixContextElement> = FxHashSet::default(); let span = generators::request_span(&co).await; let origin = origin .coerce_to_string( @@ -1115,7 +1116,7 @@ mod pure_builtins { .to_list()? .into_iter() .map(|v| v.to_str()) - .collect::<Result<HashSet<_>, _>>()?; + .collect::<Result<FxHashSet<_>, _>>()?; let res = attrs.iter().filter_map(|(k, v)| { if !keys.contains(k) { Some((k.clone(), v.clone())) diff --git a/tvix/eval/src/compiler/bindings.rs b/tvix/eval/src/compiler/bindings.rs index 60203ba5d94b..f5c6376eb1b3 100644 --- a/tvix/eval/src/compiler/bindings.rs +++ b/tvix/eval/src/compiler/bindings.rs @@ -560,7 +560,7 @@ impl Compiler<'_, '_> { /// Emit definitions for all variables in the top-level global env passed to the evaluation (eg /// local variables in the REPL) - pub(super) fn compile_env(&mut self, env: &HashMap<SmolStr, Value>) { + pub(super) fn compile_env(&mut self, env: &FxHashMap<SmolStr, Value>) { for (name, value) in env { self.scope_mut().declare_constant(name.to_string()); self.emit_constant(value.clone(), &EntireFile); diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 3a25052aabb2..4878db1f1a84 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -20,8 +20,9 @@ mod scope; use codemap::Span; use rnix::ast::{self, AstToken}; +use rustc_hash::FxHashMap; use smol_str::SmolStr; -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::rc::{Rc, Weak}; @@ -117,7 +118,7 @@ impl TrackedFormal { /// The map of globally available functions and other values that /// should implicitly be resolvable in the global scope. -pub type GlobalsMap = HashMap<&'static str, Value>; +pub type GlobalsMap = FxHashMap<&'static str, Value>; /// Set of builtins that (if they exist) should be made available in /// the global scope, meaning that they can be accessed not just @@ -187,7 +188,7 @@ impl<'source, 'observer> Compiler<'source, 'observer> { pub(crate) fn new( location: Option<PathBuf>, globals: Rc<GlobalsMap>, - env: Option<&HashMap<SmolStr, Value>>, + env: Option<&FxHashMap<SmolStr, Value>>, source: &'source SourceCode, file: &'source codemap::File, observer: &'observer mut dyn CompilerObserver, @@ -1588,7 +1589,7 @@ pub fn prepare_globals( Rc::new_cyclic(Box::new(move |weak: &Weak<GlobalsMap>| { // First step is to construct the builtins themselves as // `NixAttrs`. - let mut builtins: GlobalsMap = HashMap::from_iter(builtins); + let mut builtins: GlobalsMap = FxHashMap::from_iter(builtins); // At this point, optionally insert `import` if enabled. To // "tie the knot" of `import` needing the full set of globals @@ -1601,7 +1602,7 @@ pub fn prepare_globals( // Next, the actual map of globals which the compiler will use // to resolve identifiers is constructed. - let mut globals: GlobalsMap = HashMap::new(); + let mut globals: GlobalsMap = FxHashMap::default(); // builtins contain themselves (`builtins.builtins`), which we // can resolve by manually constructing a suspended thunk that @@ -1654,7 +1655,7 @@ pub fn compile( expr: &ast::Expr, location: Option<PathBuf>, globals: Rc<GlobalsMap>, - env: Option<&HashMap<SmolStr, Value>>, + env: Option<&FxHashMap<SmolStr, Value>>, source: &SourceCode, file: &codemap::File, observer: &mut dyn CompilerObserver, diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs index 7b0a66004a7f..1087e0153e42 100644 --- a/tvix/eval/src/compiler/scope.rs +++ b/tvix/eval/src/compiler/scope.rs @@ -10,10 +10,8 @@ //! stack indices. To do this, the compiler simulates where locals //! will be at runtime using the data structures implemented here. -use std::{ - collections::{hash_map, HashMap}, - ops::Index, -}; +use rustc_hash::FxHashMap; +use std::{collections::hash_map, ops::Index}; use smol_str::SmolStr; @@ -168,7 +166,7 @@ pub struct Scope { pub upvalues: Vec<Upvalue>, /// Secondary by-name index over locals. - by_name: HashMap<String, ByName>, + by_name: FxHashMap<String, ByName>, /// How many scopes "deep" are these locals? scope_depth: usize, diff --git a/tvix/eval/src/lib.rs b/tvix/eval/src/lib.rs index a53a2a02d140..6897e1dd494f 100644 --- a/tvix/eval/src/lib.rs +++ b/tvix/eval/src/lib.rs @@ -36,7 +36,7 @@ mod test_utils; #[cfg(test)] mod tests; -use std::collections::HashMap; +use rustc_hash::FxHashMap; use std::path::PathBuf; use std::rc::Rc; use std::str::FromStr; @@ -86,7 +86,7 @@ enum BuilderGlobals { pub struct EvaluationBuilder<'co, 'ro, 'env, IO> { source_map: Option<SourceCode>, globals: BuilderGlobals, - env: Option<&'env HashMap<SmolStr, Value>>, + env: Option<&'env FxHashMap<SmolStr, Value>>, io_handle: IO, enable_import: bool, strict: bool, @@ -264,7 +264,7 @@ impl<'co, 'ro, 'env, IO> EvaluationBuilder<'co, 'ro, 'env, IO> { Self { nix_path, ..self } } - pub fn env(self, env: Option<&'env HashMap<SmolStr, Value>>) -> Self { + pub fn env(self, env: Option<&'env FxHashMap<SmolStr, Value>>) -> Self { Self { env, ..self } } @@ -352,7 +352,7 @@ pub struct Evaluation<'co, 'ro, 'env, IO> { globals: Rc<GlobalsMap>, /// Top-level variables to define in the evaluation - env: Option<&'env HashMap<SmolStr, Value>>, + env: Option<&'env FxHashMap<SmolStr, Value>>, /// Implementation of file-IO to use during evaluation, e.g. for /// impure builtins. @@ -570,7 +570,7 @@ fn parse_compile_internal( location: Option<PathBuf>, source: SourceCode, globals: Rc<GlobalsMap>, - env: Option<&HashMap<SmolStr, Value>>, + env: Option<&FxHashMap<SmolStr, Value>>, compiler_observer: &mut dyn CompilerObserver, ) -> Option<Rc<Lambda>> { let parsed = rnix::ast::Root::parse(code); diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index 3e991890ea5a..5c1f31db7eea 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -5,9 +5,9 @@ //! paying the cost when creating new strings. use bstr::{BStr, BString, ByteSlice, Chars}; use rnix::ast; +use rustc_hash::FxHashSet; use std::alloc::{alloc, dealloc, handle_alloc_error, Layout}; use std::borrow::{Borrow, Cow}; -use std::collections::HashSet; use std::ffi::c_void; use std::fmt::{self, Debug, Display}; use std::hash::Hash; @@ -40,23 +40,29 @@ pub enum NixContextElement { /// operations, e.g. concatenation, interpolation and other string operations. #[repr(transparent)] #[derive(Clone, Debug, Serialize, Default)] -pub struct NixContext(HashSet<NixContextElement>); +pub struct NixContext(FxHashSet<NixContextElement>); impl From<NixContextElement> for NixContext { fn from(value: NixContextElement) -> Self { - Self([value].into()) + let mut set = FxHashSet::default(); + set.insert(value); + Self(set) } } -impl From<HashSet<NixContextElement>> for NixContext { - fn from(value: HashSet<NixContextElement>) -> Self { +impl From<FxHashSet<NixContextElement>> for NixContext { + fn from(value: FxHashSet<NixContextElement>) -> Self { Self(value) } } impl<const N: usize> From<[NixContextElement; N]> for NixContext { fn from(value: [NixContextElement; N]) -> Self { - Self(HashSet::from(value)) + let mut set = FxHashSet::default(); + for elt in value { + set.insert(elt); + } + Self(set) } } diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs index 6d1fe1b8a7cc..2019e8ab3239 100644 --- a/tvix/eval/src/value/thunk.rs +++ b/tvix/eval/src/value/thunk.rs @@ -18,9 +18,9 @@ //! object, but when forcing a thunk, the runtime *must* mutate the //! memoisable slot. +use rustc_hash::FxHashSet; use std::{ cell::{Ref, RefCell, RefMut}, - collections::HashSet, fmt::Debug, rc::Rc, }; @@ -413,7 +413,7 @@ impl TotalDisplay for Thunk { /// The inner `HashSet` is not available on the outside, as it would be /// potentially unsafe to interact with the pointers in the set. #[derive(Default)] -pub struct ThunkSet(HashSet<*const ThunkRepr>); +pub struct ThunkSet(FxHashSet<*const ThunkRepr>); impl ThunkSet { /// Check whether the given thunk has already been seen. Will mark the thunk diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 65117b79a0bc..a6d0941e8d7a 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -14,8 +14,9 @@ mod macros; use bstr::{BString, ByteSlice, ByteVec}; use codemap::Span; +use rustc_hash::FxHashMap; use serde_json::json; -use std::{cmp::Ordering, collections::HashMap, ops::DerefMut, path::PathBuf, rc::Rc}; +use std::{cmp::Ordering, ops::DerefMut, path::PathBuf, rc::Rc}; use crate::{ arithmetic_op, @@ -211,7 +212,7 @@ impl Frame { } #[derive(Default)] -struct ImportCache(HashMap<PathBuf, Value>); +struct ImportCache(FxHashMap<PathBuf, Value>); /// The `ImportCache` holds the `Value` resulting from `import`ing a certain /// file, so that the same file doesn't need to be re-evaluated multiple times. |