diff options
author | Aspen Smith <root@gws.fyi> | 2024-07-28T16·11-0400 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-08-07T12·38+0000 |
commit | b8f92a6d535af09c24ac887855eb230ca25af1ed (patch) | |
tree | 82cea6a4d3979e0c48e9f97285b8564a24e9ceb0 /tvix | |
parent | 1d7ba89c19b231898a997f1af3c13ed8c7247793 (diff) |
feat(tvix/eval): Forbid Hash{Map,Set}, use Fx instead r/8453
Per https://nnethercote.github.io/perf-book/hashing.html, we have basically no reason to use the default hasher over a faster, non-DoS-resistant hasher. This gives a nice perf boost basically for free: hello outpath time: [704.76 ms 714.91 ms 725.63 ms] change: [-7.2391% -6.1018% -4.9189%] (p = 0.00 < 0.05) Performance has improved. Change-Id: If5587f444ed3af69f8af4eead6af3ea303b4ae68 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12046 Tested-by: BuildkiteCI Reviewed-by: flokli <flokli@flokli.de> Reviewed-by: Ilan Joselevich <personal@ilanjoselevich.com> Autosubmit: aspen <root@gws.fyi>
Diffstat (limited to 'tvix')
-rw-r--r-- | tvix/Cargo.lock | 14 | ||||
-rw-r--r-- | tvix/Cargo.nix | 30 | ||||
-rw-r--r-- | tvix/cli/Cargo.toml | 1 | ||||
-rw-r--r-- | tvix/cli/src/lib.rs | 8 | ||||
-rw-r--r-- | tvix/cli/src/repl.rs | 7 | ||||
-rw-r--r-- | tvix/eval/Cargo.toml | 1 | ||||
-rw-r--r-- | tvix/eval/clippy.toml | 3 | ||||
-rw-r--r-- | tvix/eval/src/builtins/mod.rs | 7 | ||||
-rw-r--r-- | tvix/eval/src/compiler/bindings.rs | 2 | ||||
-rw-r--r-- | tvix/eval/src/compiler/mod.rs | 13 | ||||
-rw-r--r-- | tvix/eval/src/compiler/scope.rs | 8 | ||||
-rw-r--r-- | tvix/eval/src/lib.rs | 10 | ||||
-rw-r--r-- | tvix/eval/src/value/string.rs | 18 | ||||
-rw-r--r-- | tvix/eval/src/value/thunk.rs | 4 | ||||
-rw-r--r-- | tvix/eval/src/vm/mod.rs | 5 |
15 files changed, 88 insertions, 43 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index cb3ed4c096e8..8e5c67bc4307 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -3085,7 +3085,7 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash", + "rustc-hash 1.1.0", "rustls 0.23.7", "thiserror", "tokio", @@ -3101,7 +3101,7 @@ dependencies = [ "bytes", "rand", "ring", - "rustc-hash", + "rustc-hash 1.1.0", "rustls 0.23.7", "slab", "thiserror", @@ -3416,7 +3416,7 @@ dependencies = [ "countme", "hashbrown 0.14.3", "memoffset 0.9.0", - "rustc-hash", + "rustc-hash 1.1.0", "text-size", ] @@ -3474,6 +3474,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] +name = "rustc-hash" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" + +[[package]] name = "rustc_version" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" @@ -4864,6 +4870,7 @@ dependencies = [ "nix-compat", "rnix", "rowan", + "rustc-hash 2.0.0", "rustyline", "smol_str", "thiserror", @@ -4905,6 +4912,7 @@ dependencies = [ "rnix", "rowan", "rstest", + "rustc-hash 2.0.0", "serde", "serde_json", "sha1", diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 926145c31fca..23539daab988 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -9701,7 +9701,7 @@ rec { } { name = "rustc-hash"; - packageId = "rustc-hash"; + packageId = "rustc-hash 1.1.0"; } { name = "rustls"; @@ -9768,7 +9768,7 @@ rec { } { name = "rustc-hash"; - packageId = "rustc-hash"; + packageId = "rustc-hash 1.1.0"; } { name = "rustls"; @@ -10901,7 +10901,7 @@ rec { } { name = "rustc-hash"; - packageId = "rustc-hash"; + packageId = "rustc-hash 1.1.0"; } { name = "text-size"; @@ -11051,7 +11051,7 @@ rec { "rustc-dep-of-std" = [ "core" "compiler_builtins" ]; }; }; - "rustc-hash" = rec { + "rustc-hash 1.1.0" = rec { crateName = "rustc-hash"; version = "1.1.0"; edition = "2015"; @@ -11064,6 +11064,20 @@ rec { }; resolvedDefaultFeatures = [ "default" "std" ]; }; + "rustc-hash 2.0.0" = rec { + crateName = "rustc-hash"; + version = "2.0.0"; + edition = "2021"; + sha256 = "0lni0lf846bzrf3jvci6jaf4142n1mdqxvcpczk5ch9pfgyk8c2q"; + authors = [ + "The Rust Project Developers" + ]; + features = { + "default" = [ "std" ]; + "rand" = [ "dep:rand" "std" ]; + }; + resolvedDefaultFeatures = [ "default" "std" ]; + }; "rustc_version" = rec { crateName = "rustc_version"; version = "0.4.0"; @@ -15893,6 +15907,10 @@ rec { packageId = "rowan"; } { + name = "rustc-hash"; + packageId = "rustc-hash 2.0.0"; + } + { name = "rustyline"; packageId = "rustyline"; } @@ -16050,6 +16068,10 @@ rec { packageId = "rowan"; } { + name = "rustc-hash"; + packageId = "rustc-hash 2.0.0"; + } + { name = "serde"; packageId = "serde"; features = [ "rc" "derive" ]; diff --git a/tvix/cli/Cargo.toml b/tvix/cli/Cargo.toml index abbb7e6c2bd0..7f93da85d47a 100644 --- a/tvix/cli/Cargo.toml +++ b/tvix/cli/Cargo.toml @@ -26,6 +26,7 @@ thiserror = "1.0.38" tokio = "1.28.0" tracing = "0.1.40" tracing-indicatif = "0.3.6" +rustc-hash = "2.0.0" [dependencies.wu-manber] git = "https://github.com/tvlfyi/wu-manber.git" diff --git a/tvix/cli/src/lib.rs b/tvix/cli/src/lib.rs index 7686cc2c0fe8..2351da13a771 100644 --- a/tvix/cli/src/lib.rs +++ b/tvix/cli/src/lib.rs @@ -1,5 +1,7 @@ -use std::{collections::HashMap, path::PathBuf, rc::Rc}; +use std::path::PathBuf; +use std::rc::Rc; +use rustc_hash::FxHashMap; use smol_str::SmolStr; use std::fmt::Write; use tracing::{instrument, Span}; @@ -86,7 +88,7 @@ pub fn evaluate( path: Option<PathBuf>, args: &Args, allow_incomplete: AllowIncomplete, - env: Option<&HashMap<SmolStr, Value>>, + env: Option<&FxHashMap<SmolStr, Value>>, globals: Option<Rc<GlobalsMap>>, source_map: Option<SourceCode>, ) -> Result<EvalResult, IncompleteInput> { @@ -218,7 +220,7 @@ pub fn interpret( args: &Args, explain: bool, allow_incomplete: AllowIncomplete, - env: Option<&HashMap<SmolStr, Value>>, + env: Option<&FxHashMap<SmolStr, Value>>, globals: Option<Rc<GlobalsMap>>, source_map: Option<SourceCode>, ) -> Result<InterpretResult, IncompleteInput> { diff --git a/tvix/cli/src/repl.rs b/tvix/cli/src/repl.rs index 601b639154a6..e4b499609829 100644 --- a/tvix/cli/src/repl.rs +++ b/tvix/cli/src/repl.rs @@ -1,6 +1,7 @@ +use std::path::PathBuf; use std::rc::Rc; -use std::{collections::HashMap, path::PathBuf}; +use rustc_hash::FxHashMap; use rustyline::{error::ReadlineError, Editor}; use smol_str::SmolStr; use tvix_eval::{GlobalsMap, SourceCode, Value}; @@ -88,7 +89,7 @@ pub struct Repl<'a> { multiline_input: Option<String>, rl: Editor<()>, /// Local variables defined at the top-level in the repl - env: HashMap<SmolStr, Value>, + env: FxHashMap<SmolStr, Value>, io_handle: Rc<TvixStoreIO>, args: &'a Args, @@ -102,7 +103,7 @@ impl<'a> Repl<'a> { Self { multiline_input: None, rl, - env: HashMap::new(), + env: FxHashMap::default(), io_handle, args, source_map: Default::default(), diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 3ebb13f23ce1..cd8d01cbb10a 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -34,6 +34,7 @@ sha2 = "0.10.8" sha1 = "0.10.6" md-5 = "0.10.6" data-encoding = "2.6.0" +rustc-hash = "2.0.0" [dev-dependencies] criterion = "0.5" diff --git a/tvix/eval/clippy.toml b/tvix/eval/clippy.toml new file mode 100644 index 000000000000..c5302112b3ae --- /dev/null +++ b/tvix/eval/clippy.toml @@ -0,0 +1,3 @@ +# See https://nnethercote.github.io/perf-book/hashing.html. Use FxHashMap and +# FxHashSet, not HashMap and HashSet +disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"] diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 88bed35d8a7e..e961738e3653 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -9,8 +9,8 @@ use genawaiter::rc::Gen; use imbl::OrdMap; use regex::Regex; use std::cmp::{self, Ordering}; +use std::collections::BTreeMap; use std::collections::VecDeque; -use std::collections::{BTreeMap, HashSet}; use std::path::PathBuf; use crate::arithmetic_op; @@ -88,6 +88,7 @@ mod pure_builtins { use imbl::Vector; use itertools::Itertools; use os_str_bytes::OsStringBytes; + use rustc_hash::FxHashSet; use crate::{value::PointerEquality, AddContext, NixContext, NixContextElement}; @@ -700,7 +701,7 @@ mod pure_builtins { // // In this implementation, we do none of that, no syntax checks, no realization. // The next `TODO` are the checks that Nix implements. - let mut ctx_elements: HashSet<NixContextElement> = HashSet::new(); + let mut ctx_elements: FxHashSet<NixContextElement> = FxHashSet::default(); let span = generators::request_span(&co).await; let origin = origin .coerce_to_string( @@ -1115,7 +1116,7 @@ mod pure_builtins { .to_list()? .into_iter() .map(|v| v.to_str()) - .collect::<Result<HashSet<_>, _>>()?; + .collect::<Result<FxHashSet<_>, _>>()?; let res = attrs.iter().filter_map(|(k, v)| { if !keys.contains(k) { Some((k.clone(), v.clone())) diff --git a/tvix/eval/src/compiler/bindings.rs b/tvix/eval/src/compiler/bindings.rs index 60203ba5d94b..f5c6376eb1b3 100644 --- a/tvix/eval/src/compiler/bindings.rs +++ b/tvix/eval/src/compiler/bindings.rs @@ -560,7 +560,7 @@ impl Compiler<'_, '_> { /// Emit definitions for all variables in the top-level global env passed to the evaluation (eg /// local variables in the REPL) - pub(super) fn compile_env(&mut self, env: &HashMap<SmolStr, Value>) { + pub(super) fn compile_env(&mut self, env: &FxHashMap<SmolStr, Value>) { for (name, value) in env { self.scope_mut().declare_constant(name.to_string()); self.emit_constant(value.clone(), &EntireFile); diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 3a25052aabb2..4878db1f1a84 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -20,8 +20,9 @@ mod scope; use codemap::Span; use rnix::ast::{self, AstToken}; +use rustc_hash::FxHashMap; use smol_str::SmolStr; -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::rc::{Rc, Weak}; @@ -117,7 +118,7 @@ impl TrackedFormal { /// The map of globally available functions and other values that /// should implicitly be resolvable in the global scope. -pub type GlobalsMap = HashMap<&'static str, Value>; +pub type GlobalsMap = FxHashMap<&'static str, Value>; /// Set of builtins that (if they exist) should be made available in /// the global scope, meaning that they can be accessed not just @@ -187,7 +188,7 @@ impl<'source, 'observer> Compiler<'source, 'observer> { pub(crate) fn new( location: Option<PathBuf>, globals: Rc<GlobalsMap>, - env: Option<&HashMap<SmolStr, Value>>, + env: Option<&FxHashMap<SmolStr, Value>>, source: &'source SourceCode, file: &'source codemap::File, observer: &'observer mut dyn CompilerObserver, @@ -1588,7 +1589,7 @@ pub fn prepare_globals( Rc::new_cyclic(Box::new(move |weak: &Weak<GlobalsMap>| { // First step is to construct the builtins themselves as // `NixAttrs`. - let mut builtins: GlobalsMap = HashMap::from_iter(builtins); + let mut builtins: GlobalsMap = FxHashMap::from_iter(builtins); // At this point, optionally insert `import` if enabled. To // "tie the knot" of `import` needing the full set of globals @@ -1601,7 +1602,7 @@ pub fn prepare_globals( // Next, the actual map of globals which the compiler will use // to resolve identifiers is constructed. - let mut globals: GlobalsMap = HashMap::new(); + let mut globals: GlobalsMap = FxHashMap::default(); // builtins contain themselves (`builtins.builtins`), which we // can resolve by manually constructing a suspended thunk that @@ -1654,7 +1655,7 @@ pub fn compile( expr: &ast::Expr, location: Option<PathBuf>, globals: Rc<GlobalsMap>, - env: Option<&HashMap<SmolStr, Value>>, + env: Option<&FxHashMap<SmolStr, Value>>, source: &SourceCode, file: &codemap::File, observer: &mut dyn CompilerObserver, diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs index 7b0a66004a7f..1087e0153e42 100644 --- a/tvix/eval/src/compiler/scope.rs +++ b/tvix/eval/src/compiler/scope.rs @@ -10,10 +10,8 @@ //! stack indices. To do this, the compiler simulates where locals //! will be at runtime using the data structures implemented here. -use std::{ - collections::{hash_map, HashMap}, - ops::Index, -}; +use rustc_hash::FxHashMap; +use std::{collections::hash_map, ops::Index}; use smol_str::SmolStr; @@ -168,7 +166,7 @@ pub struct Scope { pub upvalues: Vec<Upvalue>, /// Secondary by-name index over locals. - by_name: HashMap<String, ByName>, + by_name: FxHashMap<String, ByName>, /// How many scopes "deep" are these locals? scope_depth: usize, diff --git a/tvix/eval/src/lib.rs b/tvix/eval/src/lib.rs index a53a2a02d140..6897e1dd494f 100644 --- a/tvix/eval/src/lib.rs +++ b/tvix/eval/src/lib.rs @@ -36,7 +36,7 @@ mod test_utils; #[cfg(test)] mod tests; -use std::collections::HashMap; +use rustc_hash::FxHashMap; use std::path::PathBuf; use std::rc::Rc; use std::str::FromStr; @@ -86,7 +86,7 @@ enum BuilderGlobals { pub struct EvaluationBuilder<'co, 'ro, 'env, IO> { source_map: Option<SourceCode>, globals: BuilderGlobals, - env: Option<&'env HashMap<SmolStr, Value>>, + env: Option<&'env FxHashMap<SmolStr, Value>>, io_handle: IO, enable_import: bool, strict: bool, @@ -264,7 +264,7 @@ impl<'co, 'ro, 'env, IO> EvaluationBuilder<'co, 'ro, 'env, IO> { Self { nix_path, ..self } } - pub fn env(self, env: Option<&'env HashMap<SmolStr, Value>>) -> Self { + pub fn env(self, env: Option<&'env FxHashMap<SmolStr, Value>>) -> Self { Self { env, ..self } } @@ -352,7 +352,7 @@ pub struct Evaluation<'co, 'ro, 'env, IO> { globals: Rc<GlobalsMap>, /// Top-level variables to define in the evaluation - env: Option<&'env HashMap<SmolStr, Value>>, + env: Option<&'env FxHashMap<SmolStr, Value>>, /// Implementation of file-IO to use during evaluation, e.g. for /// impure builtins. @@ -570,7 +570,7 @@ fn parse_compile_internal( location: Option<PathBuf>, source: SourceCode, globals: Rc<GlobalsMap>, - env: Option<&HashMap<SmolStr, Value>>, + env: Option<&FxHashMap<SmolStr, Value>>, compiler_observer: &mut dyn CompilerObserver, ) -> Option<Rc<Lambda>> { let parsed = rnix::ast::Root::parse(code); diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index 3e991890ea5a..5c1f31db7eea 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -5,9 +5,9 @@ //! paying the cost when creating new strings. use bstr::{BStr, BString, ByteSlice, Chars}; use rnix::ast; +use rustc_hash::FxHashSet; use std::alloc::{alloc, dealloc, handle_alloc_error, Layout}; use std::borrow::{Borrow, Cow}; -use std::collections::HashSet; use std::ffi::c_void; use std::fmt::{self, Debug, Display}; use std::hash::Hash; @@ -40,23 +40,29 @@ pub enum NixContextElement { /// operations, e.g. concatenation, interpolation and other string operations. #[repr(transparent)] #[derive(Clone, Debug, Serialize, Default)] -pub struct NixContext(HashSet<NixContextElement>); +pub struct NixContext(FxHashSet<NixContextElement>); impl From<NixContextElement> for NixContext { fn from(value: NixContextElement) -> Self { - Self([value].into()) + let mut set = FxHashSet::default(); + set.insert(value); + Self(set) } } -impl From<HashSet<NixContextElement>> for NixContext { - fn from(value: HashSet<NixContextElement>) -> Self { +impl From<FxHashSet<NixContextElement>> for NixContext { + fn from(value: FxHashSet<NixContextElement>) -> Self { Self(value) } } impl<const N: usize> From<[NixContextElement; N]> for NixContext { fn from(value: [NixContextElement; N]) -> Self { - Self(HashSet::from(value)) + let mut set = FxHashSet::default(); + for elt in value { + set.insert(elt); + } + Self(set) } } diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs index 6d1fe1b8a7cc..2019e8ab3239 100644 --- a/tvix/eval/src/value/thunk.rs +++ b/tvix/eval/src/value/thunk.rs @@ -18,9 +18,9 @@ //! object, but when forcing a thunk, the runtime *must* mutate the //! memoisable slot. +use rustc_hash::FxHashSet; use std::{ cell::{Ref, RefCell, RefMut}, - collections::HashSet, fmt::Debug, rc::Rc, }; @@ -413,7 +413,7 @@ impl TotalDisplay for Thunk { /// The inner `HashSet` is not available on the outside, as it would be /// potentially unsafe to interact with the pointers in the set. #[derive(Default)] -pub struct ThunkSet(HashSet<*const ThunkRepr>); +pub struct ThunkSet(FxHashSet<*const ThunkRepr>); impl ThunkSet { /// Check whether the given thunk has already been seen. Will mark the thunk diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 65117b79a0bc..a6d0941e8d7a 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -14,8 +14,9 @@ mod macros; use bstr::{BString, ByteSlice, ByteVec}; use codemap::Span; +use rustc_hash::FxHashMap; use serde_json::json; -use std::{cmp::Ordering, collections::HashMap, ops::DerefMut, path::PathBuf, rc::Rc}; +use std::{cmp::Ordering, ops::DerefMut, path::PathBuf, rc::Rc}; use crate::{ arithmetic_op, @@ -211,7 +212,7 @@ impl Frame { } #[derive(Default)] -struct ImportCache(HashMap<PathBuf, Value>); +struct ImportCache(FxHashMap<PathBuf, Value>); /// The `ImportCache` holds the `Value` resulting from `import`ing a certain /// file, so that the same file doesn't need to be re-evaluated multiple times. |