about summary refs log tree commit diff
path: root/tvix/eval
diff options
context:
space:
mode:
authorAspen Smith <root@gws.fyi>2024-07-28T16·11-0400
committerclbot <clbot@tvl.fyi>2024-08-07T12·38+0000
commitb8f92a6d535af09c24ac887855eb230ca25af1ed (patch)
tree82cea6a4d3979e0c48e9f97285b8564a24e9ceb0 /tvix/eval
parent1d7ba89c19b231898a997f1af3c13ed8c7247793 (diff)
feat(tvix/eval): Forbid Hash{Map,Set}, use Fx instead r/8453
Per https://nnethercote.github.io/perf-book/hashing.html, we have
basically no reason to use the default hasher over a faster,
non-DoS-resistant hasher. This gives a nice perf boost basically for
free:

hello outpath           time:   [704.76 ms 714.91 ms 725.63 ms]
                        change: [-7.2391% -6.1018% -4.9189%] (p = 0.00 < 0.05)
                        Performance has improved.

Change-Id: If5587f444ed3af69f8af4eead6af3ea303b4ae68
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12046
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Reviewed-by: Ilan Joselevich <personal@ilanjoselevich.com>
Autosubmit: aspen <root@gws.fyi>
Diffstat (limited to 'tvix/eval')
-rw-r--r--tvix/eval/Cargo.toml1
-rw-r--r--tvix/eval/clippy.toml3
-rw-r--r--tvix/eval/src/builtins/mod.rs7
-rw-r--r--tvix/eval/src/compiler/bindings.rs2
-rw-r--r--tvix/eval/src/compiler/mod.rs13
-rw-r--r--tvix/eval/src/compiler/scope.rs8
-rw-r--r--tvix/eval/src/lib.rs10
-rw-r--r--tvix/eval/src/value/string.rs18
-rw-r--r--tvix/eval/src/value/thunk.rs4
-rw-r--r--tvix/eval/src/vm/mod.rs5
10 files changed, 41 insertions, 30 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml
index 3ebb13f23ce1..cd8d01cbb10a 100644
--- a/tvix/eval/Cargo.toml
+++ b/tvix/eval/Cargo.toml
@@ -34,6 +34,7 @@ sha2 = "0.10.8"
 sha1 = "0.10.6"
 md-5 = "0.10.6"
 data-encoding = "2.6.0"
+rustc-hash = "2.0.0"
 
 [dev-dependencies]
 criterion = "0.5"
diff --git a/tvix/eval/clippy.toml b/tvix/eval/clippy.toml
new file mode 100644
index 000000000000..c5302112b3ae
--- /dev/null
+++ b/tvix/eval/clippy.toml
@@ -0,0 +1,3 @@
+# See https://nnethercote.github.io/perf-book/hashing.html. Use FxHashMap and
+# FxHashSet, not HashMap and HashSet
+disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"]
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index 88bed35d8a7e..e961738e3653 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -9,8 +9,8 @@ use genawaiter::rc::Gen;
 use imbl::OrdMap;
 use regex::Regex;
 use std::cmp::{self, Ordering};
+use std::collections::BTreeMap;
 use std::collections::VecDeque;
-use std::collections::{BTreeMap, HashSet};
 use std::path::PathBuf;
 
 use crate::arithmetic_op;
@@ -88,6 +88,7 @@ mod pure_builtins {
     use imbl::Vector;
     use itertools::Itertools;
     use os_str_bytes::OsStringBytes;
+    use rustc_hash::FxHashSet;
 
     use crate::{value::PointerEquality, AddContext, NixContext, NixContextElement};
 
@@ -700,7 +701,7 @@ mod pure_builtins {
         //
         // In this implementation, we do none of that, no syntax checks, no realization.
         // The next `TODO` are the checks that Nix implements.
-        let mut ctx_elements: HashSet<NixContextElement> = HashSet::new();
+        let mut ctx_elements: FxHashSet<NixContextElement> = FxHashSet::default();
         let span = generators::request_span(&co).await;
         let origin = origin
             .coerce_to_string(
@@ -1115,7 +1116,7 @@ mod pure_builtins {
             .to_list()?
             .into_iter()
             .map(|v| v.to_str())
-            .collect::<Result<HashSet<_>, _>>()?;
+            .collect::<Result<FxHashSet<_>, _>>()?;
         let res = attrs.iter().filter_map(|(k, v)| {
             if !keys.contains(k) {
                 Some((k.clone(), v.clone()))
diff --git a/tvix/eval/src/compiler/bindings.rs b/tvix/eval/src/compiler/bindings.rs
index 60203ba5d94b..f5c6376eb1b3 100644
--- a/tvix/eval/src/compiler/bindings.rs
+++ b/tvix/eval/src/compiler/bindings.rs
@@ -560,7 +560,7 @@ impl Compiler<'_, '_> {
 
     /// Emit definitions for all variables in the top-level global env passed to the evaluation (eg
     /// local variables in the REPL)
-    pub(super) fn compile_env(&mut self, env: &HashMap<SmolStr, Value>) {
+    pub(super) fn compile_env(&mut self, env: &FxHashMap<SmolStr, Value>) {
         for (name, value) in env {
             self.scope_mut().declare_constant(name.to_string());
             self.emit_constant(value.clone(), &EntireFile);
diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs
index 3a25052aabb2..4878db1f1a84 100644
--- a/tvix/eval/src/compiler/mod.rs
+++ b/tvix/eval/src/compiler/mod.rs
@@ -20,8 +20,9 @@ mod scope;
 
 use codemap::Span;
 use rnix::ast::{self, AstToken};
+use rustc_hash::FxHashMap;
 use smol_str::SmolStr;
-use std::collections::{BTreeMap, HashMap};
+use std::collections::BTreeMap;
 use std::path::{Path, PathBuf};
 use std::rc::{Rc, Weak};
 
@@ -117,7 +118,7 @@ impl TrackedFormal {
 
 /// The map of globally available functions and other values that
 /// should implicitly be resolvable in the global scope.
-pub type GlobalsMap = HashMap<&'static str, Value>;
+pub type GlobalsMap = FxHashMap<&'static str, Value>;
 
 /// Set of builtins that (if they exist) should be made available in
 /// the global scope, meaning that they can be accessed not just
@@ -187,7 +188,7 @@ impl<'source, 'observer> Compiler<'source, 'observer> {
     pub(crate) fn new(
         location: Option<PathBuf>,
         globals: Rc<GlobalsMap>,
-        env: Option<&HashMap<SmolStr, Value>>,
+        env: Option<&FxHashMap<SmolStr, Value>>,
         source: &'source SourceCode,
         file: &'source codemap::File,
         observer: &'observer mut dyn CompilerObserver,
@@ -1588,7 +1589,7 @@ pub fn prepare_globals(
     Rc::new_cyclic(Box::new(move |weak: &Weak<GlobalsMap>| {
         // First step is to construct the builtins themselves as
         // `NixAttrs`.
-        let mut builtins: GlobalsMap = HashMap::from_iter(builtins);
+        let mut builtins: GlobalsMap = FxHashMap::from_iter(builtins);
 
         // At this point, optionally insert `import` if enabled. To
         // "tie the knot" of `import` needing the full set of globals
@@ -1601,7 +1602,7 @@ pub fn prepare_globals(
 
         // Next, the actual map of globals which the compiler will use
         // to resolve identifiers is constructed.
-        let mut globals: GlobalsMap = HashMap::new();
+        let mut globals: GlobalsMap = FxHashMap::default();
 
         // builtins contain themselves (`builtins.builtins`), which we
         // can resolve by manually constructing a suspended thunk that
@@ -1654,7 +1655,7 @@ pub fn compile(
     expr: &ast::Expr,
     location: Option<PathBuf>,
     globals: Rc<GlobalsMap>,
-    env: Option<&HashMap<SmolStr, Value>>,
+    env: Option<&FxHashMap<SmolStr, Value>>,
     source: &SourceCode,
     file: &codemap::File,
     observer: &mut dyn CompilerObserver,
diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs
index 7b0a66004a7f..1087e0153e42 100644
--- a/tvix/eval/src/compiler/scope.rs
+++ b/tvix/eval/src/compiler/scope.rs
@@ -10,10 +10,8 @@
 //! stack indices. To do this, the compiler simulates where locals
 //! will be at runtime using the data structures implemented here.
 
-use std::{
-    collections::{hash_map, HashMap},
-    ops::Index,
-};
+use rustc_hash::FxHashMap;
+use std::{collections::hash_map, ops::Index};
 
 use smol_str::SmolStr;
 
@@ -168,7 +166,7 @@ pub struct Scope {
     pub upvalues: Vec<Upvalue>,
 
     /// Secondary by-name index over locals.
-    by_name: HashMap<String, ByName>,
+    by_name: FxHashMap<String, ByName>,
 
     /// How many scopes "deep" are these locals?
     scope_depth: usize,
diff --git a/tvix/eval/src/lib.rs b/tvix/eval/src/lib.rs
index a53a2a02d140..6897e1dd494f 100644
--- a/tvix/eval/src/lib.rs
+++ b/tvix/eval/src/lib.rs
@@ -36,7 +36,7 @@ mod test_utils;
 #[cfg(test)]
 mod tests;
 
-use std::collections::HashMap;
+use rustc_hash::FxHashMap;
 use std::path::PathBuf;
 use std::rc::Rc;
 use std::str::FromStr;
@@ -86,7 +86,7 @@ enum BuilderGlobals {
 pub struct EvaluationBuilder<'co, 'ro, 'env, IO> {
     source_map: Option<SourceCode>,
     globals: BuilderGlobals,
-    env: Option<&'env HashMap<SmolStr, Value>>,
+    env: Option<&'env FxHashMap<SmolStr, Value>>,
     io_handle: IO,
     enable_import: bool,
     strict: bool,
@@ -264,7 +264,7 @@ impl<'co, 'ro, 'env, IO> EvaluationBuilder<'co, 'ro, 'env, IO> {
         Self { nix_path, ..self }
     }
 
-    pub fn env(self, env: Option<&'env HashMap<SmolStr, Value>>) -> Self {
+    pub fn env(self, env: Option<&'env FxHashMap<SmolStr, Value>>) -> Self {
         Self { env, ..self }
     }
 
@@ -352,7 +352,7 @@ pub struct Evaluation<'co, 'ro, 'env, IO> {
     globals: Rc<GlobalsMap>,
 
     /// Top-level variables to define in the evaluation
-    env: Option<&'env HashMap<SmolStr, Value>>,
+    env: Option<&'env FxHashMap<SmolStr, Value>>,
 
     /// Implementation of file-IO to use during evaluation, e.g. for
     /// impure builtins.
@@ -570,7 +570,7 @@ fn parse_compile_internal(
     location: Option<PathBuf>,
     source: SourceCode,
     globals: Rc<GlobalsMap>,
-    env: Option<&HashMap<SmolStr, Value>>,
+    env: Option<&FxHashMap<SmolStr, Value>>,
     compiler_observer: &mut dyn CompilerObserver,
 ) -> Option<Rc<Lambda>> {
     let parsed = rnix::ast::Root::parse(code);
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs
index 3e991890ea5a..5c1f31db7eea 100644
--- a/tvix/eval/src/value/string.rs
+++ b/tvix/eval/src/value/string.rs
@@ -5,9 +5,9 @@
 //! paying the cost when creating new strings.
 use bstr::{BStr, BString, ByteSlice, Chars};
 use rnix::ast;
+use rustc_hash::FxHashSet;
 use std::alloc::{alloc, dealloc, handle_alloc_error, Layout};
 use std::borrow::{Borrow, Cow};
-use std::collections::HashSet;
 use std::ffi::c_void;
 use std::fmt::{self, Debug, Display};
 use std::hash::Hash;
@@ -40,23 +40,29 @@ pub enum NixContextElement {
 /// operations, e.g. concatenation, interpolation and other string operations.
 #[repr(transparent)]
 #[derive(Clone, Debug, Serialize, Default)]
-pub struct NixContext(HashSet<NixContextElement>);
+pub struct NixContext(FxHashSet<NixContextElement>);
 
 impl From<NixContextElement> for NixContext {
     fn from(value: NixContextElement) -> Self {
-        Self([value].into())
+        let mut set = FxHashSet::default();
+        set.insert(value);
+        Self(set)
     }
 }
 
-impl From<HashSet<NixContextElement>> for NixContext {
-    fn from(value: HashSet<NixContextElement>) -> Self {
+impl From<FxHashSet<NixContextElement>> for NixContext {
+    fn from(value: FxHashSet<NixContextElement>) -> Self {
         Self(value)
     }
 }
 
 impl<const N: usize> From<[NixContextElement; N]> for NixContext {
     fn from(value: [NixContextElement; N]) -> Self {
-        Self(HashSet::from(value))
+        let mut set = FxHashSet::default();
+        for elt in value {
+            set.insert(elt);
+        }
+        Self(set)
     }
 }
 
diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs
index 6d1fe1b8a7cc..2019e8ab3239 100644
--- a/tvix/eval/src/value/thunk.rs
+++ b/tvix/eval/src/value/thunk.rs
@@ -18,9 +18,9 @@
 //! object, but when forcing a thunk, the runtime *must* mutate the
 //! memoisable slot.
 
+use rustc_hash::FxHashSet;
 use std::{
     cell::{Ref, RefCell, RefMut},
-    collections::HashSet,
     fmt::Debug,
     rc::Rc,
 };
@@ -413,7 +413,7 @@ impl TotalDisplay for Thunk {
 /// The inner `HashSet` is not available on the outside, as it would be
 /// potentially unsafe to interact with the pointers in the set.
 #[derive(Default)]
-pub struct ThunkSet(HashSet<*const ThunkRepr>);
+pub struct ThunkSet(FxHashSet<*const ThunkRepr>);
 
 impl ThunkSet {
     /// Check whether the given thunk has already been seen. Will mark the thunk
diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs
index 65117b79a0bc..a6d0941e8d7a 100644
--- a/tvix/eval/src/vm/mod.rs
+++ b/tvix/eval/src/vm/mod.rs
@@ -14,8 +14,9 @@ mod macros;
 
 use bstr::{BString, ByteSlice, ByteVec};
 use codemap::Span;
+use rustc_hash::FxHashMap;
 use serde_json::json;
-use std::{cmp::Ordering, collections::HashMap, ops::DerefMut, path::PathBuf, rc::Rc};
+use std::{cmp::Ordering, ops::DerefMut, path::PathBuf, rc::Rc};
 
 use crate::{
     arithmetic_op,
@@ -211,7 +212,7 @@ impl Frame {
 }
 
 #[derive(Default)]
-struct ImportCache(HashMap<PathBuf, Value>);
+struct ImportCache(FxHashMap<PathBuf, Value>);
 
 /// The `ImportCache` holds the `Value` resulting from `import`ing a certain
 /// file, so that the same file doesn't need to be re-evaluated multiple times.