about summary refs log tree commit diff
path: root/tvix/eval
diff options
context:
space:
mode:
authorVincent Ambo <mail@tazj.in>2022-12-29T11·44+0300
committertazjin <tazjin@tvl.su>2022-12-29T12·27+0000
commit5d73c06b1a67bce68dcc0b2bd5f087ce00ab6317 (patch)
tree62d40c8ca8e1620bff028c8d89a5a0f05ee00661 /tvix/eval
parent6ab8320f075e36f2328a86f5fcf7674844a0bd12 (diff)
refactor(tvix/eval): use im::Vector for NixList representation r/5534
This is a persistent, structurally sharing data structure which is
more efficient in some of our use-cases. I have verified the
efficiency improvement using `hyperfine` repeatedly over expressions
on nixpkgs.

Lists are not the most performance-critical structure in Nix (that
would be attribute sets), but we can already see a small (~5-10%)
improvement.

Note that there are a handful of cases where we still go via `Vec`
that need to be fixed, most notable for `builtins.sort` which can not
currently be implemented directly using `im::Vector` because of a
restrictive type bound.

Change-Id: I237cc50cbd7629a046e5a5e4601fbb40355e551d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7670
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Diffstat (limited to 'tvix/eval')
-rw-r--r--tvix/eval/Cargo.toml21
-rw-r--r--tvix/eval/src/builtins/mod.rs7
-rw-r--r--tvix/eval/src/value/list.rs58
-rw-r--r--tvix/eval/src/vm.rs7
4 files changed, 49 insertions, 44 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml
index 93d45c2166d0..dac605f3e1ae 100644
--- a/tvix/eval/Cargo.toml
+++ b/tvix/eval/Cargo.toml
@@ -9,21 +9,22 @@ edition = "2021"
 name = "tvix_eval"
 
 [dependencies]
-smol_str = "0.1"
-dirs = "4.0.0"
-path-clean = "0.1"
-tabwriter = "1.2"
-rowan = "*" # pinned by rnix
+backtrace-on-stack-overflow = { version = "0.2.0", optional = true }
+builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" }
 codemap = "0.1.3"
 codemap-diagnostic = "0.1.1"
+dirs = "4.0.0"
+im = "15.1"
+path-clean = "0.1"
 proptest = { version = "1.0.0", default_features = false, features = ["std", "alloc", "break-dead-code", "tempfile"], optional = true }
-test-strategy = { version = "0.2.1", optional = true }
-serde = "1.0"
-serde_json = "1.0"
 regex = "1.6.0"
-builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" }
-backtrace-on-stack-overflow = { version = "0.2.0", optional = true }
 rnix = "0.11.0"
+rowan = "*" # pinned by rnix
+serde = "1.0"
+serde_json = "1.0"
+smol_str = "0.1"
+tabwriter = "1.2"
+test-strategy = { version = "0.2.1", optional = true }
 
 [dev-dependencies]
 criterion = "0.4"
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index 3c635eb364ca..709d2918f3c0 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -772,7 +772,12 @@ mod pure_builtins {
 
     #[builtin("sort")]
     fn builtin_sort(vm: &mut VM, comparator: Value, list: Value) -> Result<Value, ErrorKind> {
-        let mut list = list.to_list()?.into_vec();
+        // TODO: the bound on the sort function in
+        // `im::Vector::sort_by` is `Fn(...)`, which means that we can
+        // not use the mutable VM inside of its closure, hence the
+        // dance via `Vec`. I think this is just an unnecessarily
+        // restrictive bound in `im`, not a functional requirement.
+        let mut list = list.to_list()?.into_iter().collect::<Vec<_>>();
 
         // Used to let errors "escape" from the sorting closure. If anything
         // ends up setting an error, it is returned from this function.
diff --git a/tvix/eval/src/value/list.rs b/tvix/eval/src/value/list.rs
index 2604b935ed67..0a7ff8a58a79 100644
--- a/tvix/eval/src/value/list.rs
+++ b/tvix/eval/src/value/list.rs
@@ -1,6 +1,7 @@
 //! This module implements Nix lists.
-use std::ops::Deref;
-use std::rc::Rc;
+use std::ops::Index;
+
+use im::{vector, Vector};
 
 use crate::errors::ErrorKind;
 use crate::vm::VM;
@@ -11,13 +12,13 @@ use super::Value;
 
 #[repr(transparent)]
 #[derive(Clone, Debug)]
-pub struct NixList(Rc<Vec<Value>>);
+pub struct NixList(Vector<Value>);
 
 impl TotalDisplay for NixList {
     fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result {
         f.write_str("[ ")?;
 
-        for v in self.0.as_ref() {
+        for v in self {
             v.total_fmt(f, set)?;
             f.write_str(" ")?;
         }
@@ -26,15 +27,17 @@ impl TotalDisplay for NixList {
     }
 }
 
-impl From<Rc<Vec<Value>>> for NixList {
-    fn from(v: Rc<Vec<Value>>) -> Self {
-        Self(v)
+// TODO(tazjin): uses of this instance are likely inefficient and can be optimised.
+// Eventually this instance should be removed.
+impl From<Vec<Value>> for NixList {
+    fn from(vs: Vec<Value>) -> Self {
+        Self(Vector::from_iter(vs.into_iter()))
     }
 }
 
-impl From<Vec<Value>> for NixList {
-    fn from(vs: Vec<Value>) -> Self {
-        Self(Rc::new(vs))
+impl From<Vector<Value>> for NixList {
+    fn from(vs: Vector<Value>) -> Self {
+        Self(vs)
     }
 }
 
@@ -48,20 +51,18 @@ mod arbitrary {
     use super::*;
 
     impl Arbitrary for NixList {
+        // TODO(tazjin): im seems to implement arbitrary instances,
+        // but I couldn't figure out how to enable them.
         type Parameters = <Vec<Value> as Arbitrary>::Parameters;
         type Strategy = BoxedStrategy<Self>;
 
         fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
-            any_with::<Rc<Vec<Value>>>(args).prop_map(Self).boxed()
+            any_with::<Vec<Value>>(args).prop_map(|v| v.into()).boxed()
         }
     }
 }
 
 impl NixList {
-    pub fn new() -> Self {
-        Self(Rc::new(vec![]))
-    }
-
     pub fn len(&self) -> usize {
         self.0.len()
     }
@@ -78,15 +79,15 @@ impl NixList {
             stack_slice.len(),
         );
 
-        NixList(Rc::new(stack_slice))
+        stack_slice.into()
     }
 
-    pub fn iter(&self) -> std::slice::Iter<Value> {
+    pub fn iter(&self) -> vector::Iter<Value> {
         self.0.iter()
     }
 
     pub fn ptr_eq(&self, other: &Self) -> bool {
-        Rc::ptr_eq(&self.0, &other.0)
+        self.0.ptr_eq(&other.0)
     }
 
     /// Compare `self` against `other` for equality using Nix equality semantics
@@ -112,34 +113,33 @@ impl NixList {
         self.iter().try_for_each(|v| v.force(vm).map(|_| ()))
     }
 
-    pub fn into_vec(self) -> Vec<Value> {
-        crate::unwrap_or_clone_rc(self.0)
+    pub fn into_inner(self) -> Vector<Value> {
+        self.0
     }
 }
 
 impl IntoIterator for NixList {
     type Item = Value;
-    type IntoIter = std::vec::IntoIter<Value>;
+    type IntoIter = im::vector::ConsumingIter<Value>;
 
-    fn into_iter(self) -> std::vec::IntoIter<Value> {
-        self.into_vec().into_iter()
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.into_iter()
     }
 }
 
 impl<'a> IntoIterator for &'a NixList {
     type Item = &'a Value;
-
-    type IntoIter = std::slice::Iter<'a, Value>;
+    type IntoIter = im::vector::Iter<'a, Value>;
 
     fn into_iter(self) -> Self::IntoIter {
         self.0.iter()
     }
 }
 
-impl Deref for NixList {
-    type Target = Vec<Value>;
+impl Index<usize> for NixList {
+    type Output = Value;
 
-    fn deref(&self) -> &Self::Target {
-        &self.0
+    fn index(&self, index: usize) -> &Self::Output {
+        &self.0[index]
     }
 }
diff --git a/tvix/eval/src/vm.rs b/tvix/eval/src/vm.rs
index f1dc99439638..5bc0f7736ae4 100644
--- a/tvix/eval/src/vm.rs
+++ b/tvix/eval/src/vm.rs
@@ -815,10 +815,9 @@ impl<'o> VM<'o> {
             }
 
             OpCode::OpConcat => {
-                let rhs = fallible!(self, self.pop().to_list());
-                let mut lhs = fallible!(self, self.pop().to_list()).into_vec();
-                lhs.extend_from_slice(&rhs);
-                self.push(Value::List(NixList::from(lhs)))
+                let rhs = fallible!(self, self.pop().to_list()).into_inner();
+                let lhs = fallible!(self, self.pop().to_list()).into_inner();
+                self.push(Value::List(NixList::from(lhs + rhs)))
             }
 
             OpCode::OpInterpolate(Count(count)) => self.run_interpolate(count)?,