about summary refs log tree commit diff
path: root/tvix
diff options
context:
space:
mode:
authorVincent Ambo <mail@tazj.in>2022-12-29T11·44+0300
committertazjin <tazjin@tvl.su>2022-12-29T12·27+0000
commit5d73c06b1a67bce68dcc0b2bd5f087ce00ab6317 (patch)
tree62d40c8ca8e1620bff028c8d89a5a0f05ee00661 /tvix
parent6ab8320f075e36f2328a86f5fcf7674844a0bd12 (diff)
refactor(tvix/eval): use im::Vector for NixList representation r/5534
This is a persistent, structurally sharing data structure which is
more efficient in some of our use-cases. I have verified the
efficiency improvement using `hyperfine` repeatedly over expressions
on nixpkgs.

Lists are not the most performance-critical structure in Nix (that
would be attribute sets), but we can already see a small (~5-10%)
improvement.

Note that there are a handful of cases where we still go via `Vec`
that need to be fixed, most notable for `builtins.sort` which can not
currently be implemented directly using `im::Vector` because of a
restrictive type bound.

Change-Id: I237cc50cbd7629a046e5a5e4601fbb40355e551d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7670
Tested-by: BuildkiteCI
Reviewed-by: sterni <sternenseemann@systemli.org>
Diffstat (limited to 'tvix')
-rw-r--r--tvix/Cargo.lock43
-rw-r--r--tvix/Cargo.nix114
-rw-r--r--tvix/eval/Cargo.toml21
-rw-r--r--tvix/eval/src/builtins/mod.rs7
-rw-r--r--tvix/eval/src/value/list.rs58
-rw-r--r--tvix/eval/src/vm.rs7
6 files changed, 206 insertions, 44 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index 04af420aea..c1303be31f 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -184,6 +184,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
 [[package]]
+name = "bitmaps"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2"
+dependencies = [
+ "typenum",
+]
+
+[[package]]
 name = "blake3"
 version = "1.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -810,6 +819,20 @@ dependencies = [
 ]
 
 [[package]]
+name = "im"
+version = "15.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0acd33ff0285af998aaf9b57342af478078f53492322fafc47450e09397e0e9"
+dependencies = [
+ "bitmaps",
+ "rand_core 0.6.4",
+ "rand_xoshiro",
+ "sized-chunks",
+ "typenum",
+ "version_check",
+]
+
+[[package]]
 name = "indexmap"
 version = "1.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1387,6 +1410,15 @@ dependencies = [
 ]
 
 [[package]]
+name = "rand_xoshiro"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa"
+dependencies = [
+ "rand_core 0.6.4",
+]
+
+[[package]]
 name = "rayon"
 version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -1594,6 +1626,16 @@ dependencies = [
 ]
 
 [[package]]
+name = "sized-chunks"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "16d69225bde7a69b235da73377861095455d298f2b970996eec25ddbb42b3d1e"
+dependencies = [
+ "bitmaps",
+ "typenum",
+]
+
+[[package]]
 name = "slab"
 version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -2031,6 +2073,7 @@ dependencies = [
  "codemap-diagnostic",
  "criterion",
  "dirs",
+ "im",
  "itertools",
  "path-clean",
  "pretty_assertions",
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index aa9fc5d9dc..7d3c4c8691 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -644,6 +644,25 @@ rec {
         };
         resolvedDefaultFeatures = [ "default" ];
       };
+      "bitmaps" = rec {
+        crateName = "bitmaps";
+        version = "2.1.0";
+        edition = "2018";
+        sha256 = "18k4mcwxl96yvii5kcljkpb8pg5j4jj1zbsdn26nsx4r83846403";
+        authors = [
+          "Bodil Stokke <bodil@bodil.org>"
+        ];
+        dependencies = [
+          {
+            name = "typenum";
+            packageId = "typenum";
+          }
+        ];
+        features = {
+          "default" = [ "std" ];
+        };
+        resolvedDefaultFeatures = [ "default" "std" ];
+      };
       "blake3" = rec {
         crateName = "blake3";
         version = "1.3.3";
@@ -2353,6 +2372,51 @@ rec {
         ];
 
       };
+      "im" = rec {
+        crateName = "im";
+        version = "15.1.0";
+        edition = "2018";
+        sha256 = "1sg0jy9y0l3lqjpjyclj6kspi027mx177dgrmacgjni8y0zx7b6h";
+        authors = [
+          "Bodil Stokke <bodil@bodil.org>"
+        ];
+        dependencies = [
+          {
+            name = "bitmaps";
+            packageId = "bitmaps";
+          }
+          {
+            name = "rand_core";
+            packageId = "rand_core 0.6.4";
+          }
+          {
+            name = "rand_xoshiro";
+            packageId = "rand_xoshiro";
+          }
+          {
+            name = "sized-chunks";
+            packageId = "sized-chunks";
+          }
+          {
+            name = "typenum";
+            packageId = "typenum";
+          }
+        ];
+        buildDependencies = [
+          {
+            name = "version_check";
+            packageId = "version_check";
+          }
+        ];
+        features = {
+          "arbitrary" = [ "dep:arbitrary" ];
+          "proptest" = [ "dep:proptest" ];
+          "quickcheck" = [ "dep:quickcheck" ];
+          "rayon" = [ "dep:rayon" ];
+          "refpool" = [ "dep:refpool" ];
+          "serde" = [ "dep:serde" ];
+        };
+      };
       "indexmap" = rec {
         crateName = "indexmap";
         version = "1.9.2";
@@ -3933,6 +3997,25 @@ rec {
           "serde1" = [ "serde" ];
         };
       };
+      "rand_xoshiro" = rec {
+        crateName = "rand_xoshiro";
+        version = "0.6.0";
+        edition = "2018";
+        sha256 = "1ajsic84rzwz5qr0mzlay8vi17swqi684bqvwqyiim3flfrcv5vg";
+        authors = [
+          "The Rand Project Developers"
+        ];
+        dependencies = [
+          {
+            name = "rand_core";
+            packageId = "rand_core 0.6.4";
+          }
+        ];
+        features = {
+          "serde" = [ "dep:serde" ];
+          "serde1" = [ "serde" ];
+        };
+      };
       "rayon" = rec {
         crateName = "rayon";
         version = "1.6.0";
@@ -4568,6 +4651,33 @@ rec {
         };
         resolvedDefaultFeatures = [ "default" "std" ];
       };
+      "sized-chunks" = rec {
+        crateName = "sized-chunks";
+        version = "0.6.5";
+        edition = "2018";
+        sha256 = "07ix5fsdnpf2xsb0k5rbiwlmsicm2237fcx7blirp9p7pljr5mhn";
+        authors = [
+          "Bodil Stokke <bodil@bodil.org>"
+        ];
+        dependencies = [
+          {
+            name = "bitmaps";
+            packageId = "bitmaps";
+          }
+          {
+            name = "typenum";
+            packageId = "typenum";
+          }
+        ];
+        features = {
+          "arbitrary" = [ "dep:arbitrary" ];
+          "array-ops" = [ "dep:array-ops" ];
+          "default" = [ "std" ];
+          "refpool" = [ "dep:refpool" ];
+          "ringbuffer" = [ "array-ops" ];
+        };
+        resolvedDefaultFeatures = [ "default" "std" ];
+      };
       "slab" = rec {
         crateName = "slab";
         version = "0.4.7";
@@ -6014,6 +6124,10 @@ rec {
             packageId = "dirs";
           }
           {
+            name = "im";
+            packageId = "im";
+          }
+          {
             name = "path-clean";
             packageId = "path-clean";
           }
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml
index 93d45c2166..dac605f3e1 100644
--- a/tvix/eval/Cargo.toml
+++ b/tvix/eval/Cargo.toml
@@ -9,21 +9,22 @@ edition = "2021"
 name = "tvix_eval"
 
 [dependencies]
-smol_str = "0.1"
-dirs = "4.0.0"
-path-clean = "0.1"
-tabwriter = "1.2"
-rowan = "*" # pinned by rnix
+backtrace-on-stack-overflow = { version = "0.2.0", optional = true }
+builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" }
 codemap = "0.1.3"
 codemap-diagnostic = "0.1.1"
+dirs = "4.0.0"
+im = "15.1"
+path-clean = "0.1"
 proptest = { version = "1.0.0", default_features = false, features = ["std", "alloc", "break-dead-code", "tempfile"], optional = true }
-test-strategy = { version = "0.2.1", optional = true }
-serde = "1.0"
-serde_json = "1.0"
 regex = "1.6.0"
-builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" }
-backtrace-on-stack-overflow = { version = "0.2.0", optional = true }
 rnix = "0.11.0"
+rowan = "*" # pinned by rnix
+serde = "1.0"
+serde_json = "1.0"
+smol_str = "0.1"
+tabwriter = "1.2"
+test-strategy = { version = "0.2.1", optional = true }
 
 [dev-dependencies]
 criterion = "0.4"
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index 3c635eb364..709d2918f3 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -772,7 +772,12 @@ mod pure_builtins {
 
     #[builtin("sort")]
     fn builtin_sort(vm: &mut VM, comparator: Value, list: Value) -> Result<Value, ErrorKind> {
-        let mut list = list.to_list()?.into_vec();
+        // TODO: the bound on the sort function in
+        // `im::Vector::sort_by` is `Fn(...)`, which means that we can
+        // not use the mutable VM inside of its closure, hence the
+        // dance via `Vec`. I think this is just an unnecessarily
+        // restrictive bound in `im`, not a functional requirement.
+        let mut list = list.to_list()?.into_iter().collect::<Vec<_>>();
 
         // Used to let errors "escape" from the sorting closure. If anything
         // ends up setting an error, it is returned from this function.
diff --git a/tvix/eval/src/value/list.rs b/tvix/eval/src/value/list.rs
index 2604b935ed..0a7ff8a58a 100644
--- a/tvix/eval/src/value/list.rs
+++ b/tvix/eval/src/value/list.rs
@@ -1,6 +1,7 @@
 //! This module implements Nix lists.
-use std::ops::Deref;
-use std::rc::Rc;
+use std::ops::Index;
+
+use im::{vector, Vector};
 
 use crate::errors::ErrorKind;
 use crate::vm::VM;
@@ -11,13 +12,13 @@ use super::Value;
 
 #[repr(transparent)]
 #[derive(Clone, Debug)]
-pub struct NixList(Rc<Vec<Value>>);
+pub struct NixList(Vector<Value>);
 
 impl TotalDisplay for NixList {
     fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result {
         f.write_str("[ ")?;
 
-        for v in self.0.as_ref() {
+        for v in self {
             v.total_fmt(f, set)?;
             f.write_str(" ")?;
         }
@@ -26,15 +27,17 @@ impl TotalDisplay for NixList {
     }
 }
 
-impl From<Rc<Vec<Value>>> for NixList {
-    fn from(v: Rc<Vec<Value>>) -> Self {
-        Self(v)
+// TODO(tazjin): uses of this instance are likely inefficient and can be optimised.
+// Eventually this instance should be removed.
+impl From<Vec<Value>> for NixList {
+    fn from(vs: Vec<Value>) -> Self {
+        Self(Vector::from_iter(vs.into_iter()))
     }
 }
 
-impl From<Vec<Value>> for NixList {
-    fn from(vs: Vec<Value>) -> Self {
-        Self(Rc::new(vs))
+impl From<Vector<Value>> for NixList {
+    fn from(vs: Vector<Value>) -> Self {
+        Self(vs)
     }
 }
 
@@ -48,20 +51,18 @@ mod arbitrary {
     use super::*;
 
     impl Arbitrary for NixList {
+        // TODO(tazjin): im seems to implement arbitrary instances,
+        // but I couldn't figure out how to enable them.
         type Parameters = <Vec<Value> as Arbitrary>::Parameters;
         type Strategy = BoxedStrategy<Self>;
 
         fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
-            any_with::<Rc<Vec<Value>>>(args).prop_map(Self).boxed()
+            any_with::<Vec<Value>>(args).prop_map(|v| v.into()).boxed()
         }
     }
 }
 
 impl NixList {
-    pub fn new() -> Self {
-        Self(Rc::new(vec![]))
-    }
-
     pub fn len(&self) -> usize {
         self.0.len()
     }
@@ -78,15 +79,15 @@ impl NixList {
             stack_slice.len(),
         );
 
-        NixList(Rc::new(stack_slice))
+        stack_slice.into()
     }
 
-    pub fn iter(&self) -> std::slice::Iter<Value> {
+    pub fn iter(&self) -> vector::Iter<Value> {
         self.0.iter()
     }
 
     pub fn ptr_eq(&self, other: &Self) -> bool {
-        Rc::ptr_eq(&self.0, &other.0)
+        self.0.ptr_eq(&other.0)
     }
 
     /// Compare `self` against `other` for equality using Nix equality semantics
@@ -112,34 +113,33 @@ impl NixList {
         self.iter().try_for_each(|v| v.force(vm).map(|_| ()))
     }
 
-    pub fn into_vec(self) -> Vec<Value> {
-        crate::unwrap_or_clone_rc(self.0)
+    pub fn into_inner(self) -> Vector<Value> {
+        self.0
     }
 }
 
 impl IntoIterator for NixList {
     type Item = Value;
-    type IntoIter = std::vec::IntoIter<Value>;
+    type IntoIter = im::vector::ConsumingIter<Value>;
 
-    fn into_iter(self) -> std::vec::IntoIter<Value> {
-        self.into_vec().into_iter()
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.into_iter()
     }
 }
 
 impl<'a> IntoIterator for &'a NixList {
     type Item = &'a Value;
-
-    type IntoIter = std::slice::Iter<'a, Value>;
+    type IntoIter = im::vector::Iter<'a, Value>;
 
     fn into_iter(self) -> Self::IntoIter {
         self.0.iter()
     }
 }
 
-impl Deref for NixList {
-    type Target = Vec<Value>;
+impl Index<usize> for NixList {
+    type Output = Value;
 
-    fn deref(&self) -> &Self::Target {
-        &self.0
+    fn index(&self, index: usize) -> &Self::Output {
+        &self.0[index]
     }
 }
diff --git a/tvix/eval/src/vm.rs b/tvix/eval/src/vm.rs
index f1dc994396..5bc0f7736a 100644
--- a/tvix/eval/src/vm.rs
+++ b/tvix/eval/src/vm.rs
@@ -815,10 +815,9 @@ impl<'o> VM<'o> {
             }
 
             OpCode::OpConcat => {
-                let rhs = fallible!(self, self.pop().to_list());
-                let mut lhs = fallible!(self, self.pop().to_list()).into_vec();
-                lhs.extend_from_slice(&rhs);
-                self.push(Value::List(NixList::from(lhs)))
+                let rhs = fallible!(self, self.pop().to_list()).into_inner();
+                let lhs = fallible!(self, self.pop().to_list()).into_inner();
+                self.push(Value::List(NixList::from(lhs + rhs)))
             }
 
             OpCode::OpInterpolate(Count(count)) => self.run_interpolate(count)?,