From 5d73c06b1a67bce68dcc0b2bd5f087ce00ab6317 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Thu, 29 Dec 2022 14:44:09 +0300 Subject: refactor(tvix/eval): use im::Vector for NixList representation This is a persistent, structurally sharing data structure which is more efficient in some of our use-cases. I have verified the efficiency improvement using `hyperfine` repeatedly over expressions on nixpkgs. Lists are not the most performance-critical structure in Nix (that would be attribute sets), but we can already see a small (~5-10%) improvement. Note that there are a handful of cases where we still go via `Vec` that need to be fixed, most notable for `builtins.sort` which can not currently be implemented directly using `im::Vector` because of a restrictive type bound. Change-Id: I237cc50cbd7629a046e5a5e4601fbb40355e551d Reviewed-on: https://cl.tvl.fyi/c/depot/+/7670 Tested-by: BuildkiteCI Reviewed-by: sterni --- corp/tvixbolt/Cargo.lock | 55 ++++++++++++++++++++ corp/tvixbolt/Cargo.toml | 2 - tvix/Cargo.lock | 43 ++++++++++++++++ tvix/Cargo.nix | 114 ++++++++++++++++++++++++++++++++++++++++++ tvix/eval/Cargo.toml | 21 ++++---- tvix/eval/src/builtins/mod.rs | 7 ++- tvix/eval/src/value/list.rs | 58 ++++++++++----------- tvix/eval/src/vm.rs | 7 ++- 8 files changed, 261 insertions(+), 46 deletions(-) diff --git a/corp/tvixbolt/Cargo.lock b/corp/tvixbolt/Cargo.lock index 5ddeb6beecd2..41bd8a315f4a 100644 --- a/corp/tvixbolt/Cargo.lock +++ b/corp/tvixbolt/Cargo.lock @@ -34,6 +34,15 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitmaps" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" +dependencies = [ + "typenum", +] + [[package]] name = "boolinator" version = "2.4.0" @@ -273,6 +282,20 @@ dependencies = [ "libc", ] +[[package]] +name = "im" +version = "15.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0acd33ff0285af998aaf9b57342af478078f53492322fafc47450e09397e0e9" +dependencies = [ + "bitmaps", + "rand_core", + "rand_xoshiro", + "sized-chunks", + "typenum", + "version_check", +] + [[package]] name = "indexmap" version = "1.9.1" @@ -394,6 +417,21 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" + +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core", +] + [[package]] name = "redox_syscall" version = "0.2.16" @@ -532,6 +570,16 @@ dependencies = [ "serde", ] +[[package]] +name = "sized-chunks" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16d69225bde7a69b235da73377861095455d298f2b970996eec25ddbb42b3d1e" +dependencies = [ + "bitmaps", + "typenum", +] + [[package]] name = "slab" version = "0.4.7" @@ -612,6 +660,7 @@ dependencies = [ "codemap", "codemap-diagnostic", "dirs", + "im", "path-clean", "regex", "rnix", @@ -647,6 +696,12 @@ dependencies = [ "yew-router", ] +[[package]] +name = "typenum" +version = "1.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" + [[package]] name = "unicode-ident" version = "1.0.4" diff --git a/corp/tvixbolt/Cargo.toml b/corp/tvixbolt/Cargo.toml index 75006bec188e..80a513aef00e 100644 --- a/corp/tvixbolt/Cargo.toml +++ b/corp/tvixbolt/Cargo.toml @@ -3,8 +3,6 @@ name = "tvixbolt" version = "0.1.0" edition = "2021" -# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html - [dependencies] yew = "0.19.3" yew-router = "0.16" diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 04af420aeac3..c1303be31f91 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -183,6 +183,15 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "bitmaps" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "031043d04099746d8db04daf1fa424b2bc8bd69d92b25962dcde24da39ab64a2" +dependencies = [ + "typenum", +] + [[package]] name = "blake3" version = "1.3.3" @@ -809,6 +818,20 @@ dependencies = [ "tokio-io-timeout", ] +[[package]] +name = "im" +version = "15.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0acd33ff0285af998aaf9b57342af478078f53492322fafc47450e09397e0e9" +dependencies = [ + "bitmaps", + "rand_core 0.6.4", + "rand_xoshiro", + "sized-chunks", + "typenum", + "version_check", +] + [[package]] name = "indexmap" version = "1.9.2" @@ -1386,6 +1409,15 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "rand_xoshiro" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f97cdb2a36ed4183de61b2f824cc45c9f1037f28afe0a322e9fff4c108b5aaa" +dependencies = [ + "rand_core 0.6.4", +] + [[package]] name = "rayon" version = "1.6.0" @@ -1593,6 +1625,16 @@ dependencies = [ "serde", ] +[[package]] +name = "sized-chunks" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16d69225bde7a69b235da73377861095455d298f2b970996eec25ddbb42b3d1e" +dependencies = [ + "bitmaps", + "typenum", +] + [[package]] name = "slab" version = "0.4.7" @@ -2031,6 +2073,7 @@ dependencies = [ "codemap-diagnostic", "criterion", "dirs", + "im", "itertools", "path-clean", "pretty_assertions", diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index aa9fc5d9dcfd..7d3c4c869196 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -644,6 +644,25 @@ rec { }; resolvedDefaultFeatures = [ "default" ]; }; + "bitmaps" = rec { + crateName = "bitmaps"; + version = "2.1.0"; + edition = "2018"; + sha256 = "18k4mcwxl96yvii5kcljkpb8pg5j4jj1zbsdn26nsx4r83846403"; + authors = [ + "Bodil Stokke " + ]; + dependencies = [ + { + name = "typenum"; + packageId = "typenum"; + } + ]; + features = { + "default" = [ "std" ]; + }; + resolvedDefaultFeatures = [ "default" "std" ]; + }; "blake3" = rec { crateName = "blake3"; version = "1.3.3"; @@ -2353,6 +2372,51 @@ rec { ]; }; + "im" = rec { + crateName = "im"; + version = "15.1.0"; + edition = "2018"; + sha256 = "1sg0jy9y0l3lqjpjyclj6kspi027mx177dgrmacgjni8y0zx7b6h"; + authors = [ + "Bodil Stokke " + ]; + dependencies = [ + { + name = "bitmaps"; + packageId = "bitmaps"; + } + { + name = "rand_core"; + packageId = "rand_core 0.6.4"; + } + { + name = "rand_xoshiro"; + packageId = "rand_xoshiro"; + } + { + name = "sized-chunks"; + packageId = "sized-chunks"; + } + { + name = "typenum"; + packageId = "typenum"; + } + ]; + buildDependencies = [ + { + name = "version_check"; + packageId = "version_check"; + } + ]; + features = { + "arbitrary" = [ "dep:arbitrary" ]; + "proptest" = [ "dep:proptest" ]; + "quickcheck" = [ "dep:quickcheck" ]; + "rayon" = [ "dep:rayon" ]; + "refpool" = [ "dep:refpool" ]; + "serde" = [ "dep:serde" ]; + }; + }; "indexmap" = rec { crateName = "indexmap"; version = "1.9.2"; @@ -3933,6 +3997,25 @@ rec { "serde1" = [ "serde" ]; }; }; + "rand_xoshiro" = rec { + crateName = "rand_xoshiro"; + version = "0.6.0"; + edition = "2018"; + sha256 = "1ajsic84rzwz5qr0mzlay8vi17swqi684bqvwqyiim3flfrcv5vg"; + authors = [ + "The Rand Project Developers" + ]; + dependencies = [ + { + name = "rand_core"; + packageId = "rand_core 0.6.4"; + } + ]; + features = { + "serde" = [ "dep:serde" ]; + "serde1" = [ "serde" ]; + }; + }; "rayon" = rec { crateName = "rayon"; version = "1.6.0"; @@ -4568,6 +4651,33 @@ rec { }; resolvedDefaultFeatures = [ "default" "std" ]; }; + "sized-chunks" = rec { + crateName = "sized-chunks"; + version = "0.6.5"; + edition = "2018"; + sha256 = "07ix5fsdnpf2xsb0k5rbiwlmsicm2237fcx7blirp9p7pljr5mhn"; + authors = [ + "Bodil Stokke " + ]; + dependencies = [ + { + name = "bitmaps"; + packageId = "bitmaps"; + } + { + name = "typenum"; + packageId = "typenum"; + } + ]; + features = { + "arbitrary" = [ "dep:arbitrary" ]; + "array-ops" = [ "dep:array-ops" ]; + "default" = [ "std" ]; + "refpool" = [ "dep:refpool" ]; + "ringbuffer" = [ "array-ops" ]; + }; + resolvedDefaultFeatures = [ "default" "std" ]; + }; "slab" = rec { crateName = "slab"; version = "0.4.7"; @@ -6013,6 +6123,10 @@ rec { name = "dirs"; packageId = "dirs"; } + { + name = "im"; + packageId = "im"; + } { name = "path-clean"; packageId = "path-clean"; diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 93d45c2166d0..dac605f3e1ae 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -9,21 +9,22 @@ edition = "2021" name = "tvix_eval" [dependencies] -smol_str = "0.1" -dirs = "4.0.0" -path-clean = "0.1" -tabwriter = "1.2" -rowan = "*" # pinned by rnix +backtrace-on-stack-overflow = { version = "0.2.0", optional = true } +builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" } codemap = "0.1.3" codemap-diagnostic = "0.1.1" +dirs = "4.0.0" +im = "15.1" +path-clean = "0.1" proptest = { version = "1.0.0", default_features = false, features = ["std", "alloc", "break-dead-code", "tempfile"], optional = true } -test-strategy = { version = "0.2.1", optional = true } -serde = "1.0" -serde_json = "1.0" regex = "1.6.0" -builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" } -backtrace-on-stack-overflow = { version = "0.2.0", optional = true } rnix = "0.11.0" +rowan = "*" # pinned by rnix +serde = "1.0" +serde_json = "1.0" +smol_str = "0.1" +tabwriter = "1.2" +test-strategy = { version = "0.2.1", optional = true } [dev-dependencies] criterion = "0.4" diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 3c635eb364ca..709d2918f3c0 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -772,7 +772,12 @@ mod pure_builtins { #[builtin("sort")] fn builtin_sort(vm: &mut VM, comparator: Value, list: Value) -> Result { - let mut list = list.to_list()?.into_vec(); + // TODO: the bound on the sort function in + // `im::Vector::sort_by` is `Fn(...)`, which means that we can + // not use the mutable VM inside of its closure, hence the + // dance via `Vec`. I think this is just an unnecessarily + // restrictive bound in `im`, not a functional requirement. + let mut list = list.to_list()?.into_iter().collect::>(); // Used to let errors "escape" from the sorting closure. If anything // ends up setting an error, it is returned from this function. diff --git a/tvix/eval/src/value/list.rs b/tvix/eval/src/value/list.rs index 2604b935ed67..0a7ff8a58a79 100644 --- a/tvix/eval/src/value/list.rs +++ b/tvix/eval/src/value/list.rs @@ -1,6 +1,7 @@ //! This module implements Nix lists. -use std::ops::Deref; -use std::rc::Rc; +use std::ops::Index; + +use im::{vector, Vector}; use crate::errors::ErrorKind; use crate::vm::VM; @@ -11,13 +12,13 @@ use super::Value; #[repr(transparent)] #[derive(Clone, Debug)] -pub struct NixList(Rc>); +pub struct NixList(Vector); impl TotalDisplay for NixList { fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { f.write_str("[ ")?; - for v in self.0.as_ref() { + for v in self { v.total_fmt(f, set)?; f.write_str(" ")?; } @@ -26,15 +27,17 @@ impl TotalDisplay for NixList { } } -impl From>> for NixList { - fn from(v: Rc>) -> Self { - Self(v) +// TODO(tazjin): uses of this instance are likely inefficient and can be optimised. +// Eventually this instance should be removed. +impl From> for NixList { + fn from(vs: Vec) -> Self { + Self(Vector::from_iter(vs.into_iter())) } } -impl From> for NixList { - fn from(vs: Vec) -> Self { - Self(Rc::new(vs)) +impl From> for NixList { + fn from(vs: Vector) -> Self { + Self(vs) } } @@ -48,20 +51,18 @@ mod arbitrary { use super::*; impl Arbitrary for NixList { + // TODO(tazjin): im seems to implement arbitrary instances, + // but I couldn't figure out how to enable them. type Parameters = as Arbitrary>::Parameters; type Strategy = BoxedStrategy; fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { - any_with::>>(args).prop_map(Self).boxed() + any_with::>(args).prop_map(|v| v.into()).boxed() } } } impl NixList { - pub fn new() -> Self { - Self(Rc::new(vec![])) - } - pub fn len(&self) -> usize { self.0.len() } @@ -78,15 +79,15 @@ impl NixList { stack_slice.len(), ); - NixList(Rc::new(stack_slice)) + stack_slice.into() } - pub fn iter(&self) -> std::slice::Iter { + pub fn iter(&self) -> vector::Iter { self.0.iter() } pub fn ptr_eq(&self, other: &Self) -> bool { - Rc::ptr_eq(&self.0, &other.0) + self.0.ptr_eq(&other.0) } /// Compare `self` against `other` for equality using Nix equality semantics @@ -112,34 +113,33 @@ impl NixList { self.iter().try_for_each(|v| v.force(vm).map(|_| ())) } - pub fn into_vec(self) -> Vec { - crate::unwrap_or_clone_rc(self.0) + pub fn into_inner(self) -> Vector { + self.0 } } impl IntoIterator for NixList { type Item = Value; - type IntoIter = std::vec::IntoIter; + type IntoIter = im::vector::ConsumingIter; - fn into_iter(self) -> std::vec::IntoIter { - self.into_vec().into_iter() + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() } } impl<'a> IntoIterator for &'a NixList { type Item = &'a Value; - - type IntoIter = std::slice::Iter<'a, Value>; + type IntoIter = im::vector::Iter<'a, Value>; fn into_iter(self) -> Self::IntoIter { self.0.iter() } } -impl Deref for NixList { - type Target = Vec; +impl Index for NixList { + type Output = Value; - fn deref(&self) -> &Self::Target { - &self.0 + fn index(&self, index: usize) -> &Self::Output { + &self.0[index] } } diff --git a/tvix/eval/src/vm.rs b/tvix/eval/src/vm.rs index f1dc99439638..5bc0f7736ae4 100644 --- a/tvix/eval/src/vm.rs +++ b/tvix/eval/src/vm.rs @@ -815,10 +815,9 @@ impl<'o> VM<'o> { } OpCode::OpConcat => { - let rhs = fallible!(self, self.pop().to_list()); - let mut lhs = fallible!(self, self.pop().to_list()).into_vec(); - lhs.extend_from_slice(&rhs); - self.push(Value::List(NixList::from(lhs))) + let rhs = fallible!(self, self.pop().to_list()).into_inner(); + let lhs = fallible!(self, self.pop().to_list()).into_inner(); + self.push(Value::List(NixList::from(lhs + rhs))) } OpCode::OpInterpolate(Count(count)) => self.run_interpolate(count)?, -- cgit 1.4.1