From e3c92ac3b4b07a7397b565738ec4237b9bf621f6 Mon Sep 17 00:00:00 2001 From: Aspen Smith Date: Sat, 10 Feb 2024 11:29:58 -0500 Subject: fix(tvix/eval): Replace inner NixString repr with Box Storing a full BString here incurs the extra overhead of the capacity for the inner byte-vector, which we basically never use as Nix strings are immutable (and we don't do any mutation / sharing analysis). Switching to a Box cuts us from 72 bytes to 64 bytes per string (and there are a lot of strings!) Change-Id: I11f34c14a08fa02759f260b1c78b2a2b981714e4 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10794 Autosubmit: aspen Reviewed-by: flokli Tested-by: BuildkiteCI --- tvix/eval/src/builtins/mod.rs | 46 ++++++++++++------------- tvix/eval/src/value/string.rs | 80 ++++++++++++++++++++++++++++++------------- tvix/eval/src/vm/mod.rs | 2 +- 3 files changed, 79 insertions(+), 49 deletions(-) (limited to 'tvix/eval/src') diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 3048b32fb5..a0b990ec54 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -3,7 +3,7 @@ //! See //tvix/eval/docs/builtins.md for a some context on the //! available builtins in Nix. -use bstr::ByteVec; +use bstr::{ByteSlice, ByteVec}; use builtin_macros::builtins; use genawaiter::rc::Gen; use imbl::OrdMap; @@ -67,7 +67,7 @@ pub async fn coerce_value_to_path( .await { Ok(vs) => { - let path = (**vs).clone().into_path_buf()?; + let path = vs.to_path()?.to_owned(); if path.is_absolute() { Ok(Ok(path)) } else { @@ -82,7 +82,7 @@ pub async fn coerce_value_to_path( mod pure_builtins { use std::ffi::OsString; - use bstr::{BString, ByteSlice}; + use bstr::{BString, ByteSlice, B}; use imbl::Vector; use itertools::Itertools; use os_str_bytes::OsStringBytes; @@ -171,26 +171,23 @@ mod pure_builtins { #[builtin("baseNameOf")] async fn builtin_base_name_of(co: GenCo, s: Value) -> Result { let span = generators::request_span(&co).await; - let mut s = match s { - val @ Value::Catchable(_) => return Ok(val), - _ => s - .coerce_to_string( - co, - CoercionKind { - strong: false, - import_paths: false, - }, - span, - ) - .await? - .to_contextful_str()?, - }; + let s = s + .coerce_to_string( + co, + CoercionKind { + strong: false, + import_paths: false, + }, + span, + ) + .await? + .to_contextful_str()?; - let bs = s.as_mut_bstring(); + let mut bs = (**s).to_owned(); if let Some(last_slash) = bs.rfind_char('/') { - *bs = bs[(last_slash + 1)..].into(); + bs = bs[(last_slash + 1)..].into(); } - Ok(s.into()) + Ok(NixString::new_inherit_context_from(&s, bs).into()) } #[builtin("bitAnd")] @@ -344,7 +341,7 @@ mod pure_builtins { .map(|last_slash| { let x = &str[..last_slash]; if x.is_empty() { - b"/" + B("/") } else { x } @@ -356,8 +353,7 @@ mod pure_builtins { )))) } else { Ok(Value::from(NixString::new_inherit_context_from( - &str, - result.into(), + &str, result, ))) } } @@ -1190,7 +1186,7 @@ mod pure_builtins { // push the unmatched characters preceding the match ret.push_back(Value::from(NixString::new_inherit_context_from( &s, - (&text[pos..thematch.start()]).into(), + &text[pos..thematch.start()], ))); // Push a list with one element for each capture @@ -1363,7 +1359,7 @@ mod pure_builtins { Ok(Value::from(NixString::new_inherit_context_from( &x, - (&x[beg..end]).into(), + &x[beg..end], ))) } diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index e2767dd22c..5a7783b8fd 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -146,7 +146,7 @@ impl NixContext { // FIXME: when serializing, ignore the context? #[derive(Clone, Debug, Serialize)] -pub struct NixString(BString, Option); +pub struct NixString(Box, Option); impl PartialEq for NixString { fn eq(&self, other: &Self) -> bool { @@ -180,45 +180,66 @@ impl Ord for NixString { } } +impl From> for NixString { + fn from(value: Box) -> Self { + Self(value, None) + } +} + +impl From for NixString { + fn from(value: BString) -> Self { + Self(Vec::::from(value).into_boxed_slice().into(), None) + } +} + impl From<&BStr> for NixString { fn from(value: &BStr) -> Self { - Self(value.to_owned(), None) + value.to_owned().into() } } impl From<&[u8]> for NixString { fn from(value: &[u8]) -> Self { - Self(value.into(), None) + Self::from(value.to_owned()) } } impl From> for NixString { fn from(value: Vec) -> Self { + value.into_boxed_slice().into() + } +} + +impl From> for NixString { + fn from(value: Box<[u8]>) -> Self { Self(value.into(), None) } } impl From<&str> for NixString { fn from(s: &str) -> Self { - Self(s.as_bytes().into(), None) + s.as_bytes().into() } } impl From for NixString { fn from(s: String) -> Self { - Self(s.into(), None) + s.into_bytes().into() } } -impl From<(String, Option)> for NixString { - fn from((s, ctx): (String, Option)) -> Self { - NixString(s.into(), ctx) +impl From<(T, Option)> for NixString +where + NixString: From, +{ + fn from((s, ctx): (T, Option)) -> Self { + NixString(NixString::from(s).0, ctx) } } impl From> for NixString { fn from(s: Box) -> Self { - Self(s.into_boxed_bytes().into_vec().into(), None) + s.into_boxed_bytes().into() } } @@ -234,12 +255,18 @@ impl<'a> From<&'a NixString> for &'a BStr { } } -impl From for BString { +impl From for Box { fn from(s: NixString) -> Self { s.0 } } +impl From for BString { + fn from(s: NixString) -> Self { + s.0.to_vec().into() + } +} + impl AsRef<[u8]> for NixString { fn as_ref(&self) -> &[u8] { &self.0 @@ -292,7 +319,7 @@ impl<'de> Deserialize<'de> for NixString { } impl Deref for NixString { - type Target = BString; + type Target = BStr; fn deref(&self) -> &Self::Target { &self.0 @@ -317,13 +344,19 @@ mod arbitrary { } impl NixString { - pub fn new_inherit_context_from(other: &NixString, new_contents: BString) -> Self { - Self(new_contents, other.1.clone()) + pub fn new_inherit_context_from(other: &NixString, new_contents: T) -> Self + where + NixString: From, + { + Self(Self::from(new_contents).0, other.1.clone()) } - pub fn new_context_from(context: NixContext, contents: BString) -> Self { + pub fn new_context_from(context: NixContext, contents: T) -> Self + where + NixString: From, + { Self( - contents, + Self::from(contents).0, if context.is_empty() { None } else { @@ -332,12 +365,8 @@ impl NixString { ) } - pub fn as_mut_bstring(&mut self) -> &mut BString { - &mut self.0 - } - pub fn as_bstr(&self) -> &BStr { - BStr::new(self.as_bytes()) + self } pub fn as_bytes(&self) -> &[u8] { @@ -345,7 +374,7 @@ impl NixString { } pub fn into_bstring(self) -> BString { - self.0 + (*self.0).to_owned() } /// Return a displayable representation of the string as an @@ -378,7 +407,7 @@ impl NixString { pub fn concat(&self, other: &Self) -> Self { let mut s = self.to_vec(); - s.extend(other.0.as_slice()); + s.extend(&(***other)); let context = [&self.1, &other.1] .into_iter() @@ -386,7 +415,7 @@ impl NixString { .fold(NixContext::new(), |acc_ctx, new_ctx| { acc_ctx.join(&mut new_ctx.clone()) }); - Self::new_context_from(context, s.into()) + Self::new_context_from(context, s) } pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> { @@ -514,6 +543,11 @@ mod tests { use crate::properties::{eq_laws, hash_laws, ord_laws}; + #[test] + fn size() { + assert_eq!(std::mem::size_of::(), 64); + } + eq_laws!(NixString); hash_laws!(NixString); ord_laws!(NixString); diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 88d616fe3a..6483122f96 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -558,7 +558,7 @@ where return frame.error( self, ErrorKind::AttributeNotFound { - name: (**key).clone().into_string_lossy() + name: key.to_str_lossy().into_owned() }, ); } -- cgit 1.4.1