From e3c92ac3b4b07a7397b565738ec4237b9bf621f6 Mon Sep 17 00:00:00 2001 From: Aspen Smith Date: Sat, 10 Feb 2024 11:29:58 -0500 Subject: fix(tvix/eval): Replace inner NixString repr with Box Storing a full BString here incurs the extra overhead of the capacity for the inner byte-vector, which we basically never use as Nix strings are immutable (and we don't do any mutation / sharing analysis). Switching to a Box cuts us from 72 bytes to 64 bytes per string (and there are a lot of strings!) Change-Id: I11f34c14a08fa02759f260b1c78b2a2b981714e4 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10794 Autosubmit: aspen Reviewed-by: flokli Tested-by: BuildkiteCI --- tvix/eval/src/builtins/mod.rs | 46 ++++++++++----------- tvix/eval/src/value/string.rs | 80 +++++++++++++++++++++++++----------- tvix/eval/src/vm/mod.rs | 2 +- tvix/glue/src/builtins/derivation.rs | 25 +++++------ tvix/glue/src/tvix_store_io.rs | 4 +- 5 files changed, 92 insertions(+), 65 deletions(-) diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 3048b32fb5..a0b990ec54 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -3,7 +3,7 @@ //! See //tvix/eval/docs/builtins.md for a some context on the //! available builtins in Nix. -use bstr::ByteVec; +use bstr::{ByteSlice, ByteVec}; use builtin_macros::builtins; use genawaiter::rc::Gen; use imbl::OrdMap; @@ -67,7 +67,7 @@ pub async fn coerce_value_to_path( .await { Ok(vs) => { - let path = (**vs).clone().into_path_buf()?; + let path = vs.to_path()?.to_owned(); if path.is_absolute() { Ok(Ok(path)) } else { @@ -82,7 +82,7 @@ pub async fn coerce_value_to_path( mod pure_builtins { use std::ffi::OsString; - use bstr::{BString, ByteSlice}; + use bstr::{BString, ByteSlice, B}; use imbl::Vector; use itertools::Itertools; use os_str_bytes::OsStringBytes; @@ -171,26 +171,23 @@ mod pure_builtins { #[builtin("baseNameOf")] async fn builtin_base_name_of(co: GenCo, s: Value) -> Result { let span = generators::request_span(&co).await; - let mut s = match s { - val @ Value::Catchable(_) => return Ok(val), - _ => s - .coerce_to_string( - co, - CoercionKind { - strong: false, - import_paths: false, - }, - span, - ) - .await? - .to_contextful_str()?, - }; + let s = s + .coerce_to_string( + co, + CoercionKind { + strong: false, + import_paths: false, + }, + span, + ) + .await? + .to_contextful_str()?; - let bs = s.as_mut_bstring(); + let mut bs = (**s).to_owned(); if let Some(last_slash) = bs.rfind_char('/') { - *bs = bs[(last_slash + 1)..].into(); + bs = bs[(last_slash + 1)..].into(); } - Ok(s.into()) + Ok(NixString::new_inherit_context_from(&s, bs).into()) } #[builtin("bitAnd")] @@ -344,7 +341,7 @@ mod pure_builtins { .map(|last_slash| { let x = &str[..last_slash]; if x.is_empty() { - b"/" + B("/") } else { x } @@ -356,8 +353,7 @@ mod pure_builtins { )))) } else { Ok(Value::from(NixString::new_inherit_context_from( - &str, - result.into(), + &str, result, ))) } } @@ -1190,7 +1186,7 @@ mod pure_builtins { // push the unmatched characters preceding the match ret.push_back(Value::from(NixString::new_inherit_context_from( &s, - (&text[pos..thematch.start()]).into(), + &text[pos..thematch.start()], ))); // Push a list with one element for each capture @@ -1363,7 +1359,7 @@ mod pure_builtins { Ok(Value::from(NixString::new_inherit_context_from( &x, - (&x[beg..end]).into(), + &x[beg..end], ))) } diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index e2767dd22c..5a7783b8fd 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -146,7 +146,7 @@ impl NixContext { // FIXME: when serializing, ignore the context? #[derive(Clone, Debug, Serialize)] -pub struct NixString(BString, Option); +pub struct NixString(Box, Option); impl PartialEq for NixString { fn eq(&self, other: &Self) -> bool { @@ -180,45 +180,66 @@ impl Ord for NixString { } } +impl From> for NixString { + fn from(value: Box) -> Self { + Self(value, None) + } +} + +impl From for NixString { + fn from(value: BString) -> Self { + Self(Vec::::from(value).into_boxed_slice().into(), None) + } +} + impl From<&BStr> for NixString { fn from(value: &BStr) -> Self { - Self(value.to_owned(), None) + value.to_owned().into() } } impl From<&[u8]> for NixString { fn from(value: &[u8]) -> Self { - Self(value.into(), None) + Self::from(value.to_owned()) } } impl From> for NixString { fn from(value: Vec) -> Self { + value.into_boxed_slice().into() + } +} + +impl From> for NixString { + fn from(value: Box<[u8]>) -> Self { Self(value.into(), None) } } impl From<&str> for NixString { fn from(s: &str) -> Self { - Self(s.as_bytes().into(), None) + s.as_bytes().into() } } impl From for NixString { fn from(s: String) -> Self { - Self(s.into(), None) + s.into_bytes().into() } } -impl From<(String, Option)> for NixString { - fn from((s, ctx): (String, Option)) -> Self { - NixString(s.into(), ctx) +impl From<(T, Option)> for NixString +where + NixString: From, +{ + fn from((s, ctx): (T, Option)) -> Self { + NixString(NixString::from(s).0, ctx) } } impl From> for NixString { fn from(s: Box) -> Self { - Self(s.into_boxed_bytes().into_vec().into(), None) + s.into_boxed_bytes().into() } } @@ -234,12 +255,18 @@ impl<'a> From<&'a NixString> for &'a BStr { } } -impl From for BString { +impl From for Box { fn from(s: NixString) -> Self { s.0 } } +impl From for BString { + fn from(s: NixString) -> Self { + s.0.to_vec().into() + } +} + impl AsRef<[u8]> for NixString { fn as_ref(&self) -> &[u8] { &self.0 @@ -292,7 +319,7 @@ impl<'de> Deserialize<'de> for NixString { } impl Deref for NixString { - type Target = BString; + type Target = BStr; fn deref(&self) -> &Self::Target { &self.0 @@ -317,13 +344,19 @@ mod arbitrary { } impl NixString { - pub fn new_inherit_context_from(other: &NixString, new_contents: BString) -> Self { - Self(new_contents, other.1.clone()) + pub fn new_inherit_context_from(other: &NixString, new_contents: T) -> Self + where + NixString: From, + { + Self(Self::from(new_contents).0, other.1.clone()) } - pub fn new_context_from(context: NixContext, contents: BString) -> Self { + pub fn new_context_from(context: NixContext, contents: T) -> Self + where + NixString: From, + { Self( - contents, + Self::from(contents).0, if context.is_empty() { None } else { @@ -332,12 +365,8 @@ impl NixString { ) } - pub fn as_mut_bstring(&mut self) -> &mut BString { - &mut self.0 - } - pub fn as_bstr(&self) -> &BStr { - BStr::new(self.as_bytes()) + self } pub fn as_bytes(&self) -> &[u8] { @@ -345,7 +374,7 @@ impl NixString { } pub fn into_bstring(self) -> BString { - self.0 + (*self.0).to_owned() } /// Return a displayable representation of the string as an @@ -378,7 +407,7 @@ impl NixString { pub fn concat(&self, other: &Self) -> Self { let mut s = self.to_vec(); - s.extend(other.0.as_slice()); + s.extend(&(***other)); let context = [&self.1, &other.1] .into_iter() @@ -386,7 +415,7 @@ impl NixString { .fold(NixContext::new(), |acc_ctx, new_ctx| { acc_ctx.join(&mut new_ctx.clone()) }); - Self::new_context_from(context, s.into()) + Self::new_context_from(context, s) } pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> { @@ -514,6 +543,11 @@ mod tests { use crate::properties::{eq_laws, hash_laws, ord_laws}; + #[test] + fn size() { + assert_eq!(std::mem::size_of::(), 64); + } + eq_laws!(NixString); hash_laws!(NixString); ord_laws!(NixString); diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 88d616fe3a..6483122f96 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -558,7 +558,7 @@ where return frame.error( self, ErrorKind::AttributeNotFound { - name: (**key).clone().into_string_lossy() + name: key.to_str_lossy().into_owned() }, ); } diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index b1487f4505..299d064c40 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -125,7 +125,7 @@ pub(crate) mod derivation_builtins { use std::collections::BTreeMap; use super::*; - use bstr::{ByteSlice, ByteVec}; + use bstr::ByteSlice; use nix_compat::store_path::hash_placeholder; use tvix_eval::generators::Gen; use tvix_eval::{NixContext, NixContextElement, NixString}; @@ -251,7 +251,7 @@ pub(crate) mod derivation_builtins { Err(cek) => return Ok(Value::Catchable(cek)), Ok(s) => { input_context.mimic(&s); - drv.arguments.push((**s).clone().into_string()?) + drv.arguments.push(s.to_str()?.to_owned()) } } } @@ -280,14 +280,14 @@ pub(crate) mod derivation_builtins { // Populate drv.outputs if drv .outputs - .insert((**output_name).clone().into_string()?, Default::default()) + .insert(output_name.to_str()?.to_owned(), Default::default()) .is_some() { Err(DerivationError::DuplicateOutput( - (**output_name).clone().into_string_lossy(), + output_name.to_str_lossy().into_owned(), ))? } - output_names.push((**output_name).clone().into_string()?); + output_names.push(output_name.to_str()?.to_owned()); } // Add drv.environment[outputs] unconditionally. @@ -304,9 +304,9 @@ pub(crate) mod derivation_builtins { input_context.mimic(&val_str); if arg_name == "builder" { - drv.builder = (**val_str).clone().into_string()?; + drv.builder = val_str.to_str()?.to_owned(); } else { - drv.system = (**val_str).clone().into_string()?; + drv.system = val_str.to_str()?.to_owned(); } // Either populate drv.environment or structured_attrs. @@ -314,7 +314,7 @@ pub(crate) mod derivation_builtins { // No need to check for dups, we only iterate over every attribute name once structured_attrs.insert( arg_name.to_owned(), - (**val_str).clone().into_string()?.into(), + val_str.to_str()?.to_owned().into(), ); } else { insert_env(&mut drv, arg_name, val_str.as_bytes().into())?; @@ -373,7 +373,7 @@ pub(crate) mod derivation_builtins { if let Some(attr) = attrs.select(key) { match strong_importing_coerce_to_string(co, attr.clone()).await { Err(cek) => return Ok(Err(cek)), - Ok(str) => return Ok(Ok(Some((**str).clone().into_string()?))), + Ok(str) => return Ok(Ok(Some(str.to_str()?.to_owned()))), } } @@ -520,7 +520,7 @@ pub(crate) mod derivation_builtins { nix_compat::store_path::build_text_path(name.to_str()?, &content, content.iter_plain()) .map_err(|_e| { nix_compat::derivation::DerivationError::InvalidOutputName( - (**name).clone().into_string_lossy(), + name.to_str_lossy().into_owned(), ) }) .map_err(DerivationError::InvalidDerivation)? @@ -530,9 +530,6 @@ pub(crate) mod derivation_builtins { // TODO: actually persist the file in the store at that path ... - Ok(Value::from(NixString::new_context_from( - context, - path.into(), - ))) + Ok(Value::from(NixString::new_context_from(context, path))) } } diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 4146ededb4..ed06eba896 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -297,7 +297,7 @@ impl EvalIO for TvixStoreIO { mod tests { use std::{path::Path, rc::Rc, sync::Arc}; - use bstr::ByteVec; + use bstr::ByteSlice; use tempfile::TempDir; use tvix_build::buildservice::DummyBuildService; use tvix_castore::{ @@ -356,7 +356,7 @@ mod tests { let value = result.value.expect("must be some"); match value { - tvix_eval::Value::String(s) => Some((***s).clone().into_string_lossy()), + tvix_eval::Value::String(s) => Some(s.to_str_lossy().into_owned()), _ => panic!("unexpected value type: {:?}", value), } } -- cgit 1.4.1