about summary refs log tree commit diff
path: root/tvix
diff options
context:
space:
mode:
authorAspen Smith <root@gws.fyi>2024-02-10T16·29-0500
committerclbot <clbot@tvl.fyi>2024-02-13T16·49+0000
commite3c92ac3b4b07a7397b565738ec4237b9bf621f6 (patch)
tree0cb85c787223782f3eb5fc628e7ffd0ed309f122 /tvix
parent24a089f87d8a80a2d31c5440b40f72ee466417e7 (diff)
fix(tvix/eval): Replace inner NixString repr with Box<Bstr> r/7506
Storing a full BString here incurs the extra overhead of the capacity
for the inner byte-vector, which we basically never use as Nix strings
are immutable (and we don't do any mutation / sharing analysis).
Switching to a Box<BStr> cuts us from 72 bytes to 64 bytes per
string (and there are a lot of strings!)

Change-Id: I11f34c14a08fa02759f260b1c78b2a2b981714e4
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10794
Autosubmit: aspen <root@gws.fyi>
Reviewed-by: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix')
-rw-r--r--tvix/eval/src/builtins/mod.rs46
-rw-r--r--tvix/eval/src/value/string.rs80
-rw-r--r--tvix/eval/src/vm/mod.rs2
-rw-r--r--tvix/glue/src/builtins/derivation.rs25
-rw-r--r--tvix/glue/src/tvix_store_io.rs4
5 files changed, 92 insertions, 65 deletions
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index 3048b32fb550..a0b990ec542c 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -3,7 +3,7 @@
 //! See //tvix/eval/docs/builtins.md for a some context on the
 //! available builtins in Nix.
 
-use bstr::ByteVec;
+use bstr::{ByteSlice, ByteVec};
 use builtin_macros::builtins;
 use genawaiter::rc::Gen;
 use imbl::OrdMap;
@@ -67,7 +67,7 @@ pub async fn coerce_value_to_path(
     .await
     {
         Ok(vs) => {
-            let path = (**vs).clone().into_path_buf()?;
+            let path = vs.to_path()?.to_owned();
             if path.is_absolute() {
                 Ok(Ok(path))
             } else {
@@ -82,7 +82,7 @@ pub async fn coerce_value_to_path(
 mod pure_builtins {
     use std::ffi::OsString;
 
-    use bstr::{BString, ByteSlice};
+    use bstr::{BString, ByteSlice, B};
     use imbl::Vector;
     use itertools::Itertools;
     use os_str_bytes::OsStringBytes;
@@ -171,26 +171,23 @@ mod pure_builtins {
     #[builtin("baseNameOf")]
     async fn builtin_base_name_of(co: GenCo, s: Value) -> Result<Value, ErrorKind> {
         let span = generators::request_span(&co).await;
-        let mut s = match s {
-            val @ Value::Catchable(_) => return Ok(val),
-            _ => s
-                .coerce_to_string(
-                    co,
-                    CoercionKind {
-                        strong: false,
-                        import_paths: false,
-                    },
-                    span,
-                )
-                .await?
-                .to_contextful_str()?,
-        };
+        let s = s
+            .coerce_to_string(
+                co,
+                CoercionKind {
+                    strong: false,
+                    import_paths: false,
+                },
+                span,
+            )
+            .await?
+            .to_contextful_str()?;
 
-        let bs = s.as_mut_bstring();
+        let mut bs = (**s).to_owned();
         if let Some(last_slash) = bs.rfind_char('/') {
-            *bs = bs[(last_slash + 1)..].into();
+            bs = bs[(last_slash + 1)..].into();
         }
-        Ok(s.into())
+        Ok(NixString::new_inherit_context_from(&s, bs).into())
     }
 
     #[builtin("bitAnd")]
@@ -344,7 +341,7 @@ mod pure_builtins {
             .map(|last_slash| {
                 let x = &str[..last_slash];
                 if x.is_empty() {
-                    b"/"
+                    B("/")
                 } else {
                     x
                 }
@@ -356,8 +353,7 @@ mod pure_builtins {
             ))))
         } else {
             Ok(Value::from(NixString::new_inherit_context_from(
-                &str,
-                result.into(),
+                &str, result,
             )))
         }
     }
@@ -1190,7 +1186,7 @@ mod pure_builtins {
             // push the unmatched characters preceding the match
             ret.push_back(Value::from(NixString::new_inherit_context_from(
                 &s,
-                (&text[pos..thematch.start()]).into(),
+                &text[pos..thematch.start()],
             )));
 
             // Push a list with one element for each capture
@@ -1363,7 +1359,7 @@ mod pure_builtins {
 
         Ok(Value::from(NixString::new_inherit_context_from(
             &x,
-            (&x[beg..end]).into(),
+            &x[beg..end],
         )))
     }
 
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs
index e2767dd22cdb..5a7783b8fd72 100644
--- a/tvix/eval/src/value/string.rs
+++ b/tvix/eval/src/value/string.rs
@@ -146,7 +146,7 @@ impl NixContext {
 
 // FIXME: when serializing, ignore the context?
 #[derive(Clone, Debug, Serialize)]
-pub struct NixString(BString, Option<NixContext>);
+pub struct NixString(Box<BStr>, Option<NixContext>);
 
 impl PartialEq for NixString {
     fn eq(&self, other: &Self) -> bool {
@@ -180,45 +180,66 @@ impl Ord for NixString {
     }
 }
 
+impl From<Box<BStr>> for NixString {
+    fn from(value: Box<BStr>) -> Self {
+        Self(value, None)
+    }
+}
+
+impl From<BString> for NixString {
+    fn from(value: BString) -> Self {
+        Self(Vec::<u8>::from(value).into_boxed_slice().into(), None)
+    }
+}
+
 impl From<&BStr> for NixString {
     fn from(value: &BStr) -> Self {
-        Self(value.to_owned(), None)
+        value.to_owned().into()
     }
 }
 
 impl From<&[u8]> for NixString {
     fn from(value: &[u8]) -> Self {
-        Self(value.into(), None)
+        Self::from(value.to_owned())
     }
 }
 
 impl From<Vec<u8>> for NixString {
     fn from(value: Vec<u8>) -> Self {
+        value.into_boxed_slice().into()
+    }
+}
+
+impl From<Box<[u8]>> for NixString {
+    fn from(value: Box<[u8]>) -> Self {
         Self(value.into(), None)
     }
 }
 
 impl From<&str> for NixString {
     fn from(s: &str) -> Self {
-        Self(s.as_bytes().into(), None)
+        s.as_bytes().into()
     }
 }
 
 impl From<String> for NixString {
     fn from(s: String) -> Self {
-        Self(s.into(), None)
+        s.into_bytes().into()
     }
 }
 
-impl From<(String, Option<NixContext>)> for NixString {
-    fn from((s, ctx): (String, Option<NixContext>)) -> Self {
-        NixString(s.into(), ctx)
+impl<T> From<(T, Option<NixContext>)> for NixString
+where
+    NixString: From<T>,
+{
+    fn from((s, ctx): (T, Option<NixContext>)) -> Self {
+        NixString(NixString::from(s).0, ctx)
     }
 }
 
 impl From<Box<str>> for NixString {
     fn from(s: Box<str>) -> Self {
-        Self(s.into_boxed_bytes().into_vec().into(), None)
+        s.into_boxed_bytes().into()
     }
 }
 
@@ -234,12 +255,18 @@ impl<'a> From<&'a NixString> for &'a BStr {
     }
 }
 
-impl From<NixString> for BString {
+impl From<NixString> for Box<BStr> {
     fn from(s: NixString) -> Self {
         s.0
     }
 }
 
+impl From<NixString> for BString {
+    fn from(s: NixString) -> Self {
+        s.0.to_vec().into()
+    }
+}
+
 impl AsRef<[u8]> for NixString {
     fn as_ref(&self) -> &[u8] {
         &self.0
@@ -292,7 +319,7 @@ impl<'de> Deserialize<'de> for NixString {
 }
 
 impl Deref for NixString {
-    type Target = BString;
+    type Target = BStr;
 
     fn deref(&self) -> &Self::Target {
         &self.0
@@ -317,13 +344,19 @@ mod arbitrary {
 }
 
 impl NixString {
-    pub fn new_inherit_context_from(other: &NixString, new_contents: BString) -> Self {
-        Self(new_contents, other.1.clone())
+    pub fn new_inherit_context_from<T>(other: &NixString, new_contents: T) -> Self
+    where
+        NixString: From<T>,
+    {
+        Self(Self::from(new_contents).0, other.1.clone())
     }
 
-    pub fn new_context_from(context: NixContext, contents: BString) -> Self {
+    pub fn new_context_from<T>(context: NixContext, contents: T) -> Self
+    where
+        NixString: From<T>,
+    {
         Self(
-            contents,
+            Self::from(contents).0,
             if context.is_empty() {
                 None
             } else {
@@ -332,12 +365,8 @@ impl NixString {
         )
     }
 
-    pub fn as_mut_bstring(&mut self) -> &mut BString {
-        &mut self.0
-    }
-
     pub fn as_bstr(&self) -> &BStr {
-        BStr::new(self.as_bytes())
+        self
     }
 
     pub fn as_bytes(&self) -> &[u8] {
@@ -345,7 +374,7 @@ impl NixString {
     }
 
     pub fn into_bstring(self) -> BString {
-        self.0
+        (*self.0).to_owned()
     }
 
     /// Return a displayable representation of the string as an
@@ -378,7 +407,7 @@ impl NixString {
 
     pub fn concat(&self, other: &Self) -> Self {
         let mut s = self.to_vec();
-        s.extend(other.0.as_slice());
+        s.extend(&(***other));
 
         let context = [&self.1, &other.1]
             .into_iter()
@@ -386,7 +415,7 @@ impl NixString {
             .fold(NixContext::new(), |acc_ctx, new_ctx| {
                 acc_ctx.join(&mut new_ctx.clone())
             });
-        Self::new_context_from(context, s.into())
+        Self::new_context_from(context, s)
     }
 
     pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> {
@@ -514,6 +543,11 @@ mod tests {
 
     use crate::properties::{eq_laws, hash_laws, ord_laws};
 
+    #[test]
+    fn size() {
+        assert_eq!(std::mem::size_of::<NixString>(), 64);
+    }
+
     eq_laws!(NixString);
     hash_laws!(NixString);
     ord_laws!(NixString);
diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs
index 88d616fe3adc..6483122f96f6 100644
--- a/tvix/eval/src/vm/mod.rs
+++ b/tvix/eval/src/vm/mod.rs
@@ -558,7 +558,7 @@ where
                                 return frame.error(
                                     self,
                                     ErrorKind::AttributeNotFound {
-                                        name: (**key).clone().into_string_lossy()
+                                        name: key.to_str_lossy().into_owned()
                                     },
                                 );
                             }
diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs
index b1487f4505e0..299d064c40d6 100644
--- a/tvix/glue/src/builtins/derivation.rs
+++ b/tvix/glue/src/builtins/derivation.rs
@@ -125,7 +125,7 @@ pub(crate) mod derivation_builtins {
     use std::collections::BTreeMap;
 
     use super::*;
-    use bstr::{ByteSlice, ByteVec};
+    use bstr::ByteSlice;
     use nix_compat::store_path::hash_placeholder;
     use tvix_eval::generators::Gen;
     use tvix_eval::{NixContext, NixContextElement, NixString};
@@ -251,7 +251,7 @@ pub(crate) mod derivation_builtins {
                             Err(cek) => return Ok(Value::Catchable(cek)),
                             Ok(s) => {
                                 input_context.mimic(&s);
-                                drv.arguments.push((**s).clone().into_string()?)
+                                drv.arguments.push(s.to_str()?.to_owned())
                             }
                         }
                     }
@@ -280,14 +280,14 @@ pub(crate) mod derivation_builtins {
                         // Populate drv.outputs
                         if drv
                             .outputs
-                            .insert((**output_name).clone().into_string()?, Default::default())
+                            .insert(output_name.to_str()?.to_owned(), Default::default())
                             .is_some()
                         {
                             Err(DerivationError::DuplicateOutput(
-                                (**output_name).clone().into_string_lossy(),
+                                output_name.to_str_lossy().into_owned(),
                             ))?
                         }
-                        output_names.push((**output_name).clone().into_string()?);
+                        output_names.push(output_name.to_str()?.to_owned());
                     }
 
                     // Add drv.environment[outputs] unconditionally.
@@ -304,9 +304,9 @@ pub(crate) mod derivation_builtins {
                             input_context.mimic(&val_str);
 
                             if arg_name == "builder" {
-                                drv.builder = (**val_str).clone().into_string()?;
+                                drv.builder = val_str.to_str()?.to_owned();
                             } else {
-                                drv.system = (**val_str).clone().into_string()?;
+                                drv.system = val_str.to_str()?.to_owned();
                             }
 
                             // Either populate drv.environment or structured_attrs.
@@ -314,7 +314,7 @@ pub(crate) mod derivation_builtins {
                                 // No need to check for dups, we only iterate over every attribute name once
                                 structured_attrs.insert(
                                     arg_name.to_owned(),
-                                    (**val_str).clone().into_string()?.into(),
+                                    val_str.to_str()?.to_owned().into(),
                                 );
                             } else {
                                 insert_env(&mut drv, arg_name, val_str.as_bytes().into())?;
@@ -373,7 +373,7 @@ pub(crate) mod derivation_builtins {
                 if let Some(attr) = attrs.select(key) {
                     match strong_importing_coerce_to_string(co, attr.clone()).await {
                         Err(cek) => return Ok(Err(cek)),
-                        Ok(str) => return Ok(Ok(Some((**str).clone().into_string()?))),
+                        Ok(str) => return Ok(Ok(Some(str.to_str()?.to_owned()))),
                     }
                 }
 
@@ -520,7 +520,7 @@ pub(crate) mod derivation_builtins {
             nix_compat::store_path::build_text_path(name.to_str()?, &content, content.iter_plain())
                 .map_err(|_e| {
                     nix_compat::derivation::DerivationError::InvalidOutputName(
-                        (**name).clone().into_string_lossy(),
+                        name.to_str_lossy().into_owned(),
                     )
                 })
                 .map_err(DerivationError::InvalidDerivation)?
@@ -530,9 +530,6 @@ pub(crate) mod derivation_builtins {
 
         // TODO: actually persist the file in the store at that path ...
 
-        Ok(Value::from(NixString::new_context_from(
-            context,
-            path.into(),
-        )))
+        Ok(Value::from(NixString::new_context_from(context, path)))
     }
 }
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
index 4146ededb40c..ed06eba89686 100644
--- a/tvix/glue/src/tvix_store_io.rs
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -297,7 +297,7 @@ impl EvalIO for TvixStoreIO {
 mod tests {
     use std::{path::Path, rc::Rc, sync::Arc};
 
-    use bstr::ByteVec;
+    use bstr::ByteSlice;
     use tempfile::TempDir;
     use tvix_build::buildservice::DummyBuildService;
     use tvix_castore::{
@@ -356,7 +356,7 @@ mod tests {
 
         let value = result.value.expect("must be some");
         match value {
-            tvix_eval::Value::String(s) => Some((***s).clone().into_string_lossy()),
+            tvix_eval::Value::String(s) => Some(s.to_str_lossy().into_owned()),
             _ => panic!("unexpected value type: {:?}", value),
         }
     }