about summary refs log tree commit diff
path: root/tvix/glue
diff options
context:
space:
mode:
author Aspen Smith <root@gws.fyi> 2023-12-05T22·25-0500
committer aspen <root@gws.fyi> 2024-01-31T14·51+0000
commit 201173afaca7d70aa039a1e37a91c49af3a99b0b (patch)
tree d661ca257820aca975339ee7d17dd1a08df85932 /tvix/glue
parent 6f9e25943f3e2f83d191cadcc76a278073626fe8 (diff)
fix(tvix): Represent strings as byte arrays r/7460
C++ Nix uses C-style zero-terminated char pointers to represent strings
internally. Up to this point, however, tvix has used Rust `String` and
`str` for string values. Since those are required to be valid UTF-8, we
haven't been able to properly represent all the string values that Nix
supports.

To fix that, this change converts the internal representation of the
NixString struct from `Box<str>` to `BString`, from the `bstr` crate -
this is a wrapper around a `Vec<u8>` with extra functions for treating
that byte vector as a "morally string-like" value, which is basically
exactly what we need.

Since this changes a pretty fundamental assumption about a pretty core
type, there are a *lot* of changes in a lot of places to make this work,
but I've tried to keep the general philosophy and intent of most of the
code in most places intact. Most notably, nothing has been done to make
the derivation code in //tvix/glue work with non-UTF-8 strings
everywhere; instead, this change just converts to String/str when
passing values into that code. There *might* be something to be done
there, but I don't know what the rules should be and I don't want to
figure them out in this change.

To deal with OS-native paths in a way that also works in WASM for
tvixbolt, this also adds a dependency on the "os_str_bytes" crate.

Fixes: b/189
Fixes: b/337
Change-Id: I5e6eb29c62f47dd91af954f5e12bfc3d186f5526
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10200
Reviewed-by: tazjin <tazjin@tvl.su>
Reviewed-by: flokli <flokli@flokli.de>
Reviewed-by: sterni <sternenseemann@systemli.org>
Autosubmit: aspen <root@gws.fyi>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/glue')
-rw-r--r-- tvix/glue/src/builtins/derivation.rs 72
-rw-r--r-- tvix/glue/src/builtins/mod.rs 11
-rw-r--r-- tvix/glue/src/tvix_store_io.rs 5
3 files changed, 48 insertions, 40 deletions
diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs
index 227b703f36..93a885bdd9 100644
--- a/tvix/glue/src/builtins/derivation.rs
+++ b/tvix/glue/src/builtins/derivation.rs
@@ -125,6 +125,7 @@ pub(crate) mod derivation_builtins {
     use std::collections::BTreeMap;
 
     use super::*;
+    use bstr::{ByteSlice, ByteVec};
     use nix_compat::store_path::hash_placeholder;
     use tvix_eval::generators::Gen;
     use tvix_eval::{NixContext, NixContextElement, NixString};
@@ -139,7 +140,7 @@ pub(crate) mod derivation_builtins {
             input
                 .to_str()
                 .context("looking at output name in builtins.placeholder")?
-                .as_str(),
+                .to_str()?,
         );
 
         Ok(placeholder.into())
@@ -167,10 +168,10 @@ pub(crate) mod derivation_builtins {
         }
 
         let name = name.to_str().context("determining derivation name")?;
-
         if name.is_empty() {
             return Err(ErrorKind::Abort("derivation has empty name".to_string()));
         }
+        let name = name.to_str()?;
 
         let mut drv = Derivation::default();
         drv.outputs.insert("out".to_string(), Default::default());
@@ -199,7 +200,11 @@ pub(crate) mod derivation_builtins {
 
         /// Inserts a key and value into the drv.environment BTreeMap, and fails if the
         /// key did already exist before.
-        fn insert_env(drv: &mut Derivation, k: &str, v: BString) -> Result<(), DerivationError> {
+        fn insert_env(
+            drv: &mut Derivation,
+            k: &str, /* TODO: non-utf8 env keys */
+            v: BString,
+        ) -> Result<(), DerivationError> {
             if drv.environment.insert(k.into(), v).is_some() {
                 return Err(DerivationError::DuplicateEnvVar(k.into()));
             }
@@ -228,6 +233,7 @@ pub(crate) mod derivation_builtins {
         // Some set special fields in the Derivation struct, some change
         // behaviour of other functionality.
         for (arg_name, arg_value) in input.clone().into_iter_sorted() {
+            let arg_name = arg_name.to_str()?;
             // force the current value.
             let value = generators::request_force(&co, arg_value).await;
 
@@ -236,7 +242,7 @@ pub(crate) mod derivation_builtins {
                 continue;
             }
 
-            match arg_name.as_str() {
+            match arg_name {
                 // Command line arguments to the builder.
                 // These are only set in drv.arguments.
                 "args" => {
@@ -245,7 +251,7 @@ pub(crate) mod derivation_builtins {
                             Err(cek) => return Ok(Value::Catchable(cek)),
                             Ok(s) => {
                                 input_context.mimic(&s);
-                                drv.arguments.push(s.as_str().to_string())
+                                drv.arguments.push((**s).clone().into_string()?)
                             }
                         }
                     }
@@ -274,18 +280,18 @@ pub(crate) mod derivation_builtins {
                         // Populate drv.outputs
                         if drv
                             .outputs
-                            .insert(output_name.as_str().to_string(), Default::default())
+                            .insert((**output_name).clone().into_string()?, Default::default())
                             .is_some()
                         {
                             Err(DerivationError::DuplicateOutput(
-                                output_name.as_str().into(),
+                                (**output_name).clone().into_string_lossy(),
                             ))?
                         }
-                        output_names.push(output_name.as_str().to_string());
+                        output_names.push((**output_name).clone().into_string()?);
                     }
 
                     // Add drv.environment[outputs] unconditionally.
-                    insert_env(&mut drv, arg_name.as_str(), output_names.join(" ").into())?;
+                    insert_env(&mut drv, arg_name, output_names.join(" ").into())?;
                     // drv.environment[$output_name] is added after the loop,
                     // with whatever is in drv.outputs[$output_name].
                 }
@@ -297,19 +303,21 @@ pub(crate) mod derivation_builtins {
                         Ok(val_str) => {
                             input_context.mimic(&val_str);
 
-                            if arg_name.as_str() == "builder" {
-                                drv.builder = val_str.as_str().to_owned();
+                            if arg_name == "builder" {
+                                drv.builder = (**val_str).clone().into_string()?;
                             } else {
-                                drv.system = val_str.as_str().to_owned();
+                                drv.system = (**val_str).clone().into_string()?;
                             }
 
                             // Either populate drv.environment or structured_attrs.
                             if let Some(ref mut structured_attrs) = structured_attrs {
                                 // No need to check for dups, we only iterate over every attribute name once
-                                structured_attrs
-                                    .insert(arg_name.as_str().into(), val_str.as_str().into());
+                                structured_attrs.insert(
+                                    arg_name.to_owned(),
+                                    (**val_str).clone().into_string()?.into(),
+                                );
                             } else {
-                                insert_env(&mut drv, arg_name.as_str(), val_str.as_bytes().into())?;
+                                insert_env(&mut drv, arg_name, val_str.as_bytes().into())?;
                             }
                         }
                     }
@@ -339,14 +347,14 @@ pub(crate) mod derivation_builtins {
                         };
 
                         // No need to check for dups, we only iterate over every attribute name once
-                        structured_attrs.insert(arg_name.as_str().to_string(), val_json);
+                        structured_attrs.insert(arg_name.to_owned(), val_json);
                     } else {
                         match strong_importing_coerce_to_string(&co, value).await {
                             Err(cek) => return Ok(Value::Catchable(cek)),
                             Ok(val_str) => {
                                 input_context.mimic(&val_str);
 
-                                insert_env(&mut drv, arg_name.as_str(), val_str.as_bytes().into())?;
+                                insert_env(&mut drv, arg_name, val_str.as_bytes().into())?;
                             }
                         }
                     }
@@ -365,7 +373,7 @@ pub(crate) mod derivation_builtins {
                 if let Some(attr) = attrs.select(key) {
                     match strong_importing_coerce_to_string(co, attr.clone()).await {
                         Err(cek) => return Ok(Err(cek)),
-                        Ok(str) => return Ok(Ok(Some(str.as_str().to_string()))),
+                        Ok(str) => return Ok(Ok(Some((**str).clone().into_string()?))),
                     }
                 }
 
@@ -438,11 +446,11 @@ pub(crate) mod derivation_builtins {
         });
 
         // Mutate the Derivation struct and set output paths
-        drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp)
+        drv.calculate_output_paths(name, &derivation_or_fod_hash_tmp)
             .map_err(DerivationError::InvalidDerivation)?;
 
         let drv_path = drv
-            .calculate_derivation_path(&name)
+            .calculate_derivation_path(name)
             .map_err(DerivationError::InvalidDerivation)?;
 
         // recompute the hash derivation modulo and add to known_paths
@@ -508,21 +516,23 @@ pub(crate) mod derivation_builtins {
             return Err(ErrorKind::UnexpectedContext);
         }
 
-        let path = nix_compat::store_path::build_text_path(
-            name.as_str(),
-            content.as_str(),
-            content.iter_plain(),
-        )
-        .map_err(|_e| {
-            nix_compat::derivation::DerivationError::InvalidOutputName(name.as_str().to_string())
-        })
-        .map_err(DerivationError::InvalidDerivation)?
-        .to_absolute_path();
+        let path =
+            nix_compat::store_path::build_text_path(name.to_str()?, &content, content.iter_plain())
+                .map_err(|_e| {
+                    nix_compat::derivation::DerivationError::InvalidOutputName(
+                        (**name).clone().into_string_lossy(),
+                    )
+                })
+                .map_err(DerivationError::InvalidDerivation)?
+                .to_absolute_path();
 
         let context: NixContext = NixContextElement::Plain(path.clone()).into();
 
         // TODO: actually persist the file in the store at that path ...
 
-        Ok(Value::String(NixString::new_context_from(context, &path)))
+        Ok(Value::String(NixString::new_context_from(
+            context,
+            path.into(),
+        )))
     }
 }
diff --git a/tvix/glue/src/builtins/mod.rs b/tvix/glue/src/builtins/mod.rs
index 58be31d7f8..c3c267a987 100644
--- a/tvix/glue/src/builtins/mod.rs
+++ b/tvix/glue/src/builtins/mod.rs
@@ -74,10 +74,7 @@ mod tests {
 
         match value {
             tvix_eval::Value::String(s) => {
-                assert_eq!(
-                    "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo",
-                    s.as_str()
-                );
+                assert_eq!(s, "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo",);
             }
             _ => panic!("unexpected value type: {:?}", value),
         }
@@ -162,7 +159,7 @@ mod tests {
 
         match value {
             tvix_eval::Value::String(s) => {
-                assert_eq!(expected_path, s.as_str());
+                assert_eq!(s, expected_path);
             }
             _ => panic!("unexpected value type: {:?}", value),
         }
@@ -285,7 +282,7 @@ mod tests {
 
         match value {
             tvix_eval::Value::String(s) => {
-                assert_eq!(expected_drvpath, s.as_str());
+                assert_eq!(s, expected_drvpath);
             }
 
             _ => panic!("unexpected value type: {:?}", value),
@@ -314,7 +311,7 @@ mod tests {
 
         match value {
             tvix_eval::Value::String(s) => {
-                assert_eq!(expected_path, s.as_str());
+                assert_eq!(s, expected_path);
             }
             _ => panic!("unexpected value type: {:?}", value),
         }
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
index 1277a1d977..fea336e235 100644
--- a/tvix/glue/src/tvix_store_io.rs
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -297,6 +297,7 @@ impl EvalIO for TvixStoreIO {
 mod tests {
     use std::{path::Path, rc::Rc, sync::Arc};
 
+    use bstr::ByteVec;
     use tempfile::TempDir;
     use tvix_build::buildservice::DummyBuildService;
     use tvix_castore::{
@@ -355,7 +356,7 @@ mod tests {
 
         let value = result.value.expect("must be some");
         match value {
-            tvix_eval::Value::String(s) => return Some(s.as_str().to_owned()),
+            tvix_eval::Value::String(s) => Some((**s).clone().into_string_lossy()),
             _ => panic!("unexpected value type: {:?}", value),
         }
     }
@@ -421,7 +422,7 @@ mod tests {
 
         match value {
             tvix_eval::Value::String(s) => {
-                assert_eq!("/deep/thought", s.as_str());
+                assert_eq!(s, "/deep/thought");
             }
             _ => panic!("unexpected value type: {:?}", value),
         }