about summary refs log tree commit diff
path: root/tvix/serde
diff options
context:
space:
mode:
author: Aspen Smith <root@gws.fyi> 2023-12-05T22·25-0500
committer: aspen <root@gws.fyi> 2024-01-31T14·51+0000
commit: 201173afaca7d70aa039a1e37a91c49af3a99b0b (patch)
tree: d661ca257820aca975339ee7d17dd1a08df85932 /tvix/serde
parent: 6f9e25943f3e2f83d191cadcc76a278073626fe8 (diff)
fix(tvix): Represent strings as byte arrays r/7460
C++ nix uses C-style zero-terminated char pointers to represent strings
internally - however, up to this point, tvix has used Rust `String` and
`str` for string values. Since those are required to be valid utf-8, we
haven't been able to properly represent all the string values that Nix
supports.

To fix that, this change converts the internal representation of the
NixString struct from `Box<str>` to `BString`, from the `bstr` crate -
this is a wrapper around a `Vec<u8>` with extra functions for treating
that byte vector as a "morally string-like" value, which is basically
exactly what we need.

Since this changes a pretty fundamental assumption about a pretty core
type, there are a *lot* of changes in a lot of places to make this work,
but I've tried to keep the general philosophy and intent of most of the
code in most places intact. Most notably, nothing has been done to make
the derivation stuff in //tvix/glue work with non-UTF-8 strings
everywhere; instead, values are simply converted to String/str when
they are passed into that code. There *might* be something to be done
there, but I don't know what the rules should be and I don't want to
figure them out in this change.

To deal with OS-native paths in a way that also works in WASM for
tvixbolt, this also adds a dependency on the "os_str_bytes" crate.

Fixes: b/189
Fixes: b/337
Change-Id: I5e6eb29c62f47dd91af954f5e12bfc3d186f5526
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10200
Reviewed-by: tazjin <tazjin@tvl.su>
Reviewed-by: flokli <flokli@flokli.de>
Reviewed-by: sterni <sternenseemann@systemli.org>
Autosubmit: aspen <root@gws.fyi>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/serde')
-rw-r--r-- tvix/serde/Cargo.toml 3
-rw-r--r-- tvix/serde/src/de.rs 28
-rw-r--r-- tvix/serde/src/de_tests.rs 3
3 files changed, 25 insertions, 9 deletions
diff --git a/tvix/serde/Cargo.toml b/tvix/serde/Cargo.toml
index e535f6e8a3..5652126ada 100644
--- a/tvix/serde/Cargo.toml
+++ b/tvix/serde/Cargo.toml
@@ -5,4 +5,5 @@ edition = "2021"
 
 [dependencies]
 tvix-eval = { path = "../eval" }
-serde = { version = "1.0", features = ["derive"] }
\ No newline at end of file
+serde = { version = "1.0", features = ["derive"] }
+bstr = { version = "1.8.0", features = ["serde"] }
diff --git a/tvix/serde/src/de.rs b/tvix/serde/src/de.rs
index 15ab07c536..cf85ffab2e 100644
--- a/tvix/serde/src/de.rs
+++ b/tvix/serde/src/de.rs
@@ -1,5 +1,6 @@
 //! Deserialisation from Nix to Rust values.
 
+use bstr::ByteSlice;
 use serde::de::value::{MapDeserializer, SeqDeserializer};
 use serde::de::{self, EnumAccess, VariantAccess};
 pub use tvix_eval::Evaluation;
@@ -209,7 +210,7 @@ impl<'de> de::Deserializer<'de> for NixDeserializer {
         V: de::Visitor<'de>,
     {
         if let Value::String(s) = &self.value {
-            let chars = s.as_str().chars().collect::<Vec<_>>();
+            let chars = s.chars().collect::<Vec<_>>();
             if chars.len() == 1 {
                 return visitor.visit_char(chars[0]);
             }
@@ -223,7 +224,9 @@ impl<'de> de::Deserializer<'de> for NixDeserializer {
         V: de::Visitor<'de>,
     {
         if let Value::String(s) = &self.value {
-            return visitor.visit_str(s.as_str());
+            if let Ok(s) = s.to_str() {
+                return visitor.visit_str(s);
+            }
         }
 
         Err(unexpected("string", &self.value))
@@ -234,7 +237,9 @@ impl<'de> de::Deserializer<'de> for NixDeserializer {
         V: de::Visitor<'de>,
     {
         if let Value::String(s) = &self.value {
-            return visitor.visit_str(s.as_str());
+            if let Ok(s) = s.to_str() {
+                return visitor.visit_str(s);
+            }
         }
 
         Err(unexpected("string", &self.value))
@@ -379,7 +384,13 @@ impl<'de> de::Deserializer<'de> for NixDeserializer {
     {
         match self.value {
             // a string represents a unit variant
-            Value::String(s) => visitor.visit_enum(de::value::StrDeserializer::new(s.as_str())),
+            Value::String(ref s) => {
+                if let Ok(s) = s.to_str() {
+                    visitor.visit_enum(de::value::StrDeserializer::new(s))
+                } else {
+                    Err(unexpected(name, &self.value))
+                }
+            }
 
             // an attribute set however represents an externally
             // tagged enum with content
@@ -420,9 +431,12 @@ impl<'de> EnumAccess<'de> for Enum {
         }
 
         let (key, value) = self.0.into_iter().next().expect("length asserted above");
-        let val = seed.deserialize(de::value::StrDeserializer::<Error>::new(key.as_str()))?;
-
-        Ok((val, NixDeserializer::new(value)))
+        if let Ok(k) = key.to_str() {
+            let val = seed.deserialize(de::value::StrDeserializer::<Error>::new(k))?;
+            Ok((val, NixDeserializer::new(value)))
+        } else {
+            Err(unexpected("string", &key.clone().into()))
+        }
     }
 }
 
diff --git a/tvix/serde/src/de_tests.rs b/tvix/serde/src/de_tests.rs
index 807d953c77..54c2fdf8f7 100644
--- a/tvix/serde/src/de_tests.rs
+++ b/tvix/serde/src/de_tests.rs
@@ -213,6 +213,7 @@ fn deserialize_with_config() {
 
 #[builtins]
 mod test_builtins {
+    use bstr::ByteSlice;
     use tvix_eval::generators::{Gen, GenCo};
     use tvix_eval::{ErrorKind, NixString, Value};
 
@@ -220,7 +221,7 @@ mod test_builtins {
     pub async fn builtin_prepend_hello(co: GenCo, x: Value) -> Result<Value, ErrorKind> {
         match x {
             Value::String(s) => {
-                let new_string = NixString::from(format!("hello {}", s.as_str()));
+                let new_string = NixString::from(format!("hello {}", s.to_str().unwrap()));
                 Ok(Value::String(new_string))
             }
             _ => Err(ErrorKind::TypeError {