From 201173afaca7d70aa039a1e37a91c49af3a99b0b Mon Sep 17 00:00:00 2001 From: Aspen Smith Date: Tue, 5 Dec 2023 17:25:52 -0500 Subject: fix(tvix): Represent strings as byte arrays C++ nix uses C-style zero-terminated char pointers to represent strings internally - however, up to this point, tvix has used Rust `String` and `str` for string values. Since those are required to be valid utf-8, we haven't been able to properly represent all the string values that Nix supports. To fix that, this change converts the internal representation of the NixString struct from `Box<str>` to `BString`, from the `bstr` crate - this is a wrapper around a `Vec<u8>` with extra functions for treating that byte vector as a "morally string-like" value, which is basically exactly what we need. Since this changes a pretty fundamental assumption about a pretty core type, there are a *lot* of changes in a lot of places to make this work, but I've tried to keep the general philosophy and intent of most of the code in most places intact. Most notably, there's nothing that's been done to make the derivation stuff in //tvix/glue work with non-utf8 strings everywhere, instead opting to just convert to String/str when passing things into that - there *might* be something to be done there, but I don't know what the rules should be and I don't want to figure them out in this change. To deal with OS-native paths in a way that also works in WASM for tvixbolt, this also adds a dependency on the "os_str_bytes" crate. 
Fixes: b/189 Fixes: b/337 Change-Id: I5e6eb29c62f47dd91af954f5e12bfc3d186f5526 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10200 Reviewed-by: tazjin Reviewed-by: flokli Reviewed-by: sterni Autosubmit: aspen Tested-by: BuildkiteCI --- tvix/glue/src/builtins/derivation.rs | 72 ++++++++++++++++++++---------------- tvix/glue/src/builtins/mod.rs | 11 ++---- tvix/glue/src/tvix_store_io.rs | 5 ++- 3 files changed, 48 insertions(+), 40 deletions(-) (limited to 'tvix/glue/src') diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index 227b703f36a4..93a885bdd994 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -125,6 +125,7 @@ pub(crate) mod derivation_builtins { use std::collections::BTreeMap; use super::*; + use bstr::{ByteSlice, ByteVec}; use nix_compat::store_path::hash_placeholder; use tvix_eval::generators::Gen; use tvix_eval::{NixContext, NixContextElement, NixString}; @@ -139,7 +140,7 @@ pub(crate) mod derivation_builtins { input .to_str() .context("looking at output name in builtins.placeholder")? - .as_str(), + .to_str()?, ); Ok(placeholder.into()) @@ -167,10 +168,10 @@ pub(crate) mod derivation_builtins { } let name = name.to_str().context("determining derivation name")?; - if name.is_empty() { return Err(ErrorKind::Abort("derivation has empty name".to_string())); } + let name = name.to_str()?; let mut drv = Derivation::default(); drv.outputs.insert("out".to_string(), Default::default()); @@ -199,7 +200,11 @@ pub(crate) mod derivation_builtins { /// Inserts a key and value into the drv.environment BTreeMap, and fails if the /// key did already exist before. 
- fn insert_env(drv: &mut Derivation, k: &str, v: BString) -> Result<(), DerivationError> { + fn insert_env( + drv: &mut Derivation, + k: &str, /* TODO: non-utf8 env keys */ + v: BString, + ) -> Result<(), DerivationError> { if drv.environment.insert(k.into(), v).is_some() { return Err(DerivationError::DuplicateEnvVar(k.into())); } @@ -228,6 +233,7 @@ pub(crate) mod derivation_builtins { // Some set special fields in the Derivation struct, some change // behaviour of other functionality. for (arg_name, arg_value) in input.clone().into_iter_sorted() { + let arg_name = arg_name.to_str()?; // force the current value. let value = generators::request_force(&co, arg_value).await; @@ -236,7 +242,7 @@ pub(crate) mod derivation_builtins { continue; } - match arg_name.as_str() { + match arg_name { // Command line arguments to the builder. // These are only set in drv.arguments. "args" => { @@ -245,7 +251,7 @@ pub(crate) mod derivation_builtins { Err(cek) => return Ok(Value::Catchable(cek)), Ok(s) => { input_context.mimic(&s); - drv.arguments.push(s.as_str().to_string()) + drv.arguments.push((**s).clone().into_string()?) } } } @@ -274,18 +280,18 @@ pub(crate) mod derivation_builtins { // Populate drv.outputs if drv .outputs - .insert(output_name.as_str().to_string(), Default::default()) + .insert((**output_name).clone().into_string()?, Default::default()) .is_some() { Err(DerivationError::DuplicateOutput( - output_name.as_str().into(), + (**output_name).clone().into_string_lossy(), ))? } - output_names.push(output_name.as_str().to_string()); + output_names.push((**output_name).clone().into_string()?); } // Add drv.environment[outputs] unconditionally. - insert_env(&mut drv, arg_name.as_str(), output_names.join(" ").into())?; + insert_env(&mut drv, arg_name, output_names.join(" ").into())?; // drv.environment[$output_name] is added after the loop, // with whatever is in drv.outputs[$output_name]. 
} @@ -297,19 +303,21 @@ pub(crate) mod derivation_builtins { Ok(val_str) => { input_context.mimic(&val_str); - if arg_name.as_str() == "builder" { - drv.builder = val_str.as_str().to_owned(); + if arg_name == "builder" { + drv.builder = (**val_str).clone().into_string()?; } else { - drv.system = val_str.as_str().to_owned(); + drv.system = (**val_str).clone().into_string()?; } // Either populate drv.environment or structured_attrs. if let Some(ref mut structured_attrs) = structured_attrs { // No need to check for dups, we only iterate over every attribute name once - structured_attrs - .insert(arg_name.as_str().into(), val_str.as_str().into()); + structured_attrs.insert( + arg_name.to_owned(), + (**val_str).clone().into_string()?.into(), + ); } else { - insert_env(&mut drv, arg_name.as_str(), val_str.as_bytes().into())?; + insert_env(&mut drv, arg_name, val_str.as_bytes().into())?; } } } @@ -339,14 +347,14 @@ pub(crate) mod derivation_builtins { }; // No need to check for dups, we only iterate over every attribute name once - structured_attrs.insert(arg_name.as_str().to_string(), val_json); + structured_attrs.insert(arg_name.to_owned(), val_json); } else { match strong_importing_coerce_to_string(&co, value).await { Err(cek) => return Ok(Value::Catchable(cek)), Ok(val_str) => { input_context.mimic(&val_str); - insert_env(&mut drv, arg_name.as_str(), val_str.as_bytes().into())?; + insert_env(&mut drv, arg_name, val_str.as_bytes().into())?; } } } @@ -365,7 +373,7 @@ pub(crate) mod derivation_builtins { if let Some(attr) = attrs.select(key) { match strong_importing_coerce_to_string(co, attr.clone()).await { Err(cek) => return Ok(Err(cek)), - Ok(str) => return Ok(Ok(Some(str.as_str().to_string()))), + Ok(str) => return Ok(Ok(Some((**str).clone().into_string()?))), } } @@ -438,11 +446,11 @@ pub(crate) mod derivation_builtins { }); // Mutate the Derivation struct and set output paths - drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp) + 
drv.calculate_output_paths(name, &derivation_or_fod_hash_tmp) .map_err(DerivationError::InvalidDerivation)?; let drv_path = drv - .calculate_derivation_path(&name) + .calculate_derivation_path(name) .map_err(DerivationError::InvalidDerivation)?; // recompute the hash derivation modulo and add to known_paths @@ -508,21 +516,23 @@ pub(crate) mod derivation_builtins { return Err(ErrorKind::UnexpectedContext); } - let path = nix_compat::store_path::build_text_path( - name.as_str(), - content.as_str(), - content.iter_plain(), - ) - .map_err(|_e| { - nix_compat::derivation::DerivationError::InvalidOutputName(name.as_str().to_string()) - }) - .map_err(DerivationError::InvalidDerivation)? - .to_absolute_path(); + let path = + nix_compat::store_path::build_text_path(name.to_str()?, &content, content.iter_plain()) + .map_err(|_e| { + nix_compat::derivation::DerivationError::InvalidOutputName( + (**name).clone().into_string_lossy(), + ) + }) + .map_err(DerivationError::InvalidDerivation)? + .to_absolute_path(); let context: NixContext = NixContextElement::Plain(path.clone()).into(); // TODO: actually persist the file in the store at that path ... 
- Ok(Value::String(NixString::new_context_from(context, &path))) + Ok(Value::String(NixString::new_context_from( + context, + path.into(), + ))) } } diff --git a/tvix/glue/src/builtins/mod.rs b/tvix/glue/src/builtins/mod.rs index 58be31d7f87b..c3c267a98782 100644 --- a/tvix/glue/src/builtins/mod.rs +++ b/tvix/glue/src/builtins/mod.rs @@ -74,10 +74,7 @@ mod tests { match value { tvix_eval::Value::String(s) => { - assert_eq!( - "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo", - s.as_str() - ); + assert_eq!(s, "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo",); } _ => panic!("unexpected value type: {:?}", value), } @@ -162,7 +159,7 @@ mod tests { match value { tvix_eval::Value::String(s) => { - assert_eq!(expected_path, s.as_str()); + assert_eq!(s, expected_path); } _ => panic!("unexpected value type: {:?}", value), } @@ -285,7 +282,7 @@ mod tests { match value { tvix_eval::Value::String(s) => { - assert_eq!(expected_drvpath, s.as_str()); + assert_eq!(s, expected_drvpath); } _ => panic!("unexpected value type: {:?}", value), @@ -314,7 +311,7 @@ mod tests { match value { tvix_eval::Value::String(s) => { - assert_eq!(expected_path, s.as_str()); + assert_eq!(s, expected_path); } _ => panic!("unexpected value type: {:?}", value), } diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 1277a1d97792..fea336e2350b 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -297,6 +297,7 @@ impl EvalIO for TvixStoreIO { mod tests { use std::{path::Path, rc::Rc, sync::Arc}; + use bstr::ByteVec; use tempfile::TempDir; use tvix_build::buildservice::DummyBuildService; use tvix_castore::{ @@ -355,7 +356,7 @@ mod tests { let value = result.value.expect("must be some"); match value { - tvix_eval::Value::String(s) => return Some(s.as_str().to_owned()), + tvix_eval::Value::String(s) => Some((**s).clone().into_string_lossy()), _ => panic!("unexpected value type: {:?}", value), } } @@ -421,7 +422,7 @@ mod tests { match value { 
tvix_eval::Value::String(s) => { - assert_eq!("/deep/thought", s.as_str()); + assert_eq!(s, "/deep/thought"); } _ => panic!("unexpected value type: {:?}", value), } -- cgit 1.4.1