about summary refs log tree commit diff
path: root/tvix/eval
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/eval')
-rw-r--r--tvix/eval/Cargo.toml2
-rw-r--r--tvix/eval/src/builtins/impure.rs7
-rw-r--r--tvix/eval/src/builtins/mod.rs109
-rw-r--r--tvix/eval/src/builtins/to_xml.rs9
-rw-r--r--tvix/eval/src/builtins/versions.rs38
-rw-r--r--tvix/eval/src/compiler/import.rs5
-rw-r--r--tvix/eval/src/compiler/mod.rs2
-rw-r--r--tvix/eval/src/errors.rs27
-rw-r--r--tvix/eval/src/value/attrs.rs41
-rw-r--r--tvix/eval/src/value/attrs/tests.rs4
-rw-r--r--tvix/eval/src/value/json.rs7
-rw-r--r--tvix/eval/src/value/mod.rs28
-rw-r--r--tvix/eval/src/value/string.rs149
-rw-r--r--tvix/eval/src/vm/mod.rs25
14 files changed, 280 insertions, 173 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml
index d347d0ea687a..4f4551a349d2 100644
--- a/tvix/eval/Cargo.toml
+++ b/tvix/eval/Cargo.toml
@@ -11,6 +11,7 @@ name = "tvix_eval"
 [dependencies]
 builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" }
 bytes = "1.4.0"
+bstr = { version = "1.8.0", features = ["serde"] }
 codemap = "0.1.3"
 codemap-diagnostic = "0.1.1"
 dirs = "4.0.0"
@@ -19,6 +20,7 @@ imbl = { version = "2.0", features = [ "serde" ] }
 itertools = "0.12.0"
 lazy_static = "1.4.0"
 lexical-core = { version = "0.8.5", features = ["format", "parse-floats"] }
+os_str_bytes = { version = "6.3", features = ["conversions"] }
 path-clean = "0.1"
 proptest = { version = "1.3.0", default_features = false, features = ["std", "alloc", "tempfile"], optional = true }
 regex = "1.6.0"
diff --git a/tvix/eval/src/builtins/impure.rs b/tvix/eval/src/builtins/impure.rs
index 5324fd565304..28b8697644dd 100644
--- a/tvix/eval/src/builtins/impure.rs
+++ b/tvix/eval/src/builtins/impure.rs
@@ -17,12 +17,17 @@ use crate::{
 
 #[builtins]
 mod impure_builtins {
+    use std::ffi::OsStr;
+    use std::os::unix::ffi::OsStrExt;
+
     use super::*;
     use crate::builtins::coerce_value_to_path;
 
     #[builtin("getEnv")]
     async fn builtin_get_env(co: GenCo, var: Value) -> Result<Value, ErrorKind> {
-        Ok(env::var(var.to_str()?).unwrap_or_else(|_| "".into()).into())
+        Ok(env::var(OsStr::from_bytes(&var.to_str()?))
+            .unwrap_or_else(|_| "".into())
+            .into())
     }
 
     #[builtin("pathExists")]
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index dc3ea3fc15d4..6ccae2f9de1c 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -3,6 +3,7 @@
 //! See //tvix/eval/docs/builtins.md for a some context on the
 //! available builtins in Nix.
 
+use bstr::ByteVec;
 use builtin_macros::builtins;
 use genawaiter::rc::Gen;
 use imbl::OrdMap;
@@ -66,7 +67,7 @@ pub async fn coerce_value_to_path(
     .await
     {
         Ok(vs) => {
-            let path = PathBuf::from(vs.as_str());
+            let path = (**vs).clone().into_path_buf()?;
             if path.is_absolute() {
                 Ok(Ok(path))
             } else {
@@ -79,8 +80,12 @@ pub async fn coerce_value_to_path(
 
 #[builtins]
 mod pure_builtins {
+    use std::ffi::OsString;
+
+    use bstr::{BString, ByteSlice};
     use imbl::Vector;
     use itertools::Itertools;
+    use os_str_bytes::OsStringBytes;
 
     use crate::{value::PointerEquality, NixContext, NixContextElement};
 
@@ -187,7 +192,7 @@ mod pure_builtins {
     #[builtin("baseNameOf")]
     async fn builtin_base_name_of(co: GenCo, s: Value) -> Result<Value, ErrorKind> {
         let span = generators::request_span(&co).await;
-        let s = match s {
+        let mut s = match s {
             val @ Value::Catchable(_) => return Ok(val),
             _ => s
                 .coerce_to_string(
@@ -201,11 +206,12 @@ mod pure_builtins {
                 .await?
                 .to_contextful_str()?,
         };
-        let result: NixString = NixString::new_inherit_context_from(
-            &s,
-            s.rsplit_once('/').map(|(_, x)| x).unwrap_or(&s),
-        );
-        Ok(result.into())
+
+        let bs = s.as_mut_bstring();
+        if let Some(last_slash) = bs.rfind_char('/') {
+            *bs = bs[(last_slash + 1)..].into();
+        }
+        Ok(s.into())
     }
 
     #[builtin("bitAnd")]
@@ -240,7 +246,7 @@ mod pure_builtins {
         for item in list.into_iter() {
             let set = generators::request_force(&co, item).await.to_attrs()?;
 
-            if let Some(value) = set.select(key.as_str()) {
+            if let Some(value) = set.select(&key) {
                 output.push(value.clone());
             }
         }
@@ -256,9 +262,9 @@ mod pure_builtins {
     #[builtin("compareVersions")]
     async fn builtin_compare_versions(co: GenCo, x: Value, y: Value) -> Result<Value, ErrorKind> {
         let s1 = x.to_str()?;
-        let s1 = VersionPartsIter::new_for_cmp(s1.as_str());
+        let s1 = VersionPartsIter::new_for_cmp((&s1).into());
         let s2 = y.to_str()?;
-        let s2 = VersionPartsIter::new_for_cmp(s2.as_str());
+        let s2 = VersionPartsIter::new_for_cmp((&s2).into());
 
         match s1.cmp(s2) {
             std::cmp::Ordering::Less => Ok(Value::Integer(-1)),
@@ -323,7 +329,7 @@ mod pure_builtins {
             context = context.join(sep_context);
         }
         let list = list.to_list()?;
-        let mut res = String::new();
+        let mut res = BString::default();
         for (i, val) in list.into_iter().enumerate() {
             if i != 0 {
                 res.push_str(&separator);
@@ -339,7 +345,7 @@ mod pure_builtins {
             .await
             {
                 Ok(mut s) => {
-                    res.push_str(s.as_str());
+                    res.push_str(&s);
                     if let Some(ref mut other_context) = s.context_mut() {
                         // It is safe to consume the other context here
                         // because the `list` and `separator` are originally
@@ -353,7 +359,7 @@ mod pure_builtins {
             }
         }
         // FIXME: pass immediately the string res.
-        Ok(NixString::new_context_from(context, &res).into())
+        Ok(NixString::new_context_from(context, res).into())
     }
 
     #[builtin("deepSeq")]
@@ -383,17 +389,24 @@ mod pure_builtins {
             .await?
             .to_contextful_str()?;
         let result = str
-            .rsplit_once('/')
-            .map(|(x, _)| match x {
-                "" => "/",
-                _ => x,
+            .rfind_char('/')
+            .map(|last_slash| {
+                let x = &str[..last_slash];
+                if x.is_empty() {
+                    b"/"
+                } else {
+                    x
+                }
             })
-            .unwrap_or(".");
+            .unwrap_or(b".");
         if is_path {
-            Ok(Value::Path(Box::new(result.into())))
+            Ok(Value::Path(Box::new(PathBuf::from(
+                OsString::assert_from_raw_vec(result.to_owned()),
+            ))))
         } else {
             Ok(Value::String(NixString::new_inherit_context_from(
-                &str, result,
+                &str,
+                result.into(),
             )))
         }
     }
@@ -519,7 +532,7 @@ mod pure_builtins {
 
         let json_str = json.to_str()?;
 
-        serde_json::from_str(&json_str).map_err(|err| err.into())
+        serde_json::from_slice(&json_str).map_err(|err| err.into())
     }
 
     #[builtin("toJSON")]
@@ -537,7 +550,7 @@ mod pure_builtins {
     async fn builtin_from_toml(co: GenCo, toml: Value) -> Result<Value, ErrorKind> {
         let toml_str = toml.to_str()?;
 
-        toml::from_str(&toml_str).map_err(|err| err.into())
+        toml::from_str(toml_str.to_str()?).map_err(|err| err.into())
     }
 
     #[builtin("filterSource")]
@@ -632,7 +645,7 @@ mod pure_builtins {
         let k = key.to_str()?;
         let xs = set.to_attrs()?;
 
-        match xs.select(k.as_str()) {
+        match xs.select(&k) {
             Some(x) => Ok(x.clone()),
             None => Err(ErrorKind::AttributeNotFound {
                 name: k.to_string(),
@@ -680,7 +693,7 @@ mod pure_builtins {
         let k = key.to_str()?;
         let xs = set.to_attrs()?;
 
-        Ok(Value::Bool(xs.contains(k.as_str())))
+        Ok(Value::Bool(xs.contains(&k)))
     }
 
     #[builtin("hasContext")]
@@ -1069,8 +1082,8 @@ mod pure_builtins {
             return Ok(re);
         }
         let re = re.to_str()?;
-        let re: Regex = Regex::new(&format!("^{}$", re.as_str())).unwrap();
-        match re.captures(&s) {
+        let re: Regex = Regex::new(&format!("^{}$", re.to_str()?)).unwrap();
+        match re.captures(s.to_str()?) {
             Some(caps) => Ok(Value::List(
                 caps.iter()
                     .skip(1)
@@ -1106,7 +1119,7 @@ mod pure_builtins {
         // This replicates cppnix's (mis?)handling of codepoints
         // above U+007f following 0x2d ('-')
         let s = s.to_str()?;
-        let slice: &[u8] = s.as_str().as_ref();
+        let slice: &[u8] = s.as_ref();
         let (name, dash_and_version) = slice.split_at(
             slice
                 .windows(2)
@@ -1219,7 +1232,7 @@ mod pure_builtins {
 
         let mut string = s.to_contextful_str()?;
 
-        let mut res = String::new();
+        let mut res = BString::default();
 
         let mut i: usize = 0;
         let mut empty_string_replace = false;
@@ -1248,27 +1261,27 @@ mod pure_builtins {
                 // We already applied a from->to with an empty from
                 // transformation.
                 // Let's skip it so that we don't loop infinitely
-                if empty_string_replace && from.as_str().is_empty() {
+                if empty_string_replace && from.is_empty() {
                     continue;
                 }
 
                 // if we match the `from` string, let's replace
-                if &string[i..i + from.len()] == from.as_str() {
-                    res += &to;
+                if string[i..i + from.len()] == *from {
+                    res.push_str(&to);
                     i += from.len();
                     if let Some(to_ctx) = to.context_mut() {
                         context = context.join(to_ctx);
                     }
 
                     // remember if we applied the empty from->to
-                    empty_string_replace = from.as_str().is_empty();
+                    empty_string_replace = from.is_empty();
 
                     continue 'outer;
                 }
             }
 
             // If we don't match any `from`, we simply add a character
-            res += &string[i..i + 1];
+            res.push_str(&string[i..i + 1]);
             i += 1;
 
             // Since we didn't apply anything transformation,
@@ -1286,8 +1299,8 @@ mod pure_builtins {
             // We don't need to merge again the context, it's already in the right state.
             let mut to = elem.1.to_contextful_str()?;
 
-            if from.as_str().is_empty() {
-                res += &to;
+            if from.is_empty() {
+                res.push_str(&to);
                 if let Some(to_ctx) = to.context_mut() {
                     context = context.join(to_ctx);
                 }
@@ -1295,8 +1308,7 @@ mod pure_builtins {
             }
         }
 
-        // FIXME: consume directly the String.
-        Ok(Value::String(NixString::new_context_from(context, &res)))
+        Ok(Value::String(NixString::new_context_from(context, res)))
     }
 
     #[builtin("seq")]
@@ -1317,9 +1329,9 @@ mod pure_builtins {
         }
 
         let s = str.to_contextful_str()?;
-        let text = s.as_str();
+        let text = s.to_str()?;
         let re = regex.to_str()?;
-        let re: Regex = Regex::new(re.as_str()).unwrap();
+        let re = Regex::new(re.to_str()?).unwrap();
         let mut capture_locations = re.capture_locations();
         let num_captures = capture_locations.len();
         let mut ret = imbl::Vector::new();
@@ -1329,7 +1341,7 @@ mod pure_builtins {
             // push the unmatched characters preceding the match
             ret.push_back(Value::from(NixString::new_inherit_context_from(
                 &s,
-                &text[pos..thematch.start()],
+                (&text[pos..thematch.start()]).into(),
             )));
 
             // Push a list with one element for each capture
@@ -1380,7 +1392,7 @@ mod pure_builtins {
             return Ok(s);
         }
         let s = s.to_str()?;
-        let s = VersionPartsIter::new(s.as_str());
+        let s = VersionPartsIter::new((&s).into());
 
         let parts = s
             .map(|s| {
@@ -1412,7 +1424,7 @@ mod pure_builtins {
             return Ok(s);
         }
 
-        Ok(Value::Integer(s.to_contextful_str()?.as_str().len() as i64))
+        Ok(Value::Integer(s.to_contextful_str()?.len() as i64))
     }
 
     #[builtin("sub")]
@@ -1453,19 +1465,22 @@ mod pure_builtins {
         // Nix doesn't assert that the length argument is
         // non-negative when the starting index is GTE the
         // string's length.
-        if beg >= x.as_str().len() {
-            return Ok(Value::String(NixString::new_inherit_context_from(&x, "")));
+        if beg >= x.len() {
+            return Ok(Value::String(NixString::new_inherit_context_from(
+                &x,
+                BString::default(),
+            )));
         }
 
         let end = if len < 0 {
-            x.as_str().len()
+            x.len()
         } else {
-            cmp::min(beg + (len as usize), x.as_str().len())
+            cmp::min(beg + (len as usize), x.len())
         };
 
         Ok(Value::String(NixString::new_inherit_context_from(
             &x,
-            &x[beg..end],
+            (&x[beg..end]).into(),
         )))
     }
 
diff --git a/tvix/eval/src/builtins/to_xml.rs b/tvix/eval/src/builtins/to_xml.rs
index f23cb22db3d9..2f9a11e78852 100644
--- a/tvix/eval/src/builtins/to_xml.rs
+++ b/tvix/eval/src/builtins/to_xml.rs
@@ -2,6 +2,7 @@
 //! of value information as well as internal tvix state that several
 //! things in nixpkgs rely on.
 
+use bstr::ByteSlice;
 use std::{io::Write, rc::Rc};
 use xml::writer::events::XmlEvent;
 use xml::writer::EmitterConfig;
@@ -60,7 +61,7 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu
         Value::Bool(b) => return write_typed_value(w, "bool", b),
         Value::Integer(i) => return write_typed_value(w, "int", i),
         Value::Float(f) => return write_typed_value(w, "float", f),
-        Value::String(s) => return write_typed_value(w, "string", s.as_str()),
+        Value::String(s) => return write_typed_value(w, "string", s.to_str()?),
         Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()),
 
         Value::List(list) => {
@@ -77,7 +78,7 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu
             w.write(XmlEvent::start_element("attrs"))?;
 
             for elem in attrs.iter() {
-                w.write(XmlEvent::start_element("attr").attr("name", elem.0.as_str()))?;
+                w.write(XmlEvent::start_element("attr").attr("name", &elem.0.to_str_lossy()))?;
                 value_variant_to_xml(w, elem.1)?;
                 w.write(XmlEvent::end_element())?;
             }
@@ -101,7 +102,9 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu
                     w.write(attrspat)?;
 
                     for arg in formals.arguments.iter() {
-                        w.write(XmlEvent::start_element("attr").attr("name", arg.0.as_str()))?;
+                        w.write(
+                            XmlEvent::start_element("attr").attr("name", &arg.0.to_str_lossy()),
+                        )?;
                         w.write(XmlEvent::end_element())?;
                     }
 
diff --git a/tvix/eval/src/builtins/versions.rs b/tvix/eval/src/builtins/versions.rs
index 79fb82b868fb..6de512142440 100644
--- a/tvix/eval/src/builtins/versions.rs
+++ b/tvix/eval/src/builtins/versions.rs
@@ -2,13 +2,15 @@ use std::cmp::Ordering;
 use std::iter::{once, Chain, Once};
 use std::ops::RangeInclusive;
 
+use bstr::{BStr, ByteSlice, B};
+
 /// Version strings can be broken up into Parts.
 /// One Part represents either a string of digits or characters.
 /// '.' and '_' represent deviders between parts and are not included in any part.
 #[derive(PartialEq, Eq, Clone, Debug)]
 pub enum VersionPart<'a> {
-    Word(&'a str),
-    Number(&'a str),
+    Word(&'a BStr),
+    Number(&'a BStr),
 }
 
 impl PartialOrd for VersionPart<'_> {
@@ -23,15 +25,17 @@ impl Ord for VersionPart<'_> {
             (VersionPart::Number(s1), VersionPart::Number(s2)) => {
                 // Note: C++ Nix uses `int`, but probably doesn't make a difference
                 // We trust that the splitting was done correctly and parsing will work
-                let n1: u64 = s1.parse().unwrap();
-                let n2: u64 = s2.parse().unwrap();
+                let n1: u64 = s1.to_str_lossy().parse().unwrap();
+                let n2: u64 = s2.to_str_lossy().parse().unwrap();
                 n1.cmp(&n2)
             }
 
             // `pre` looses unless the other part is also a `pre`
-            (VersionPart::Word("pre"), VersionPart::Word("pre")) => Ordering::Equal,
-            (VersionPart::Word("pre"), _) => Ordering::Less,
-            (_, VersionPart::Word("pre")) => Ordering::Greater,
+            (VersionPart::Word(x), VersionPart::Word(y)) if *x == B("pre") && *y == B("pre") => {
+                Ordering::Equal
+            }
+            (VersionPart::Word(x), _) if *x == B("pre") => Ordering::Less,
+            (_, VersionPart::Word(y)) if *y == B("pre") => Ordering::Greater,
 
             // Number wins against Word
             (VersionPart::Number(_), VersionPart::Word(_)) => Ordering::Greater,
@@ -54,12 +58,12 @@ enum InternalPart {
 /// This can then be directly used to compare two versions
 pub struct VersionPartsIter<'a> {
     cached_part: InternalPart,
-    iter: std::str::CharIndices<'a>,
-    version: &'a str,
+    iter: bstr::CharIndices<'a>,
+    version: &'a BStr,
 }
 
 impl<'a> VersionPartsIter<'a> {
-    pub fn new(version: &'a str) -> Self {
+    pub fn new(version: &'a BStr) -> Self {
         Self {
             cached_part: InternalPart::Break,
             iter: version.char_indices(),
@@ -77,8 +81,8 @@ impl<'a> VersionPartsIter<'a> {
     /// like `2.3 < 2.3.0pre` ensues. Luckily for us, this means that we can
     /// lexicographically compare two version strings, _if_ we append an extra
     /// component to both versions.
-    pub fn new_for_cmp(version: &'a str) -> Chain<Self, Once<VersionPart>> {
-        Self::new(version).chain(once(VersionPart::Word("")))
+    pub fn new_for_cmp(version: &'a BStr) -> Chain<Self, Once<VersionPart>> {
+        Self::new(version).chain(once(VersionPart::Word("".into())))
     }
 }
 
@@ -101,7 +105,7 @@ impl<'a> Iterator for VersionPartsIter<'a> {
             }
         }
 
-        let (pos, char) = char.unwrap();
+        let (start, end, char) = char.unwrap();
         match char {
             // Divider encountered
             '.' | '-' => {
@@ -119,7 +123,9 @@ impl<'a> Iterator for VersionPartsIter<'a> {
             _ if char.is_ascii_digit() => {
                 let cached_part = std::mem::replace(
                     &mut self.cached_part,
-                    InternalPart::Number { range: pos..=pos },
+                    InternalPart::Number {
+                        range: start..=(end - 1),
+                    },
                 );
                 match cached_part {
                     InternalPart::Number { range } => {
@@ -135,7 +141,9 @@ impl<'a> Iterator for VersionPartsIter<'a> {
 
             // char encountered
             _ => {
-                let mut cached_part = InternalPart::Word { range: pos..=pos };
+                let mut cached_part = InternalPart::Word {
+                    range: start..=(end - 1),
+                };
                 std::mem::swap(&mut cached_part, &mut self.cached_part);
                 match cached_part {
                     InternalPart::Word { range } => {
diff --git a/tvix/eval/src/compiler/import.rs b/tvix/eval/src/compiler/import.rs
index 94a6602e9df6..4e8ea7195d81 100644
--- a/tvix/eval/src/compiler/import.rs
+++ b/tvix/eval/src/compiler/import.rs
@@ -6,6 +6,7 @@
 //! instance, or observers).
 
 use super::GlobalsMap;
+use bstr::ByteSlice;
 use genawaiter::rc::Gen;
 use std::rc::Weak;
 
@@ -40,11 +41,11 @@ async fn import_impl(
 
     // TODO(tazjin): make this return a string directly instead
     let contents: Value = generators::request_read_to_string(&co, path.clone()).await;
-    let contents = contents.to_str()?.as_str().to_string();
+    let contents = contents.to_str()?.to_str()?.to_owned();
 
     let parsed = rnix::ast::Root::parse(&contents);
     let errors = parsed.errors();
-    let file = source.add_file(path.to_string_lossy().to_string(), contents);
+    let file = source.add_file(path.to_string_lossy().to_string(), contents.to_owned());
 
     if !errors.is_empty() {
         return Err(ErrorKind::ImportParseError {
diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs
index 53a99a453d9e..b4ef7973c7e8 100644
--- a/tvix/eval/src/compiler/mod.rs
+++ b/tvix/eval/src/compiler/mod.rs
@@ -727,7 +727,7 @@ impl Compiler<'_> {
                 if let (Some(attr), None) = (path_iter.next(), path_iter.next()) {
                     // Only do this optimisation for statically known attrs.
                     if let Some(ident) = expr_static_attr_str(&attr) {
-                        if let Some(selected_value) = attrs.select(ident.as_str()) {
+                        if let Some(selected_value) = attrs.select(ident.as_bytes()) {
                             *constant = selected_value.clone();
 
                             // If this worked, we can unthunk the current thunk.
diff --git a/tvix/eval/src/errors.rs b/tvix/eval/src/errors.rs
index 1bd2e47e93ec..d5cb6ef34b0d 100644
--- a/tvix/eval/src/errors.rs
+++ b/tvix/eval/src/errors.rs
@@ -178,6 +178,9 @@ pub enum ErrorKind {
         formals_span: Span,
     },
 
+    /// Invalid UTF-8 was encoutered somewhere
+    Utf8,
+
     /// Errors while serialising to XML.
     Xml(Rc<XmlError>),
 
@@ -245,6 +248,18 @@ impl From<FromUtf8Error> for ErrorKind {
     }
 }
 
+impl From<bstr::Utf8Error> for ErrorKind {
+    fn from(_: bstr::Utf8Error) -> Self {
+        Self::Utf8
+    }
+}
+
+impl From<bstr::FromUtf8Error> for ErrorKind {
+    fn from(_value: bstr::FromUtf8Error) -> Self {
+        Self::Utf8
+    }
+}
+
 impl From<XmlError> for ErrorKind {
     fn from(err: XmlError) -> Self {
         Self::Xml(Rc::new(err))
@@ -457,11 +472,11 @@ to a missing value in the attribute set(s) included via `with`."#,
             }
 
             ErrorKind::UnexpectedArgument { arg, .. } => {
-                write!(
-                    f,
-                    "Unexpected argument `{}` supplied to function",
-                    arg.as_str()
-                )
+                write!(f, "Unexpected argument `{arg}` supplied to function",)
+            }
+
+            ErrorKind::Utf8 => {
+                write!(f, "Invalid UTF-8 in string")
             }
 
             ErrorKind::Xml(error) => write!(f, "failed to serialise to XML: {error}"),
@@ -775,6 +790,7 @@ impl Error {
             | ErrorKind::NotSerialisableToJson(_)
             | ErrorKind::FromTomlError(_)
             | ErrorKind::Xml(_)
+            | ErrorKind::Utf8
             | ErrorKind::TvixError(_)
             | ErrorKind::TvixBug { .. }
             | ErrorKind::NotImplemented(_)
@@ -819,6 +835,7 @@ impl Error {
             ErrorKind::FromTomlError(_) => "E035",
             ErrorKind::NotSerialisableToJson(_) => "E036",
             ErrorKind::UnexpectedContext => "E037",
+            ErrorKind::Utf8 => "E038",
 
             // Special error code for errors from other Tvix
             // components. We may want to introduce a code namespacing
diff --git a/tvix/eval/src/value/attrs.rs b/tvix/eval/src/value/attrs.rs
index b80d8b2db84e..2027653c3f39 100644
--- a/tvix/eval/src/value/attrs.rs
+++ b/tvix/eval/src/value/attrs.rs
@@ -5,8 +5,10 @@
 //!
 //! Due to this, construction and management of attribute sets has
 //! some peculiarities that are encapsulated within this module.
+use std::borrow::Borrow;
 use std::iter::FromIterator;
 
+use bstr::BStr;
 use imbl::{ordmap, OrdMap};
 use lazy_static::lazy_static;
 use serde::de::{Deserializer, Error, Visitor};
@@ -67,25 +69,25 @@ impl AttrsRep {
         }
     }
 
-    fn select(&self, key: &str) -> Option<&Value> {
+    fn select(&self, key: &BStr) -> Option<&Value> {
         match self {
             AttrsRep::Empty => None,
 
-            AttrsRep::KV { name, value } => match key {
-                "name" => Some(name),
-                "value" => Some(value),
+            AttrsRep::KV { name, value } => match &**key {
+                b"name" => Some(name),
+                b"value" => Some(value),
                 _ => None,
             },
 
-            AttrsRep::Im(map) => map.get(&key.into()),
+            AttrsRep::Im(map) => map.get(key),
         }
     }
 
-    fn contains(&self, key: &str) -> bool {
+    fn contains(&self, key: &BStr) -> bool {
         match self {
             AttrsRep::Empty => false,
             AttrsRep::KV { .. } => key == "name" || key == "value",
-            AttrsRep::Im(map) => map.contains_key(&key.into()),
+            AttrsRep::Im(map) => map.contains_key(key),
         }
     }
 }
@@ -264,19 +266,30 @@ impl NixAttrs {
     }
 
     /// Select a value from an attribute set by key.
-    pub fn select(&self, key: &str) -> Option<&Value> {
-        self.0.select(key)
+    pub fn select<K>(&self, key: &K) -> Option<&Value>
+    where
+        K: Borrow<BStr> + ?Sized,
+    {
+        self.0.select(key.borrow())
     }
 
     /// Select a required value from an attribute set by key, return
     /// an `AttributeNotFound` error if it is missing.
-    pub fn select_required(&self, key: &str) -> Result<&Value, ErrorKind> {
+    pub fn select_required<K>(&self, key: &K) -> Result<&Value, ErrorKind>
+    where
+        K: Borrow<BStr> + ?Sized,
+    {
         self.select(key)
-            .ok_or_else(|| ErrorKind::AttributeNotFound { name: key.into() })
+            .ok_or_else(|| ErrorKind::AttributeNotFound {
+                name: key.borrow().to_string(),
+            })
     }
 
-    pub fn contains(&self, key: &str) -> bool {
-        self.0.contains(key)
+    pub fn contains<'a, K: 'a>(&self, key: K) -> bool
+    where
+        &'a BStr: From<K>,
+    {
+        self.0.contains(key.into())
     }
 
     /// Construct an iterator over all the key-value pairs in the attribute set.
@@ -423,7 +436,7 @@ fn attempt_optimise_kv(slice: &mut [Value]) -> Option<NixAttrs> {
 fn set_attr(attrs: &mut NixAttrs, key: NixString, value: Value) -> Result<(), ErrorKind> {
     match attrs.0.map_mut().entry(key) {
         imbl::ordmap::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey {
-            key: entry.key().as_str().to_string(),
+            key: entry.key().to_string(),
         }),
 
         imbl::ordmap::Entry::Vacant(entry) => {
diff --git a/tvix/eval/src/value/attrs/tests.rs b/tvix/eval/src/value/attrs/tests.rs
index 5c693873cd75..d269044b32ba 100644
--- a/tvix/eval/src/value/attrs/tests.rs
+++ b/tvix/eval/src/value/attrs/tests.rs
@@ -1,3 +1,5 @@
+use bstr::B;
+
 use super::*;
 
 #[test]
@@ -99,6 +101,6 @@ fn test_map_attrs_iter() {
     let mut iter = attrs.iter().collect::<Vec<_>>().into_iter();
     let (k, v) = iter.next().unwrap();
     assert!(k == &NixString::from("key"));
-    assert!(v.to_str().unwrap().as_str() == "value");
+    assert_eq!(v.to_str().unwrap(), B("value"));
     assert!(iter.next().is_none());
 }
diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs
index 8c31bd0e6082..4e915a36e55a 100644
--- a/tvix/eval/src/value/json.rs
+++ b/tvix/eval/src/value/json.rs
@@ -7,6 +7,7 @@ use super::{CoercionKind, Value};
 use crate::errors::{CatchableErrorKind, ErrorKind};
 use crate::generators::{self, GenCo};
 
+use bstr::ByteSlice;
 use serde_json::value::to_value;
 use serde_json::Value as Json; // name clash with *our* `Value`
 use serde_json::{Map, Number};
@@ -23,7 +24,7 @@ impl Value {
             Value::Bool(b) => Json::Bool(b),
             Value::Integer(i) => Json::Number(Number::from(i)),
             Value::Float(f) => to_value(f)?,
-            Value::String(s) => Json::String(s.as_str().into()),
+            Value::String(s) => Json::String(s.to_str()?.to_owned()),
 
             Value::Path(p) => {
                 let imported = generators::request_path_import(co, *p).await;
@@ -61,7 +62,7 @@ impl Value {
                         .await?
                     {
                         Value::Catchable(cek) => return Ok(Err(cek)),
-                        Value::String(s) => return Ok(Ok(Json::String(s.as_str().to_owned()))),
+                        Value::String(s) => return Ok(Ok(Json::String(s.to_str()?.to_owned()))),
                         _ => panic!("Value::coerce_to_string_() returned a non-string!"),
                     }
                 }
@@ -76,7 +77,7 @@ impl Value {
                 let mut out = Map::with_capacity(attrs.len());
                 for (name, value) in attrs.into_iter_sorted() {
                     out.insert(
-                        name.as_str().to_string(),
+                        name.to_str()?.to_owned(),
                         match generators::request_to_json(co, value).await {
                             Ok(v) => v,
                             Err(cek) => return Ok(Err(cek)),
diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs
index ccca0bea5b2e..e181b3a2da4e 100644
--- a/tvix/eval/src/value/mod.rs
+++ b/tvix/eval/src/value/mod.rs
@@ -6,6 +6,7 @@ use std::num::{NonZeroI32, NonZeroUsize};
 use std::path::PathBuf;
 use std::rc::Rc;
 
+use bstr::{BString, ByteVec};
 use lexical_core::format::CXX_LITERAL;
 use serde::Deserialize;
 
@@ -313,7 +314,7 @@ impl Value {
         kind: CoercionKind,
         span: LightSpan,
     ) -> Result<Value, ErrorKind> {
-        let mut result = String::new();
+        let mut result = BString::default();
         let mut vals = vec![self];
         // Track if we are coercing the first value of a list to correctly emit
         // separating white spaces.
@@ -326,18 +327,15 @@ impl Value {
             let value = if let Some(v) = vals.pop() {
                 v.force(co, span.clone()).await?
             } else {
-                return Ok(Value::String(NixString::new_context_from(
-                    context,
-                    result.as_str(),
-                )));
+                return Ok(Value::String(NixString::new_context_from(context, result)));
             };
-            let coerced = match (value, kind) {
+            let coerced: Result<BString, _> = match (value, kind) {
                 // coercions that are always done
                 (Value::String(mut s), _) => {
                     if let Some(ctx) = s.context_mut() {
                         context = context.join(ctx);
                     }
-                    Ok(s.as_str().to_owned())
+                    Ok(s.into())
                 }
 
                 // TODO(sterni): Think about proper encoding handling here. This needs
@@ -357,7 +355,7 @@ impl Value {
                     context = context.append(NixContextElement::Plain(
                         imported.to_string_lossy().to_string(),
                     ));
-                    Ok(imported.to_string_lossy().into_owned())
+                    Ok(imported.into_os_string().into_encoded_bytes().into())
                 }
                 (
                     Value::Path(p),
@@ -365,7 +363,7 @@ impl Value {
                         import_paths: false,
                         ..
                     },
-                ) => Ok(p.to_string_lossy().into_owned()),
+                ) => Ok(p.into_os_string().into_encoded_bytes().into()),
 
                 // Attribute sets can be converted to strings if they either have an
                 // `__toString` attribute which holds a function that receives the
@@ -397,14 +395,14 @@ impl Value {
 
                 // strong coercions
                 (Value::Null, CoercionKind { strong: true, .. })
-                | (Value::Bool(false), CoercionKind { strong: true, .. }) => Ok("".to_owned()),
-                (Value::Bool(true), CoercionKind { strong: true, .. }) => Ok("1".to_owned()),
+                | (Value::Bool(false), CoercionKind { strong: true, .. }) => Ok("".into()),
+                (Value::Bool(true), CoercionKind { strong: true, .. }) => Ok("1".into()),
 
-                (Value::Integer(i), CoercionKind { strong: true, .. }) => Ok(format!("{i}")),
+                (Value::Integer(i), CoercionKind { strong: true, .. }) => Ok(format!("{i}").into()),
                 (Value::Float(f), CoercionKind { strong: true, .. }) => {
                     // contrary to normal Display, coercing a float to a string will
                     // result in unconditional 6 decimal places
-                    Ok(format!("{:.6}", f))
+                    Ok(format!("{:.6}", f).into())
                 }
 
                 // Lists are coerced by coercing their elements and interspersing spaces
@@ -448,7 +446,7 @@ impl Value {
 
             if let Some(head) = is_list_head {
                 if !head {
-                    result.push(' ');
+                    result.push(b' ');
                 } else {
                     is_list_head = Some(false);
                 }
@@ -576,7 +574,7 @@ impl Value {
                             let s2 = s2.to_str();
 
                             if let (Ok(s1), Ok(s2)) = (s1, s2) {
-                                if s1.as_str() == "derivation" && s2.as_str() == "derivation" {
+                                if s1 == "derivation" && s2 == "derivation" {
                                     // TODO(tazjin): are the outPaths really required,
                                     // or should it fall through?
                                     let out1 = a1
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs
index c528fd19f229..e2767dd22cdb 100644
--- a/tvix/eval/src/value/string.rs
+++ b/tvix/eval/src/value/string.rs
@@ -3,14 +3,13 @@
 //! Nix language strings never need to be modified on the language
 //! level, allowing us to shave off some memory overhead and only
 //! paying the cost when creating new strings.
+use bstr::{BStr, BString, ByteSlice, Chars};
 use rnix::ast;
+use std::borrow::{Borrow, Cow};
 use std::collections::HashSet;
-use std::ffi::OsStr;
+use std::fmt::Display;
 use std::hash::Hash;
 use std::ops::Deref;
-use std::path::Path;
-use std::str::{self, Utf8Error};
-use std::{borrow::Cow, fmt::Display, str::Chars};
 
 use serde::de::{Deserializer, Visitor};
 use serde::{Deserialize, Serialize};
@@ -147,16 +146,28 @@ impl NixContext {
 
 // FIXME: when serializing, ignore the context?
 #[derive(Clone, Debug, Serialize)]
-pub struct NixString(Box<str>, Option<NixContext>);
+pub struct NixString(BString, Option<NixContext>);
 
 impl PartialEq for NixString {
     fn eq(&self, other: &Self) -> bool {
-        self.as_str() == other.as_str()
+        self.as_bstr() == other.as_bstr()
     }
 }
 
 impl Eq for NixString {}
 
+impl PartialEq<&[u8]> for NixString {
+    fn eq(&self, other: &&[u8]) -> bool {
+        **self == **other
+    }
+}
+
+impl PartialEq<&str> for NixString {
+    fn eq(&self, other: &&str) -> bool {
+        **self == other.as_bytes()
+    }
+}
+
 impl PartialOrd for NixString {
     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
         Some(self.cmp(other))
@@ -165,39 +176,49 @@ impl PartialOrd for NixString {
 
 impl Ord for NixString {
     fn cmp(&self, other: &Self) -> std::cmp::Ordering {
-        self.as_str().cmp(other.as_str())
+        self.as_bstr().cmp(other.as_bstr())
+    }
+}
+
+impl From<&BStr> for NixString {
+    fn from(value: &BStr) -> Self {
+        Self(value.to_owned(), None)
     }
 }
 
-impl TryFrom<&[u8]> for NixString {
-    type Error = Utf8Error;
+impl From<&[u8]> for NixString {
+    fn from(value: &[u8]) -> Self {
+        Self(value.into(), None)
+    }
+}
 
-    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
-        Ok(Self(Box::from(str::from_utf8(value)?), None))
+impl From<Vec<u8>> for NixString {
+    fn from(value: Vec<u8>) -> Self {
+        Self(value.into(), None)
     }
 }
 
 impl From<&str> for NixString {
     fn from(s: &str) -> Self {
-        NixString(Box::from(s), None)
+        Self(s.as_bytes().into(), None)
     }
 }
 
 impl From<String> for NixString {
     fn from(s: String) -> Self {
-        NixString(s.into_boxed_str(), None)
+        Self(s.into(), None)
     }
 }
 
 impl From<(String, Option<NixContext>)> for NixString {
-    fn from(s: (String, Option<NixContext>)) -> Self {
-        NixString(s.0.into_boxed_str(), s.1)
+    fn from((s, ctx): (String, Option<NixContext>)) -> Self {
+        NixString(s.into(), ctx)
     }
 }
 
 impl From<Box<str>> for NixString {
     fn from(s: Box<str>) -> Self {
-        Self(s, None)
+        Self(s.into_boxed_bytes().into_vec().into(), None)
     }
 }
 
@@ -207,9 +228,33 @@ impl From<ast::Ident> for NixString {
     }
 }
 
+impl<'a> From<&'a NixString> for &'a BStr {
+    fn from(s: &'a NixString) -> Self {
+        BStr::new(&*s.0)
+    }
+}
+
+impl From<NixString> for BString {
+    fn from(s: NixString) -> Self {
+        s.0
+    }
+}
+
+impl AsRef<[u8]> for NixString {
+    fn as_ref(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+impl Borrow<BStr> for NixString {
+    fn borrow(&self) -> &BStr {
+        self.as_bstr()
+    }
+}
+
 impl Hash for NixString {
     fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
-        self.as_str().hash(state)
+        self.as_bstr().hash(state)
     }
 }
 
@@ -246,6 +291,14 @@ impl<'de> Deserialize<'de> for NixString {
     }
 }
 
+impl Deref for NixString {
+    type Target = BString;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
 #[cfg(feature = "arbitrary")]
 mod arbitrary {
     use super::*;
@@ -264,13 +317,13 @@ mod arbitrary {
 }
 
 impl NixString {
-    pub fn new_inherit_context_from(other: &NixString, new_contents: &str) -> Self {
-        Self(Box::from(new_contents), other.1.clone())
+    pub fn new_inherit_context_from(other: &NixString, new_contents: BString) -> Self {
+        Self(new_contents, other.1.clone())
     }
 
-    pub fn new_context_from(context: NixContext, contents: &str) -> Self {
+    pub fn new_context_from(context: NixContext, contents: BString) -> Self {
         Self(
-            Box::from(contents),
+            contents,
             if context.is_empty() {
                 None
             } else {
@@ -279,10 +332,22 @@ impl NixString {
         )
     }
 
-    pub fn as_str(&self) -> &str {
+    pub fn as_mut_bstring(&mut self) -> &mut BString {
+        &mut self.0
+    }
+
+    pub fn as_bstr(&self) -> &BStr {
+        BStr::new(self.as_bytes())
+    }
+
+    pub fn as_bytes(&self) -> &[u8] {
         &self.0
     }
 
+    pub fn into_bstring(self) -> BString {
+        self.0
+    }
+
     /// Return a displayable representation of the string as an
     /// identifier.
     ///
@@ -290,8 +355,10 @@ impl NixString {
     /// set keys, as those are only escaped in the presence of special
     /// characters.
     pub fn ident_str(&self) -> Cow<str> {
-        let escaped = nix_escape_string(self.as_str());
-
+        let escaped = match self.to_str_lossy() {
+            Cow::Borrowed(s) => nix_escape_string(s),
+            Cow::Owned(s) => nix_escape_string(&s).into_owned().into(),
+        };
         match escaped {
             // A borrowed string is unchanged and can be returned as
             // is.
@@ -310,8 +377,8 @@ impl NixString {
     }
 
     pub fn concat(&self, other: &Self) -> Self {
-        let mut s = self.as_str().to_owned();
-        s.push_str(other.as_str());
+        let mut s = self.to_vec();
+        s.extend(other.0.as_slice());
 
         let context = [&self.1, &other.1]
             .into_iter()
@@ -319,7 +386,7 @@ impl NixString {
             .fold(NixContext::new(), |acc_ctx, new_ctx| {
                 acc_ctx.join(&mut new_ctx.clone())
             });
-        Self::new_context_from(context, &s.into_boxed_str())
+        Self::new_context_from(context, s.into())
     }
 
     pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> {
@@ -436,37 +503,11 @@ fn nix_escape_string(input: &str) -> Cow<str> {
 impl Display for NixString {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         f.write_str("\"")?;
-        f.write_str(&nix_escape_string(self.as_str()))?;
+        f.write_str(&nix_escape_string(&self.to_str_lossy()))?;
         f.write_str("\"")
     }
 }
 
-impl AsRef<str> for NixString {
-    fn as_ref(&self) -> &str {
-        self.as_str()
-    }
-}
-
-impl AsRef<OsStr> for NixString {
-    fn as_ref(&self) -> &OsStr {
-        self.as_str().as_ref()
-    }
-}
-
-impl AsRef<Path> for NixString {
-    fn as_ref(&self) -> &Path {
-        self.as_str().as_ref()
-    }
-}
-
-impl Deref for NixString {
-    type Target = str;
-
-    fn deref(&self) -> &Self::Target {
-        self.as_str()
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs
index 5238fdc0662f..d51d44b6a110 100644
--- a/tvix/eval/src/vm/mod.rs
+++ b/tvix/eval/src/vm/mod.rs
@@ -12,6 +12,7 @@
 pub mod generators;
 mod macros;
 
+use bstr::{BString, ByteSlice, ByteVec};
 use codemap::Span;
 use serde_json::json;
 use std::{cmp::Ordering, collections::HashMap, ops::DerefMut, path::PathBuf, rc::Rc};
@@ -550,14 +551,14 @@ where
                         let key = key.to_str().with_span(&frame, self)?;
                         let attrs = attrs.to_attrs().with_span(&frame, self)?;
 
-                        match attrs.select(key.as_str()) {
+                        match attrs.select(&key) {
                             Some(value) => self.stack.push(value.clone()),
 
                             None => {
                                 return frame.error(
                                     self,
                                     ErrorKind::AttributeNotFound {
-                                        name: key.as_str().to_string(),
+                                        name: (**key).clone().into_string_lossy()
                                     },
                                 );
                             }
@@ -598,7 +599,7 @@ where
                 OpCode::OpAttrsTrySelect => {
                     let key = self.stack_pop().to_str().with_span(&frame, self)?;
                     let value = match self.stack_pop() {
-                        Value::Attrs(attrs) => match attrs.select(key.as_str()) {
+                        Value::Attrs(attrs) => match attrs.select(&key) {
                             Some(value) => value.clone(),
                             None => Value::AttrNotFound,
                         },
@@ -705,7 +706,7 @@ where
                     self(key, attrs) => {
                         let key = key.to_str().with_span(&frame, self)?;
                         let result = match attrs {
-                            Value::Attrs(attrs) => attrs.contains(key.as_str()),
+                            Value::Attrs(attrs) => attrs.contains(&key),
 
                             // Nix allows use of `?` on non-set types, but
                             // always returns false in those cases.
@@ -742,7 +743,7 @@ where
                     self.enqueue_generator("resolve_with", op_span, |co| {
                         resolve_with(
                             co,
-                            ident.as_str().to_owned(),
+                            ident.as_bstr().to_owned(),
                             with_stack_len,
                             closed_with_stack_len,
                         )
@@ -966,7 +967,7 @@ where
     /// fragments of the stack, evaluating them to strings, and pushing
     /// the concatenated result string back on the stack.
     fn run_interpolate(&mut self, frame: &CallFrame, count: usize) -> EvalResult<()> {
-        let mut out = String::new();
+        let mut out = BString::default();
         // Interpolation propagates the context and union them.
         let mut context: NixContext = NixContext::new();
 
@@ -980,7 +981,7 @@ where
                 return Ok(());
             }
             let mut nix_string = val.to_contextful_str().with_span(frame, self)?;
-            out.push_str(nix_string.as_str());
+            out.push_str(nix_string.as_bstr());
             if let Some(nix_string_ctx) = nix_string.context_mut() {
                 context = context.join(nix_string_ctx);
             }
@@ -988,7 +989,7 @@ where
 
         // FIXME: consume immediately here the String.
         self.stack
-            .push(Value::String(NixString::new_context_from(context, &out)));
+            .push(Value::String(NixString::new_context_from(context, out)));
         Ok(())
     }
 
@@ -1160,7 +1161,7 @@ where
 /// for matching values in the with-stacks carried at runtime.
 async fn resolve_with(
     co: GenCo,
-    ident: String,
+    ident: BString,
     vm_with_len: usize,
     upvalue_with_len: usize,
 ) -> Result<Value, ErrorKind> {
@@ -1213,7 +1214,7 @@ async fn resolve_with(
         }
     }
 
-    Err(ErrorKind::UnknownDynamicVariable(ident))
+    Err(ErrorKind::UnknownDynamicVariable(ident.to_string()))
 }
 
 // TODO(amjoseph): de-asyncify this
@@ -1221,7 +1222,7 @@ async fn add_values(co: GenCo, a: Value, b: Value) -> Result<Value, ErrorKind> {
     // What we try to do is solely determined by the type of the first value!
     let result = match (a, b) {
         (Value::Path(p), v) => {
-            let mut path = p.to_string_lossy().into_owned();
+            let mut path = p.into_os_string();
             match generators::request_string_coerce(
                 &co,
                 v,
@@ -1243,7 +1244,7 @@ async fn add_values(co: GenCo, a: Value, b: Value) -> Result<Value, ErrorKind> {
             .await
             {
                 Ok(vs) => {
-                    path.push_str(vs.as_str());
+                    path.push(vs.to_os_str()?);
                     crate::value::canon_path(PathBuf::from(path)).into()
                 }
                 Err(c) => Value::Catchable(c),