diff options
Diffstat (limited to 'tvix/eval')
-rw-r--r-- | tvix/eval/Cargo.toml | 2 | ||||
-rw-r--r-- | tvix/eval/src/builtins/impure.rs | 7 | ||||
-rw-r--r-- | tvix/eval/src/builtins/mod.rs | 109 | ||||
-rw-r--r-- | tvix/eval/src/builtins/to_xml.rs | 9 | ||||
-rw-r--r-- | tvix/eval/src/builtins/versions.rs | 38 | ||||
-rw-r--r-- | tvix/eval/src/compiler/import.rs | 5 | ||||
-rw-r--r-- | tvix/eval/src/compiler/mod.rs | 2 | ||||
-rw-r--r-- | tvix/eval/src/errors.rs | 27 | ||||
-rw-r--r-- | tvix/eval/src/value/attrs.rs | 41 | ||||
-rw-r--r-- | tvix/eval/src/value/attrs/tests.rs | 4 | ||||
-rw-r--r-- | tvix/eval/src/value/json.rs | 7 | ||||
-rw-r--r-- | tvix/eval/src/value/mod.rs | 28 | ||||
-rw-r--r-- | tvix/eval/src/value/string.rs | 149 | ||||
-rw-r--r-- | tvix/eval/src/vm/mod.rs | 25 |
14 files changed, 280 insertions, 173 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index d347d0ea687a..4f4551a349d2 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -11,6 +11,7 @@ name = "tvix_eval" [dependencies] builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" } bytes = "1.4.0" +bstr = { version = "1.8.0", features = ["serde"] } codemap = "0.1.3" codemap-diagnostic = "0.1.1" dirs = "4.0.0" @@ -19,6 +20,7 @@ imbl = { version = "2.0", features = [ "serde" ] } itertools = "0.12.0" lazy_static = "1.4.0" lexical-core = { version = "0.8.5", features = ["format", "parse-floats"] } +os_str_bytes = { version = "6.3", features = ["conversions"] } path-clean = "0.1" proptest = { version = "1.3.0", default_features = false, features = ["std", "alloc", "tempfile"], optional = true } regex = "1.6.0" diff --git a/tvix/eval/src/builtins/impure.rs b/tvix/eval/src/builtins/impure.rs index 5324fd565304..28b8697644dd 100644 --- a/tvix/eval/src/builtins/impure.rs +++ b/tvix/eval/src/builtins/impure.rs @@ -17,12 +17,17 @@ use crate::{ #[builtins] mod impure_builtins { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + use super::*; use crate::builtins::coerce_value_to_path; #[builtin("getEnv")] async fn builtin_get_env(co: GenCo, var: Value) -> Result<Value, ErrorKind> { - Ok(env::var(var.to_str()?).unwrap_or_else(|_| "".into()).into()) + Ok(env::var(OsStr::from_bytes(&var.to_str()?)) + .unwrap_or_else(|_| "".into()) + .into()) } #[builtin("pathExists")] diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index dc3ea3fc15d4..6ccae2f9de1c 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -3,6 +3,7 @@ //! See //tvix/eval/docs/builtins.md for a some context on the //! available builtins in Nix. +use bstr::ByteVec; use builtin_macros::builtins; use genawaiter::rc::Gen; use imbl::OrdMap; @@ -66,7 +67,7 @@ pub async fn coerce_value_to_path( .await { Ok(vs) => { - let path = PathBuf::from(vs.as_str()); + let path = (**vs).clone().into_path_buf()?; if path.is_absolute() { Ok(Ok(path)) } else { @@ -79,8 +80,12 @@ pub async fn coerce_value_to_path( #[builtins] mod pure_builtins { + use std::ffi::OsString; + + use bstr::{BString, ByteSlice}; use imbl::Vector; use itertools::Itertools; + use os_str_bytes::OsStringBytes; use crate::{value::PointerEquality, NixContext, NixContextElement}; @@ -187,7 +192,7 @@ mod pure_builtins { #[builtin("baseNameOf")] async fn builtin_base_name_of(co: GenCo, s: Value) -> Result<Value, ErrorKind> { let span = generators::request_span(&co).await; - let s = match s { + let mut s = match s { val @ Value::Catchable(_) => return Ok(val), _ => s .coerce_to_string( @@ -201,11 +206,12 @@ mod pure_builtins { .await? .to_contextful_str()?, }; - let result: NixString = NixString::new_inherit_context_from( - &s, - s.rsplit_once('/').map(|(_, x)| x).unwrap_or(&s), - ); - Ok(result.into()) + + let bs = s.as_mut_bstring(); + if let Some(last_slash) = bs.rfind_char('/') { + *bs = bs[(last_slash + 1)..].into(); + } + Ok(s.into()) } #[builtin("bitAnd")] @@ -240,7 +246,7 @@ mod pure_builtins { for item in list.into_iter() { let set = generators::request_force(&co, item).await.to_attrs()?; - if let Some(value) = set.select(key.as_str()) { + if let Some(value) = set.select(&key) { output.push(value.clone()); } } @@ -256,9 +262,9 @@ mod pure_builtins { #[builtin("compareVersions")] async fn builtin_compare_versions(co: GenCo, x: Value, y: Value) -> Result<Value, ErrorKind> { let s1 = x.to_str()?; - let s1 = VersionPartsIter::new_for_cmp(s1.as_str()); + let s1 = VersionPartsIter::new_for_cmp((&s1).into()); let s2 = y.to_str()?; - let s2 = VersionPartsIter::new_for_cmp(s2.as_str()); + let s2 = VersionPartsIter::new_for_cmp((&s2).into()); match s1.cmp(s2) { std::cmp::Ordering::Less => Ok(Value::Integer(-1)), @@ -323,7 +329,7 @@ mod pure_builtins { context = context.join(sep_context); } let list = list.to_list()?; - let mut res = String::new(); + let mut res = BString::default(); for (i, val) in list.into_iter().enumerate() { if i != 0 { res.push_str(&separator); @@ -339,7 +345,7 @@ mod pure_builtins { .await { Ok(mut s) => { - res.push_str(s.as_str()); + res.push_str(&s); if let Some(ref mut other_context) = s.context_mut() { // It is safe to consume the other context here // because the `list` and `separator` are originally @@ -353,7 +359,7 @@ mod pure_builtins { } } // FIXME: pass immediately the string res. - Ok(NixString::new_context_from(context, &res).into()) + Ok(NixString::new_context_from(context, res).into()) } #[builtin("deepSeq")] @@ -383,17 +389,24 @@ mod pure_builtins { .await? .to_contextful_str()?; let result = str - .rsplit_once('/') - .map(|(x, _)| match x { - "" => "/", - _ => x, + .rfind_char('/') + .map(|last_slash| { + let x = &str[..last_slash]; + if x.is_empty() { + b"/" + } else { + x + } }) - .unwrap_or("."); + .unwrap_or(b"."); if is_path { - Ok(Value::Path(Box::new(result.into()))) + Ok(Value::Path(Box::new(PathBuf::from( + OsString::assert_from_raw_vec(result.to_owned()), + )))) } else { Ok(Value::String(NixString::new_inherit_context_from( - &str, result, + &str, + result.into(), ))) } } @@ -519,7 +532,7 @@ mod pure_builtins { let json_str = json.to_str()?; - serde_json::from_str(&json_str).map_err(|err| err.into()) + serde_json::from_slice(&json_str).map_err(|err| err.into()) } #[builtin("toJSON")] @@ -537,7 +550,7 @@ mod pure_builtins { async fn builtin_from_toml(co: GenCo, toml: Value) -> Result<Value, ErrorKind> { let toml_str = toml.to_str()?; - toml::from_str(&toml_str).map_err(|err| err.into()) + toml::from_str(toml_str.to_str()?).map_err(|err| err.into()) } #[builtin("filterSource")] @@ -632,7 +645,7 @@ mod pure_builtins { let k = key.to_str()?; let xs = set.to_attrs()?; - match xs.select(k.as_str()) { + match xs.select(&k) { Some(x) => Ok(x.clone()), None => Err(ErrorKind::AttributeNotFound { name: k.to_string(), @@ -680,7 +693,7 @@ mod pure_builtins { let k = key.to_str()?; let xs = set.to_attrs()?; - Ok(Value::Bool(xs.contains(k.as_str()))) + Ok(Value::Bool(xs.contains(&k))) } #[builtin("hasContext")] @@ -1069,8 +1082,8 @@ mod pure_builtins { return Ok(re); } let re = re.to_str()?; - let re: Regex = Regex::new(&format!("^{}$", re.as_str())).unwrap(); - match re.captures(&s) { + let re: Regex = Regex::new(&format!("^{}$", re.to_str()?)).unwrap(); + match re.captures(s.to_str()?) { Some(caps) => Ok(Value::List( caps.iter() .skip(1) @@ -1106,7 +1119,7 @@ mod pure_builtins { // This replicates cppnix's (mis?)handling of codepoints // above U+007f following 0x2d ('-') let s = s.to_str()?; - let slice: &[u8] = s.as_str().as_ref(); + let slice: &[u8] = s.as_ref(); let (name, dash_and_version) = slice.split_at( slice .windows(2) @@ -1219,7 +1232,7 @@ mod pure_builtins { let mut string = s.to_contextful_str()?; - let mut res = String::new(); + let mut res = BString::default(); let mut i: usize = 0; let mut empty_string_replace = false; @@ -1248,27 +1261,27 @@ mod pure_builtins { // We already applied a from->to with an empty from // transformation. // Let's skip it so that we don't loop infinitely - if empty_string_replace && from.as_str().is_empty() { + if empty_string_replace && from.is_empty() { continue; } // if we match the `from` string, let's replace - if &string[i..i + from.len()] == from.as_str() { - res += &to; + if string[i..i + from.len()] == *from { + res.push_str(&to); i += from.len(); if let Some(to_ctx) = to.context_mut() { context = context.join(to_ctx); } // remember if we applied the empty from->to - empty_string_replace = from.as_str().is_empty(); + empty_string_replace = from.is_empty(); continue 'outer; } } // If we don't match any `from`, we simply add a character - res += &string[i..i + 1]; + res.push_str(&string[i..i + 1]); i += 1; // Since we didn't apply anything transformation, @@ -1286,8 +1299,8 @@ mod pure_builtins { // We don't need to merge again the context, it's already in the right state. let mut to = elem.1.to_contextful_str()?; - if from.as_str().is_empty() { - res += &to; + if from.is_empty() { + res.push_str(&to); if let Some(to_ctx) = to.context_mut() { context = context.join(to_ctx); } @@ -1295,8 +1308,7 @@ mod pure_builtins { } } - // FIXME: consume directly the String. - Ok(Value::String(NixString::new_context_from(context, &res))) + Ok(Value::String(NixString::new_context_from(context, res))) } #[builtin("seq")] @@ -1317,9 +1329,9 @@ mod pure_builtins { } let s = str.to_contextful_str()?; - let text = s.as_str(); + let text = s.to_str()?; let re = regex.to_str()?; - let re: Regex = Regex::new(re.as_str()).unwrap(); + let re = Regex::new(re.to_str()?).unwrap(); let mut capture_locations = re.capture_locations(); let num_captures = capture_locations.len(); let mut ret = imbl::Vector::new(); @@ -1329,7 +1341,7 @@ mod pure_builtins { // push the unmatched characters preceding the match ret.push_back(Value::from(NixString::new_inherit_context_from( &s, - &text[pos..thematch.start()], + (&text[pos..thematch.start()]).into(), ))); // Push a list with one element for each capture @@ -1380,7 +1392,7 @@ mod pure_builtins { return Ok(s); } let s = s.to_str()?; - let s = VersionPartsIter::new(s.as_str()); + let s = VersionPartsIter::new((&s).into()); let parts = s .map(|s| { @@ -1412,7 +1424,7 @@ mod pure_builtins { return Ok(s); } - Ok(Value::Integer(s.to_contextful_str()?.as_str().len() as i64)) + Ok(Value::Integer(s.to_contextful_str()?.len() as i64)) } #[builtin("sub")] @@ -1453,19 +1465,22 @@ mod pure_builtins { // Nix doesn't assert that the length argument is // non-negative when the starting index is GTE the // string's length. - if beg >= x.as_str().len() { - return Ok(Value::String(NixString::new_inherit_context_from(&x, ""))); + if beg >= x.len() { + return Ok(Value::String(NixString::new_inherit_context_from( + &x, + BString::default(), + ))); } let end = if len < 0 { - x.as_str().len() + x.len() } else { - cmp::min(beg + (len as usize), x.as_str().len()) + cmp::min(beg + (len as usize), x.len()) }; Ok(Value::String(NixString::new_inherit_context_from( &x, - &x[beg..end], + (&x[beg..end]).into(), ))) } diff --git a/tvix/eval/src/builtins/to_xml.rs b/tvix/eval/src/builtins/to_xml.rs index f23cb22db3d9..2f9a11e78852 100644 --- a/tvix/eval/src/builtins/to_xml.rs +++ b/tvix/eval/src/builtins/to_xml.rs @@ -2,6 +2,7 @@ //! of value information as well as internal tvix state that several //! things in nixpkgs rely on. +use bstr::ByteSlice; use std::{io::Write, rc::Rc}; use xml::writer::events::XmlEvent; use xml::writer::EmitterConfig; @@ -60,7 +61,7 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu Value::Bool(b) => return write_typed_value(w, "bool", b), Value::Integer(i) => return write_typed_value(w, "int", i), Value::Float(f) => return write_typed_value(w, "float", f), - Value::String(s) => return write_typed_value(w, "string", s.as_str()), + Value::String(s) => return write_typed_value(w, "string", s.to_str()?), Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()), Value::List(list) => { @@ -77,7 +78,7 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu w.write(XmlEvent::start_element("attrs"))?; for elem in attrs.iter() { - w.write(XmlEvent::start_element("attr").attr("name", elem.0.as_str()))?; + w.write(XmlEvent::start_element("attr").attr("name", &elem.0.to_str_lossy()))?; value_variant_to_xml(w, elem.1)?; w.write(XmlEvent::end_element())?; } @@ -101,7 +102,9 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu w.write(attrspat)?; for arg in formals.arguments.iter() { - w.write(XmlEvent::start_element("attr").attr("name", arg.0.as_str()))?; + w.write( + XmlEvent::start_element("attr").attr("name", &arg.0.to_str_lossy()), + )?; w.write(XmlEvent::end_element())?; } diff --git a/tvix/eval/src/builtins/versions.rs b/tvix/eval/src/builtins/versions.rs index 79fb82b868fb..6de512142440 100644 --- a/tvix/eval/src/builtins/versions.rs +++ b/tvix/eval/src/builtins/versions.rs @@ -2,13 +2,15 @@ use std::cmp::Ordering; use std::iter::{once, Chain, Once}; use std::ops::RangeInclusive; +use bstr::{BStr, ByteSlice, B}; + /// Version strings can be broken up into Parts. /// One Part represents either a string of digits or characters. /// '.' and '_' represent deviders between parts and are not included in any part. #[derive(PartialEq, Eq, Clone, Debug)] pub enum VersionPart<'a> { - Word(&'a str), - Number(&'a str), + Word(&'a BStr), + Number(&'a BStr), } impl PartialOrd for VersionPart<'_> { @@ -23,15 +25,17 @@ impl Ord for VersionPart<'_> { (VersionPart::Number(s1), VersionPart::Number(s2)) => { // Note: C++ Nix uses `int`, but probably doesn't make a difference // We trust that the splitting was done correctly and parsing will work - let n1: u64 = s1.parse().unwrap(); - let n2: u64 = s2.parse().unwrap(); + let n1: u64 = s1.to_str_lossy().parse().unwrap(); + let n2: u64 = s2.to_str_lossy().parse().unwrap(); n1.cmp(&n2) } // `pre` looses unless the other part is also a `pre` - (VersionPart::Word("pre"), VersionPart::Word("pre")) => Ordering::Equal, - (VersionPart::Word("pre"), _) => Ordering::Less, - (_, VersionPart::Word("pre")) => Ordering::Greater, + (VersionPart::Word(x), VersionPart::Word(y)) if *x == B("pre") && *y == B("pre") => { + Ordering::Equal + } + (VersionPart::Word(x), _) if *x == B("pre") => Ordering::Less, + (_, VersionPart::Word(y)) if *y == B("pre") => Ordering::Greater, // Number wins against Word (VersionPart::Number(_), VersionPart::Word(_)) => Ordering::Greater, @@ -54,12 +58,12 @@ enum InternalPart { /// This can then be directly used to compare two versions pub struct VersionPartsIter<'a> { cached_part: InternalPart, - iter: std::str::CharIndices<'a>, - version: &'a str, + iter: bstr::CharIndices<'a>, + version: &'a BStr, } impl<'a> VersionPartsIter<'a> { - pub fn new(version: &'a str) -> Self { + pub fn new(version: &'a BStr) -> Self { Self { cached_part: InternalPart::Break, iter: version.char_indices(), @@ -77,8 +81,8 @@ impl<'a> VersionPartsIter<'a> { /// like `2.3 < 2.3.0pre` ensues. Luckily for us, this means that we can /// lexicographically compare two version strings, _if_ we append an extra /// component to both versions. - pub fn new_for_cmp(version: &'a str) -> Chain<Self, Once<VersionPart>> { - Self::new(version).chain(once(VersionPart::Word(""))) + pub fn new_for_cmp(version: &'a BStr) -> Chain<Self, Once<VersionPart>> { + Self::new(version).chain(once(VersionPart::Word("".into()))) } } @@ -101,7 +105,7 @@ impl<'a> Iterator for VersionPartsIter<'a> { } } - let (pos, char) = char.unwrap(); + let (start, end, char) = char.unwrap(); match char { // Divider encountered '.' | '-' => { @@ -119,7 +123,9 @@ impl<'a> Iterator for VersionPartsIter<'a> { _ if char.is_ascii_digit() => { let cached_part = std::mem::replace( &mut self.cached_part, - InternalPart::Number { range: pos..=pos }, + InternalPart::Number { + range: start..=(end - 1), + }, ); match cached_part { InternalPart::Number { range } => { @@ -135,7 +141,9 @@ impl<'a> Iterator for VersionPartsIter<'a> { // char encountered _ => { - let mut cached_part = InternalPart::Word { range: pos..=pos }; + let mut cached_part = InternalPart::Word { + range: start..=(end - 1), + }; std::mem::swap(&mut cached_part, &mut self.cached_part); match cached_part { InternalPart::Word { range } => { diff --git a/tvix/eval/src/compiler/import.rs b/tvix/eval/src/compiler/import.rs index 94a6602e9df6..4e8ea7195d81 100644 --- a/tvix/eval/src/compiler/import.rs +++ b/tvix/eval/src/compiler/import.rs @@ -6,6 +6,7 @@ //! instance, or observers). use super::GlobalsMap; +use bstr::ByteSlice; use genawaiter::rc::Gen; use std::rc::Weak; @@ -40,11 +41,11 @@ async fn import_impl( // TODO(tazjin): make this return a string directly instead let contents: Value = generators::request_read_to_string(&co, path.clone()).await; - let contents = contents.to_str()?.as_str().to_string(); + let contents = contents.to_str()?.to_str()?.to_owned(); let parsed = rnix::ast::Root::parse(&contents); let errors = parsed.errors(); - let file = source.add_file(path.to_string_lossy().to_string(), contents); + let file = source.add_file(path.to_string_lossy().to_string(), contents.to_owned()); if !errors.is_empty() { return Err(ErrorKind::ImportParseError { diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 53a99a453d9e..b4ef7973c7e8 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -727,7 +727,7 @@ impl Compiler<'_> { if let (Some(attr), None) = (path_iter.next(), path_iter.next()) { // Only do this optimisation for statically known attrs. if let Some(ident) = expr_static_attr_str(&attr) { - if let Some(selected_value) = attrs.select(ident.as_str()) { + if let Some(selected_value) = attrs.select(ident.as_bytes()) { *constant = selected_value.clone(); // If this worked, we can unthunk the current thunk. diff --git a/tvix/eval/src/errors.rs b/tvix/eval/src/errors.rs index 1bd2e47e93ec..d5cb6ef34b0d 100644 --- a/tvix/eval/src/errors.rs +++ b/tvix/eval/src/errors.rs @@ -178,6 +178,9 @@ pub enum ErrorKind { formals_span: Span, }, + /// Invalid UTF-8 was encoutered somewhere + Utf8, + /// Errors while serialising to XML. Xml(Rc<XmlError>), @@ -245,6 +248,18 @@ impl From<FromUtf8Error> for ErrorKind { } } +impl From<bstr::Utf8Error> for ErrorKind { + fn from(_: bstr::Utf8Error) -> Self { + Self::Utf8 + } +} + +impl From<bstr::FromUtf8Error> for ErrorKind { + fn from(_value: bstr::FromUtf8Error) -> Self { + Self::Utf8 + } +} + impl From<XmlError> for ErrorKind { fn from(err: XmlError) -> Self { Self::Xml(Rc::new(err)) @@ -457,11 +472,11 @@ to a missing value in the attribute set(s) included via `with`."#, } ErrorKind::UnexpectedArgument { arg, .. } => { - write!( - f, - "Unexpected argument `{}` supplied to function", - arg.as_str() - ) + write!(f, "Unexpected argument `{arg}` supplied to function",) + } + + ErrorKind::Utf8 => { + write!(f, "Invalid UTF-8 in string") } ErrorKind::Xml(error) => write!(f, "failed to serialise to XML: {error}"), @@ -775,6 +790,7 @@ impl Error { | ErrorKind::NotSerialisableToJson(_) | ErrorKind::FromTomlError(_) | ErrorKind::Xml(_) + | ErrorKind::Utf8 | ErrorKind::TvixError(_) | ErrorKind::TvixBug { .. } | ErrorKind::NotImplemented(_) @@ -819,6 +835,7 @@ impl Error { ErrorKind::FromTomlError(_) => "E035", ErrorKind::NotSerialisableToJson(_) => "E036", ErrorKind::UnexpectedContext => "E037", + ErrorKind::Utf8 => "E038", // Special error code for errors from other Tvix // components. We may want to introduce a code namespacing diff --git a/tvix/eval/src/value/attrs.rs b/tvix/eval/src/value/attrs.rs index b80d8b2db84e..2027653c3f39 100644 --- a/tvix/eval/src/value/attrs.rs +++ b/tvix/eval/src/value/attrs.rs @@ -5,8 +5,10 @@ //! //! Due to this, construction and management of attribute sets has //! some peculiarities that are encapsulated within this module. +use std::borrow::Borrow; use std::iter::FromIterator; +use bstr::BStr; use imbl::{ordmap, OrdMap}; use lazy_static::lazy_static; use serde::de::{Deserializer, Error, Visitor}; @@ -67,25 +69,25 @@ impl AttrsRep { } } - fn select(&self, key: &str) -> Option<&Value> { + fn select(&self, key: &BStr) -> Option<&Value> { match self { AttrsRep::Empty => None, - AttrsRep::KV { name, value } => match key { - "name" => Some(name), - "value" => Some(value), + AttrsRep::KV { name, value } => match &**key { + b"name" => Some(name), + b"value" => Some(value), _ => None, }, - AttrsRep::Im(map) => map.get(&key.into()), + AttrsRep::Im(map) => map.get(key), } } - fn contains(&self, key: &str) -> bool { + fn contains(&self, key: &BStr) -> bool { match self { AttrsRep::Empty => false, AttrsRep::KV { .. } => key == "name" || key == "value", - AttrsRep::Im(map) => map.contains_key(&key.into()), + AttrsRep::Im(map) => map.contains_key(key), } } } @@ -264,19 +266,30 @@ impl NixAttrs { } /// Select a value from an attribute set by key. - pub fn select(&self, key: &str) -> Option<&Value> { - self.0.select(key) + pub fn select<K>(&self, key: &K) -> Option<&Value> + where + K: Borrow<BStr> + ?Sized, + { + self.0.select(key.borrow()) } /// Select a required value from an attribute set by key, return /// an `AttributeNotFound` error if it is missing. - pub fn select_required(&self, key: &str) -> Result<&Value, ErrorKind> { + pub fn select_required<K>(&self, key: &K) -> Result<&Value, ErrorKind> + where + K: Borrow<BStr> + ?Sized, + { self.select(key) - .ok_or_else(|| ErrorKind::AttributeNotFound { name: key.into() }) + .ok_or_else(|| ErrorKind::AttributeNotFound { + name: key.borrow().to_string(), + }) } - pub fn contains(&self, key: &str) -> bool { - self.0.contains(key) + pub fn contains<'a, K: 'a>(&self, key: K) -> bool + where + &'a BStr: From<K>, + { + self.0.contains(key.into()) } /// Construct an iterator over all the key-value pairs in the attribute set. @@ -423,7 +436,7 @@ fn attempt_optimise_kv(slice: &mut [Value]) -> Option<NixAttrs> { fn set_attr(attrs: &mut NixAttrs, key: NixString, value: Value) -> Result<(), ErrorKind> { match attrs.0.map_mut().entry(key) { imbl::ordmap::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey { - key: entry.key().as_str().to_string(), + key: entry.key().to_string(), }), imbl::ordmap::Entry::Vacant(entry) => { diff --git a/tvix/eval/src/value/attrs/tests.rs b/tvix/eval/src/value/attrs/tests.rs index 5c693873cd75..d269044b32ba 100644 --- a/tvix/eval/src/value/attrs/tests.rs +++ b/tvix/eval/src/value/attrs/tests.rs @@ -1,3 +1,5 @@ +use bstr::B; + use super::*; #[test] @@ -99,6 +101,6 @@ fn test_map_attrs_iter() { let mut iter = attrs.iter().collect::<Vec<_>>().into_iter(); let (k, v) = iter.next().unwrap(); assert!(k == &NixString::from("key")); - assert!(v.to_str().unwrap().as_str() == "value"); + assert_eq!(v.to_str().unwrap(), B("value")); assert!(iter.next().is_none()); } diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs index 8c31bd0e6082..4e915a36e55a 100644 --- a/tvix/eval/src/value/json.rs +++ b/tvix/eval/src/value/json.rs @@ -7,6 +7,7 @@ use super::{CoercionKind, Value}; use crate::errors::{CatchableErrorKind, ErrorKind}; use crate::generators::{self, GenCo}; +use bstr::ByteSlice; use serde_json::value::to_value; use serde_json::Value as Json; // name clash with *our* `Value` use serde_json::{Map, Number}; @@ -23,7 +24,7 @@ impl Value { Value::Bool(b) => Json::Bool(b), Value::Integer(i) => Json::Number(Number::from(i)), Value::Float(f) => to_value(f)?, - Value::String(s) => Json::String(s.as_str().into()), + Value::String(s) => Json::String(s.to_str()?.to_owned()), Value::Path(p) => { let imported = generators::request_path_import(co, *p).await; @@ -61,7 +62,7 @@ impl Value { .await? { Value::Catchable(cek) => return Ok(Err(cek)), - Value::String(s) => return Ok(Ok(Json::String(s.as_str().to_owned()))), + Value::String(s) => return Ok(Ok(Json::String(s.to_str()?.to_owned()))), _ => panic!("Value::coerce_to_string_() returned a non-string!"), } } @@ -76,7 +77,7 @@ impl Value { let mut out = Map::with_capacity(attrs.len()); for (name, value) in attrs.into_iter_sorted() { out.insert( - name.as_str().to_string(), + name.to_str()?.to_owned(), match generators::request_to_json(co, value).await { Ok(v) => v, Err(cek) => return Ok(Err(cek)), diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index ccca0bea5b2e..e181b3a2da4e 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -6,6 +6,7 @@ use std::num::{NonZeroI32, NonZeroUsize}; use std::path::PathBuf; use std::rc::Rc; +use bstr::{BString, ByteVec}; use lexical_core::format::CXX_LITERAL; use serde::Deserialize; @@ -313,7 +314,7 @@ impl Value { kind: CoercionKind, span: LightSpan, ) -> Result<Value, ErrorKind> { - let mut result = String::new(); + let mut result = BString::default(); let mut vals = vec![self]; // Track if we are coercing the first value of a list to correctly emit // separating white spaces. @@ -326,18 +327,15 @@ impl Value { let value = if let Some(v) = vals.pop() { v.force(co, span.clone()).await? } else { - return Ok(Value::String(NixString::new_context_from( - context, - result.as_str(), - ))); + return Ok(Value::String(NixString::new_context_from(context, result))); }; - let coerced = match (value, kind) { + let coerced: Result<BString, _> = match (value, kind) { // coercions that are always done (Value::String(mut s), _) => { if let Some(ctx) = s.context_mut() { context = context.join(ctx); } - Ok(s.as_str().to_owned()) + Ok(s.into()) } // TODO(sterni): Think about proper encoding handling here. This needs @@ -357,7 +355,7 @@ impl Value { context = context.append(NixContextElement::Plain( imported.to_string_lossy().to_string(), )); - Ok(imported.to_string_lossy().into_owned()) + Ok(imported.into_os_string().into_encoded_bytes().into()) } ( Value::Path(p), @@ -365,7 +363,7 @@ impl Value { import_paths: false, .. }, - ) => Ok(p.to_string_lossy().into_owned()), + ) => Ok(p.into_os_string().into_encoded_bytes().into()), // Attribute sets can be converted to strings if they either have an // `__toString` attribute which holds a function that receives the @@ -397,14 +395,14 @@ impl Value { // strong coercions (Value::Null, CoercionKind { strong: true, .. }) - | (Value::Bool(false), CoercionKind { strong: true, .. }) => Ok("".to_owned()), - (Value::Bool(true), CoercionKind { strong: true, .. }) => Ok("1".to_owned()), + | (Value::Bool(false), CoercionKind { strong: true, .. }) => Ok("".into()), + (Value::Bool(true), CoercionKind { strong: true, .. }) => Ok("1".into()), - (Value::Integer(i), CoercionKind { strong: true, .. }) => Ok(format!("{i}")), + (Value::Integer(i), CoercionKind { strong: true, .. }) => Ok(format!("{i}").into()), (Value::Float(f), CoercionKind { strong: true, .. }) => { // contrary to normal Display, coercing a float to a string will // result in unconditional 6 decimal places - Ok(format!("{:.6}", f)) + Ok(format!("{:.6}", f).into()) } // Lists are coerced by coercing their elements and interspersing spaces @@ -448,7 +446,7 @@ impl Value { if let Some(head) = is_list_head { if !head { - result.push(' '); + result.push(b' '); } else { is_list_head = Some(false); } @@ -576,7 +574,7 @@ impl Value { let s2 = s2.to_str(); if let (Ok(s1), Ok(s2)) = (s1, s2) { - if s1.as_str() == "derivation" && s2.as_str() == "derivation" { + if s1 == "derivation" && s2 == "derivation" { // TODO(tazjin): are the outPaths really required, // or should it fall through? let out1 = a1 diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index c528fd19f229..e2767dd22cdb 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -3,14 +3,13 @@ //! Nix language strings never need to be modified on the language //! level, allowing us to shave off some memory overhead and only //! paying the cost when creating new strings. +use bstr::{BStr, BString, ByteSlice, Chars}; use rnix::ast; +use std::borrow::{Borrow, Cow}; use std::collections::HashSet; -use std::ffi::OsStr; +use std::fmt::Display; use std::hash::Hash; use std::ops::Deref; -use std::path::Path; -use std::str::{self, Utf8Error}; -use std::{borrow::Cow, fmt::Display, str::Chars}; use serde::de::{Deserializer, Visitor}; use serde::{Deserialize, Serialize}; @@ -147,16 +146,28 @@ impl NixContext { // FIXME: when serializing, ignore the context? #[derive(Clone, Debug, Serialize)] -pub struct NixString(Box<str>, Option<NixContext>); +pub struct NixString(BString, Option<NixContext>); impl PartialEq for NixString { fn eq(&self, other: &Self) -> bool { - self.as_str() == other.as_str() + self.as_bstr() == other.as_bstr() } } impl Eq for NixString {} +impl PartialEq<&[u8]> for NixString { + fn eq(&self, other: &&[u8]) -> bool { + **self == **other + } +} + +impl PartialEq<&str> for NixString { + fn eq(&self, other: &&str) -> bool { + **self == other.as_bytes() + } +} + impl PartialOrd for NixString { fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { Some(self.cmp(other)) @@ -165,39 +176,49 @@ impl PartialOrd for NixString { impl Ord for NixString { fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.as_str().cmp(other.as_str()) + self.as_bstr().cmp(other.as_bstr()) + } +} + +impl From<&BStr> for NixString { + fn from(value: &BStr) -> Self { + Self(value.to_owned(), None) } } -impl TryFrom<&[u8]> for NixString { - type Error = Utf8Error; +impl From<&[u8]> for NixString { + fn from(value: &[u8]) -> Self { + Self(value.into(), None) + } +} - fn try_from(value: &[u8]) -> Result<Self, Self::Error> { - Ok(Self(Box::from(str::from_utf8(value)?), None)) +impl From<Vec<u8>> for NixString { + fn from(value: Vec<u8>) -> Self { + Self(value.into(), None) } } impl From<&str> for NixString { fn from(s: &str) -> Self { - NixString(Box::from(s), None) + Self(s.as_bytes().into(), None) } } impl From<String> for NixString { fn from(s: String) -> Self { - NixString(s.into_boxed_str(), None) + Self(s.into(), None) } } impl From<(String, Option<NixContext>)> for NixString { - fn from(s: (String, Option<NixContext>)) -> Self { - NixString(s.0.into_boxed_str(), s.1) + fn from((s, ctx): (String, Option<NixContext>)) -> Self { + NixString(s.into(), ctx) } } impl From<Box<str>> for NixString { fn from(s: Box<str>) -> Self { - Self(s, None) + Self(s.into_boxed_bytes().into_vec().into(), None) } } @@ -207,9 +228,33 @@ impl From<ast::Ident> for NixString { } } +impl<'a> From<&'a NixString> for &'a BStr { + fn from(s: &'a NixString) -> Self { + BStr::new(&*s.0) + } +} + +impl From<NixString> for BString { + fn from(s: NixString) -> Self { + s.0 + } +} + +impl AsRef<[u8]> for NixString { + fn as_ref(&self) -> &[u8] { + &self.0 + } +} + +impl Borrow<BStr> for NixString { + fn borrow(&self) -> &BStr { + self.as_bstr() + } +} + impl Hash for NixString { fn hash<H: std::hash::Hasher>(&self, state: &mut H) { - self.as_str().hash(state) + self.as_bstr().hash(state) } } @@ -246,6 +291,14 @@ impl<'de> Deserialize<'de> for NixString { } } +impl Deref for NixString { + type Target = BString; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + #[cfg(feature = "arbitrary")] mod arbitrary { use super::*; @@ -264,13 +317,13 @@ mod arbitrary { } impl NixString { - pub fn new_inherit_context_from(other: &NixString, new_contents: &str) -> Self { - Self(Box::from(new_contents), other.1.clone()) + pub fn new_inherit_context_from(other: &NixString, new_contents: BString) -> Self { + Self(new_contents, other.1.clone()) } - pub fn new_context_from(context: NixContext, contents: &str) -> Self { + pub fn new_context_from(context: NixContext, contents: BString) -> Self { Self( - Box::from(contents), + contents, if context.is_empty() { None } else { @@ -279,10 +332,22 @@ impl NixString { ) } - pub fn as_str(&self) -> &str { + pub fn as_mut_bstring(&mut self) -> &mut BString { + &mut self.0 + } + + pub fn as_bstr(&self) -> &BStr { + BStr::new(self.as_bytes()) + } + + pub fn as_bytes(&self) -> &[u8] { &self.0 } + pub fn into_bstring(self) -> BString { + self.0 + } + /// Return a displayable representation of the string as an /// identifier. /// @@ -290,8 +355,10 @@ impl NixString { /// set keys, as those are only escaped in the presence of special /// characters. pub fn ident_str(&self) -> Cow<str> { - let escaped = nix_escape_string(self.as_str()); - + let escaped = match self.to_str_lossy() { + Cow::Borrowed(s) => nix_escape_string(s), + Cow::Owned(s) => nix_escape_string(&s).into_owned().into(), + }; match escaped { // A borrowed string is unchanged and can be returned as // is. @@ -310,8 +377,8 @@ impl NixString { } pub fn concat(&self, other: &Self) -> Self { - let mut s = self.as_str().to_owned(); - s.push_str(other.as_str()); + let mut s = self.to_vec(); + s.extend(other.0.as_slice()); let context = [&self.1, &other.1] .into_iter() @@ -319,7 +386,7 @@ impl NixString { .fold(NixContext::new(), |acc_ctx, new_ctx| { acc_ctx.join(&mut new_ctx.clone()) }); - Self::new_context_from(context, &s.into_boxed_str()) + Self::new_context_from(context, s.into()) } pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> { @@ -436,37 +503,11 @@ fn nix_escape_string(input: &str) -> Cow<str> { impl Display for NixString { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("\"")?; - f.write_str(&nix_escape_string(self.as_str()))?; + f.write_str(&nix_escape_string(&self.to_str_lossy()))?; f.write_str("\"") } } -impl AsRef<str> for NixString { - fn as_ref(&self) -> &str { - self.as_str() - } -} - -impl AsRef<OsStr> for NixString { - fn as_ref(&self) -> &OsStr { - self.as_str().as_ref() - } -} - -impl AsRef<Path> for NixString { - fn as_ref(&self) -> &Path { - self.as_str().as_ref() - } -} - -impl Deref for NixString { - type Target = str; - - fn deref(&self) -> &Self::Target { - self.as_str() - } -} - #[cfg(test)] mod tests { use super::*; diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 5238fdc0662f..d51d44b6a110 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -12,6 +12,7 @@ pub mod generators; mod macros; +use bstr::{BString, ByteSlice, ByteVec}; use codemap::Span; use serde_json::json; use std::{cmp::Ordering, collections::HashMap, ops::DerefMut, path::PathBuf, rc::Rc}; @@ -550,14 +551,14 @@ where let key = key.to_str().with_span(&frame, self)?; let attrs = attrs.to_attrs().with_span(&frame, self)?; - match attrs.select(key.as_str()) { + match attrs.select(&key) { Some(value) => self.stack.push(value.clone()), None => { return frame.error( self, ErrorKind::AttributeNotFound { - name: key.as_str().to_string(), + name: (**key).clone().into_string_lossy() }, ); } @@ -598,7 +599,7 @@ where OpCode::OpAttrsTrySelect => { let key = self.stack_pop().to_str().with_span(&frame, self)?; let value = match self.stack_pop() { - Value::Attrs(attrs) => match attrs.select(key.as_str()) { + Value::Attrs(attrs) => match attrs.select(&key) { Some(value) => value.clone(), None => Value::AttrNotFound, }, @@ -705,7 +706,7 @@ where self(key, attrs) => { let key = key.to_str().with_span(&frame, self)?; let result = match attrs { - Value::Attrs(attrs) => attrs.contains(key.as_str()), + Value::Attrs(attrs) => attrs.contains(&key), // Nix allows use of `?` on non-set types, but // always returns false in those cases. @@ -742,7 +743,7 @@ where self.enqueue_generator("resolve_with", op_span, |co| { resolve_with( co, - ident.as_str().to_owned(), + ident.as_bstr().to_owned(), with_stack_len, closed_with_stack_len, ) @@ -966,7 +967,7 @@ where /// fragments of the stack, evaluating them to strings, and pushing /// the concatenated result string back on the stack. fn run_interpolate(&mut self, frame: &CallFrame, count: usize) -> EvalResult<()> { - let mut out = String::new(); + let mut out = BString::default(); // Interpolation propagates the context and union them. let mut context: NixContext = NixContext::new(); @@ -980,7 +981,7 @@ where return Ok(()); } let mut nix_string = val.to_contextful_str().with_span(frame, self)?; - out.push_str(nix_string.as_str()); + out.push_str(nix_string.as_bstr()); if let Some(nix_string_ctx) = nix_string.context_mut() { context = context.join(nix_string_ctx); } @@ -988,7 +989,7 @@ where // FIXME: consume immediately here the String. self.stack - .push(Value::String(NixString::new_context_from(context, &out))); + .push(Value::String(NixString::new_context_from(context, out))); Ok(()) } @@ -1160,7 +1161,7 @@ where /// for matching values in the with-stacks carried at runtime. async fn resolve_with( co: GenCo, - ident: String, + ident: BString, vm_with_len: usize, upvalue_with_len: usize, ) -> Result<Value, ErrorKind> { @@ -1213,7 +1214,7 @@ async fn resolve_with( } } - Err(ErrorKind::UnknownDynamicVariable(ident)) + Err(ErrorKind::UnknownDynamicVariable(ident.to_string())) } // TODO(amjoseph): de-asyncify this @@ -1221,7 +1222,7 @@ async fn add_values(co: GenCo, a: Value, b: Value) -> Result<Value, ErrorKind> { // What we try to do is solely determined by the type of the first value! let result = match (a, b) { (Value::Path(p), v) => { - let mut path = p.to_string_lossy().into_owned(); + let mut path = p.into_os_string(); match generators::request_string_coerce( &co, v, @@ -1243,7 +1244,7 @@ async fn add_values(co: GenCo, a: Value, b: Value) -> Result<Value, ErrorKind> { .await { Ok(vs) => { - path.push_str(vs.as_str()); + path.push(vs.to_os_str()?); crate::value::canon_path(PathBuf::from(path)).into() } Err(c) => Value::Catchable(c), |