diff options
Diffstat (limited to 'tvix/eval/src/value')
-rw-r--r-- | tvix/eval/src/value/arbitrary.rs | 106 | ||||
-rw-r--r-- | tvix/eval/src/value/attrs.rs | 613 | ||||
-rw-r--r-- | tvix/eval/src/value/attrs/tests.rs | 106 | ||||
-rw-r--r-- | tvix/eval/src/value/builtin.rs | 137 | ||||
-rw-r--r-- | tvix/eval/src/value/function.rs | 112 | ||||
-rw-r--r-- | tvix/eval/src/value/json.rs | 154 | ||||
-rw-r--r-- | tvix/eval/src/value/list.rs | 95 | ||||
-rw-r--r-- | tvix/eval/src/value/mod.rs | 1086 | ||||
-rw-r--r-- | tvix/eval/src/value/path.rs | 14 | ||||
-rw-r--r-- | tvix/eval/src/value/string/context.rs | 161 | ||||
-rw-r--r-- | tvix/eval/src/value/string/mod.rs | 879 | ||||
-rw-r--r-- | tvix/eval/src/value/thunk.rs | 427 |
12 files changed, 3890 insertions, 0 deletions
diff --git a/tvix/eval/src/value/arbitrary.rs b/tvix/eval/src/value/arbitrary.rs new file mode 100644 index 000000000000..49b9f2eea3fb --- /dev/null +++ b/tvix/eval/src/value/arbitrary.rs @@ -0,0 +1,106 @@ +//! Support for configurable generation of arbitrary nix values + +use proptest::collection::{btree_map, vec}; +use proptest::{prelude::*, strategy::BoxedStrategy}; +use std::ffi::OsString; + +use super::{attrs::AttrsRep, NixAttrs, NixList, NixString, Value}; + +#[derive(Clone)] +pub enum Parameters { + Strategy(BoxedStrategy<Value>), + Parameters { + generate_internal_values: bool, + generate_functions: bool, + generate_nested: bool, + }, +} + +impl Default for Parameters { + fn default() -> Self { + Self::Parameters { + generate_internal_values: false, + generate_functions: false, + generate_nested: true, + } + } +} + +impl Arbitrary for NixAttrs { + type Parameters = Parameters; + type Strategy = BoxedStrategy<Self>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + prop_oneof![ + // Empty attrs representation + Just(AttrsRep::Empty.into()), + // KV representation (name/value pairs) + ( + any_with::<Value>(args.clone()), + any_with::<Value>(args.clone()) + ) + .prop_map(|(name, value)| AttrsRep::KV { name, value }.into()), + // Map representation + btree_map(NixString::arbitrary(), Value::arbitrary_with(args), 0..100) + .prop_map(|map| AttrsRep::Map(map).into()) + ] + .boxed() + } +} + +impl Arbitrary for NixList { + type Parameters = <Value as Arbitrary>::Parameters; + type Strategy = BoxedStrategy<Self>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + vec(<Value as Arbitrary>::arbitrary_with(args), 0..100) + .prop_map(NixList::from) + .boxed() + } +} + +impl Arbitrary for Value { + type Parameters = Parameters; + type Strategy = BoxedStrategy<Self>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + match args { + Parameters::Strategy(s) => s, + Parameters::Parameters { + generate_internal_values, + generate_functions, + generate_nested, + } => { + if generate_internal_values || generate_functions { + todo!("Generating internal values and functions not implemented yet") + } else if generate_nested { + non_internal_value().boxed() + } else { + leaf_value().boxed() + } + } + } + } +} + +fn leaf_value() -> impl Strategy<Value = Value> { + use Value::*; + + prop_oneof![ + Just(Null), + any::<bool>().prop_map(Bool), + any::<i64>().prop_map(Integer), + any::<f64>().prop_map(Float), + any::<NixString>().prop_map(String), + any::<OsString>().prop_map(|s| Path(Box::new(s.into()))), + ] +} + +fn non_internal_value() -> impl Strategy<Value = Value> { + leaf_value().prop_recursive(3, 5, 5, |inner| { + prop_oneof![ + NixAttrs::arbitrary_with(Parameters::Strategy(inner.clone())).prop_map(Value::attrs), + any_with::<NixList>(Parameters::Strategy(inner)).prop_map(Value::List) + ] + }) +} diff --git a/tvix/eval/src/value/attrs.rs b/tvix/eval/src/value/attrs.rs new file mode 100644 index 000000000000..00b913347418 --- /dev/null +++ b/tvix/eval/src/value/attrs.rs @@ -0,0 +1,613 @@ +//! This module implements Nix attribute sets. They have flexible +//! backing implementations, as they are used in very versatile +//! use-cases that are all exposed the same way in the language +//! surface. +//! +//! Due to this, construction and management of attribute sets has +//! some peculiarities that are encapsulated within this module. +use std::borrow::Borrow; +use std::collections::{btree_map, BTreeMap}; +use std::iter::FromIterator; +use std::rc::Rc; + +use bstr::BStr; +use lazy_static::lazy_static; +use serde::de::{Deserializer, Error, Visitor}; +use serde::Deserialize; + +use super::string::NixString; +use super::thunk::ThunkSet; +use super::TotalDisplay; +use super::Value; +use crate::errors::ErrorKind; +use crate::CatchableErrorKind; + +lazy_static! { + static ref NAME_S: NixString = "name".into(); + static ref NAME_REF: &'static NixString = &NAME_S; + static ref VALUE_S: NixString = "value".into(); + static ref VALUE_REF: &'static NixString = &VALUE_S; +} + +#[cfg(test)] +mod tests; + +#[derive(Clone, Debug, Deserialize, Default)] +pub(super) enum AttrsRep { + #[default] + Empty, + + Map(BTreeMap<NixString, Value>), + + /// Warning: this represents a **two**-attribute attrset, with + /// attribute names "name" and "value", like `{name="foo"; + /// value="bar";}`, *not* `{foo="bar";}`! + KV { + name: Value, + value: Value, + }, +} + +impl AttrsRep { + fn select(&self, key: &BStr) -> Option<&Value> { + match self { + AttrsRep::Empty => None, + + AttrsRep::KV { name, value } => match &**key { + b"name" => Some(name), + b"value" => Some(value), + _ => None, + }, + + AttrsRep::Map(map) => map.get(key), + } + } + + fn contains(&self, key: &BStr) -> bool { + match self { + AttrsRep::Empty => false, + AttrsRep::KV { .. } => key == "name" || key == "value", + AttrsRep::Map(map) => map.contains_key(key), + } + } +} + +#[repr(transparent)] +#[derive(Clone, Debug, Default)] +pub struct NixAttrs(pub(super) Rc<AttrsRep>); + +impl From<AttrsRep> for NixAttrs { + fn from(rep: AttrsRep) -> Self { + NixAttrs(Rc::new(rep)) + } +} + +impl<K, V> FromIterator<(K, V)> for NixAttrs +where + NixString: From<K>, + Value: From<V>, +{ + fn from_iter<T>(iter: T) -> NixAttrs + where + T: IntoIterator<Item = (K, V)>, + { + AttrsRep::Map( + iter.into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect(), + ) + .into() + } +} + +impl From<BTreeMap<NixString, Value>> for NixAttrs { + fn from(map: BTreeMap<NixString, Value>) -> Self { + AttrsRep::Map(map).into() + } +} + +impl TotalDisplay for NixAttrs { + fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { + if let Some(Value::String(s)) = self.select("type") { + if *s == "derivation" { + write!(f, "«derivation ")?; + if let Some(p) = self.select("drvPath") { + p.total_fmt(f, set)?; + } else { + write!(f, "???")?; + } + return write!(f, "»"); + } + } + + f.write_str("{ ")?; + + for (name, value) in self.iter_sorted() { + write!(f, "{} = ", name.ident_str())?; + value.total_fmt(f, set)?; + f.write_str("; ")?; + } + + f.write_str("}") + } +} + +impl<'de> Deserialize<'de> for NixAttrs { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct MapVisitor; + + impl<'de> Visitor<'de> for MapVisitor { + type Value = NixAttrs; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a valid Nix attribute set") + } + + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> + where + A: serde::de::MapAccess<'de>, + { + let mut stack_array = Vec::with_capacity(map.size_hint().unwrap_or(0) * 2); + + while let Some((key, value)) = map.next_entry()? { + stack_array.push(key); + stack_array.push(value); + } + + Ok(NixAttrs::construct(stack_array.len() / 2, stack_array) + .map_err(A::Error::custom)? + .expect("Catchable values are unreachable here")) + } + } + + deserializer.deserialize_map(MapVisitor) + } +} + +impl NixAttrs { + pub fn empty() -> Self { + AttrsRep::Empty.into() + } + + /// Compare two attribute sets by pointer equality. Only makes + /// sense for some attribute set representations, i.e. returning + /// `false` does not mean that the two attribute sets do not have + /// equal *content*. + pub fn ptr_eq(&self, other: &Self) -> bool { + Rc::ptr_eq(&self.0, &other.0) + } + + /// Return an attribute set containing the merge of the two + /// provided sets. Keys from the `other` set have precedence. + pub fn update(self, other: Self) -> Self { + // Short-circuit on some optimal cases: + match (self.0.as_ref(), other.0.as_ref()) { + (AttrsRep::Empty, AttrsRep::Empty) => return self, + (AttrsRep::Empty, _) => return other, + (_, AttrsRep::Empty) => return self, + (AttrsRep::KV { .. }, AttrsRep::KV { .. }) => return other, + + // Explicitly handle all branches instead of falling + // through, to ensure that we get at least some compiler + // errors if variants are modified. + (AttrsRep::Map(_), AttrsRep::Map(_)) + | (AttrsRep::Map(_), AttrsRep::KV { .. }) + | (AttrsRep::KV { .. }, AttrsRep::Map(_)) => {} + }; + + // Slightly more advanced, but still optimised updates + match (Rc::unwrap_or_clone(self.0), Rc::unwrap_or_clone(other.0)) { + (AttrsRep::Map(mut m), AttrsRep::KV { name, value }) => { + m.insert(NAME_S.clone(), name); + m.insert(VALUE_S.clone(), value); + AttrsRep::Map(m).into() + } + + (AttrsRep::KV { name, value }, AttrsRep::Map(mut m)) => { + match m.entry(NAME_S.clone()) { + btree_map::Entry::Vacant(e) => { + e.insert(name); + } + + btree_map::Entry::Occupied(_) => { /* name from `m` has precedence */ } + }; + + match m.entry(VALUE_S.clone()) { + btree_map::Entry::Vacant(e) => { + e.insert(value); + } + + btree_map::Entry::Occupied(_) => { /* value from `m` has precedence */ } + }; + + AttrsRep::Map(m).into() + } + + // Plain merge of maps. + (AttrsRep::Map(mut m1), AttrsRep::Map(m2)) => { + m1.extend(m2); + AttrsRep::Map(m1).into() + } + + // Cases handled above by the borrowing match: + _ => unreachable!(), + } + } + + /// Return the number of key-value entries in an attrset. + pub fn len(&self) -> usize { + match self.0.as_ref() { + AttrsRep::Map(map) => map.len(), + AttrsRep::Empty => 0, + AttrsRep::KV { .. } => 2, + } + } + + pub fn is_empty(&self) -> bool { + match self.0.as_ref() { + AttrsRep::Map(map) => map.is_empty(), + AttrsRep::Empty => true, + AttrsRep::KV { .. } => false, + } + } + + /// Select a value from an attribute set by key. + pub fn select<K>(&self, key: &K) -> Option<&Value> + where + K: Borrow<BStr> + ?Sized, + { + self.0.select(key.borrow()) + } + + /// Select a required value from an attribute set by key, return + /// an `AttributeNotFound` error if it is missing. + pub fn select_required<K>(&self, key: &K) -> Result<&Value, ErrorKind> + where + K: Borrow<BStr> + ?Sized, + { + self.select(key) + .ok_or_else(|| ErrorKind::AttributeNotFound { + name: key.borrow().to_string(), + }) + } + + pub fn contains<'a, K: 'a>(&self, key: K) -> bool + where + &'a BStr: From<K>, + { + self.0.contains(key.into()) + } + + /// Construct an iterator over all the key-value pairs in the attribute set. + #[allow(clippy::needless_lifetimes)] + pub fn iter<'a>(&'a self) -> Iter<KeyValue<'a>> { + Iter(match &self.0.as_ref() { + AttrsRep::Map(map) => KeyValue::Map(map.iter()), + AttrsRep::Empty => KeyValue::Empty, + + AttrsRep::KV { + ref name, + ref value, + } => KeyValue::KV { + name, + value, + at: IterKV::default(), + }, + }) + } + + /// Same as iter(), but marks call sites which rely on the + /// iteration being lexicographic. + pub fn iter_sorted(&self) -> Iter<KeyValue<'_>> { + self.iter() + } + + /// Same as [IntoIterator::into_iter], but marks call sites which rely on the + /// iteration being lexicographic. + pub fn into_iter_sorted(self) -> OwnedAttrsIterator { + self.into_iter() + } + + /// Construct an iterator over all the keys of the attribute set + pub fn keys(&self) -> Keys { + Keys(match self.0.as_ref() { + AttrsRep::Empty => KeysInner::Empty, + AttrsRep::KV { .. } => KeysInner::KV(IterKV::default()), + + // TODO(tazjin): only sort when required, not always. + AttrsRep::Map(m) => KeysInner::Map(m.keys()), + }) + } + + /// Implement construction logic of an attribute set, to encapsulate + /// logic about attribute set optimisations inside of this module. + pub fn construct( + count: usize, + mut stack_slice: Vec<Value>, + ) -> Result<Result<Self, CatchableErrorKind>, ErrorKind> { + debug_assert!( + stack_slice.len() == count * 2, + "construct_attrs called with count == {}, but slice.len() == {}", + count, + stack_slice.len(), + ); + + // Optimisation: Empty attribute set + if count == 0 { + return Ok(Ok(AttrsRep::Empty.into())); + } + + // Optimisation: KV pattern + if count == 2 { + if let Some(kv) = attempt_optimise_kv(&mut stack_slice) { + return Ok(Ok(kv)); + } + } + + let mut attrs_map = BTreeMap::new(); + + for _ in 0..count { + let value = stack_slice.pop().unwrap(); + let key = stack_slice.pop().unwrap(); + + match key { + Value::String(ks) => set_attr(&mut attrs_map, ks, value)?, + + Value::Null => { + // This is in fact valid, but leads to the value + // being ignored and nothing being set, i.e. `{ + // ${null} = 1; } => { }`. + continue; + } + + Value::Catchable(err) => return Ok(Err(*err)), + + other => return Err(ErrorKind::InvalidAttributeName(other)), + } + } + + Ok(Ok(AttrsRep::Map(attrs_map).into())) + } + + /// Construct an optimized "KV"-style attribute set given the value for the + /// `"name"` key, and the value for the `"value"` key + pub(crate) fn from_kv(name: Value, value: Value) -> Self { + AttrsRep::KV { name, value }.into() + } +} + +impl IntoIterator for NixAttrs { + type Item = (NixString, Value); + type IntoIter = OwnedAttrsIterator; + + fn into_iter(self) -> Self::IntoIter { + match Rc::unwrap_or_clone(self.0) { + AttrsRep::Empty => OwnedAttrsIterator(IntoIterRepr::Empty), + AttrsRep::KV { name, value } => OwnedAttrsIterator(IntoIterRepr::Finite( + vec![(NAME_REF.clone(), name), (VALUE_REF.clone(), value)].into_iter(), + )), + AttrsRep::Map(map) => OwnedAttrsIterator(IntoIterRepr::Map(map.into_iter())), + } + } +} + +/// In Nix, name/value attribute pairs are frequently constructed from +/// literals. This particular case should avoid allocation of a map, +/// additional heap values etc. and use the optimised `KV` variant +/// instead. +/// +/// ```norust +/// `slice` is the top of the stack from which the attrset is being +/// constructed, e.g. +/// +/// slice: [ "value" 5 "name" "foo" ] +/// index: 0 1 2 3 +/// stack: 3 2 1 0 +/// ``` +fn attempt_optimise_kv(slice: &mut [Value]) -> Option<NixAttrs> { + let (name_idx, value_idx) = { + match (&slice[2], &slice[0]) { + (Value::String(s1), Value::String(s2)) if (*s1 == *NAME_S && *s2 == *VALUE_S) => (3, 1), + (Value::String(s1), Value::String(s2)) if (*s1 == *VALUE_S && *s2 == *NAME_S) => (1, 3), + + // Technically this branch lets type errors pass, + // but they will be caught during normal attribute + // set construction instead. + _ => return None, + } + }; + + Some(NixAttrs::from_kv( + slice[name_idx].clone(), + slice[value_idx].clone(), + )) +} + +/// Set an attribute on an in-construction attribute set, while +/// checking against duplicate keys. +fn set_attr( + map: &mut BTreeMap<NixString, Value>, + key: NixString, + value: Value, +) -> Result<(), ErrorKind> { + match map.entry(key) { + btree_map::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey { + key: entry.key().to_string(), + }), + + btree_map::Entry::Vacant(entry) => { + entry.insert(value); + Ok(()) + } + } +} + +/// Internal helper type to track the iteration status of an iterator +/// over the name/value representation. +#[derive(Debug, Default)] +pub enum IterKV { + #[default] + Name, + Value, + Done, +} + +impl IterKV { + fn next(&mut self) { + match *self { + Self::Name => *self = Self::Value, + Self::Value => *self = Self::Done, + Self::Done => {} + } + } +} + +/// Iterator representation over the keys *and* values of an attribute +/// set. +pub enum KeyValue<'a> { + Empty, + + KV { + name: &'a Value, + value: &'a Value, + at: IterKV, + }, + + Map(btree_map::Iter<'a, NixString, Value>), +} + +/// Iterator over a Nix attribute set. +// This wrapper type exists to make the inner "raw" iterator +// inaccessible. +#[repr(transparent)] +pub struct Iter<T>(T); + +impl<'a> Iterator for Iter<KeyValue<'a>> { + type Item = (&'a NixString, &'a Value); + + fn next(&mut self) -> Option<Self::Item> { + match &mut self.0 { + KeyValue::Map(inner) => inner.next(), + KeyValue::Empty => None, + + KeyValue::KV { name, value, at } => match at { + IterKV::Name => { + at.next(); + Some((&NAME_REF, name)) + } + + IterKV::Value => { + at.next(); + Some((&VALUE_REF, value)) + } + + IterKV::Done => None, + }, + } + } +} + +impl<'a> ExactSizeIterator for Iter<KeyValue<'a>> { + fn len(&self) -> usize { + match &self.0 { + KeyValue::Empty => 0, + KeyValue::KV { .. } => 2, + KeyValue::Map(inner) => inner.len(), + } + } +} + +enum KeysInner<'a> { + Empty, + KV(IterKV), + Map(btree_map::Keys<'a, NixString, Value>), +} + +pub struct Keys<'a>(KeysInner<'a>); + +impl<'a> Iterator for Keys<'a> { + type Item = &'a NixString; + + fn next(&mut self) -> Option<Self::Item> { + match &mut self.0 { + KeysInner::Empty => None, + KeysInner::KV(at @ IterKV::Name) => { + at.next(); + Some(&NAME_REF) + } + KeysInner::KV(at @ IterKV::Value) => { + at.next(); + Some(&VALUE_REF) + } + KeysInner::KV(IterKV::Done) => None, + KeysInner::Map(m) => m.next(), + } + } +} + +impl<'a> IntoIterator for &'a NixAttrs { + type Item = (&'a NixString, &'a Value); + + type IntoIter = Iter<KeyValue<'a>>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } +} + +impl<'a> ExactSizeIterator for Keys<'a> { + fn len(&self) -> usize { + match &self.0 { + KeysInner::Empty => 0, + KeysInner::KV(_) => 2, + KeysInner::Map(m) => m.len(), + } + } +} + +/// Internal representation of an owning attrset iterator +pub enum IntoIterRepr { + Empty, + Finite(std::vec::IntoIter<(NixString, Value)>), + Map(btree_map::IntoIter<NixString, Value>), +} + +/// Wrapper type which hides the internal implementation details from +/// users. +#[repr(transparent)] +pub struct OwnedAttrsIterator(IntoIterRepr); + +impl Iterator for OwnedAttrsIterator { + type Item = (NixString, Value); + + fn next(&mut self) -> Option<Self::Item> { + match &mut self.0 { + IntoIterRepr::Empty => None, + IntoIterRepr::Finite(inner) => inner.next(), + IntoIterRepr::Map(m) => m.next(), + } + } +} + +impl ExactSizeIterator for OwnedAttrsIterator { + fn len(&self) -> usize { + match &self.0 { + IntoIterRepr::Empty => 0, + IntoIterRepr::Finite(inner) => inner.len(), + IntoIterRepr::Map(inner) => inner.len(), + } + } +} + +impl DoubleEndedIterator for OwnedAttrsIterator { + fn next_back(&mut self) -> Option<Self::Item> { + match &mut self.0 { + IntoIterRepr::Empty => None, + IntoIterRepr::Finite(inner) => inner.next_back(), + IntoIterRepr::Map(inner) => inner.next_back(), + } + } +} diff --git a/tvix/eval/src/value/attrs/tests.rs b/tvix/eval/src/value/attrs/tests.rs new file mode 100644 index 000000000000..b79f45a71b28 --- /dev/null +++ b/tvix/eval/src/value/attrs/tests.rs @@ -0,0 +1,106 @@ +use bstr::B; + +use super::*; + +#[test] +fn test_empty_attrs() { + let attrs = NixAttrs::construct(0, vec![]) + .expect("empty attr construction should succeed") + .unwrap(); + + assert!( + matches!(attrs.0.as_ref(), AttrsRep::Empty), + "empty attribute set should use optimised representation" + ); +} + +#[test] +fn test_simple_attrs() { + let attrs = NixAttrs::construct(1, vec![Value::from("key"), Value::from("value")]) + .expect("simple attr construction should succeed") + .unwrap(); + + assert!( + matches!(attrs.0.as_ref(), AttrsRep::Map(_)), + "simple attribute set should use map representation", + ) +} + +#[test] +fn test_kv_attrs() { + let name_val = Value::from("name"); + let value_val = Value::from("value"); + let meaning_val = Value::from("meaning"); + let forty_two_val = Value::Integer(42); + + let kv_attrs = NixAttrs::construct( + 2, + vec![ + value_val, + forty_two_val.clone(), + name_val, + meaning_val.clone(), + ], + ) + .expect("constructing K/V pair attrs should succeed") + .unwrap(); + + match kv_attrs.0.as_ref() { + AttrsRep::KV { name, value } + if name.to_str().unwrap() == meaning_val.to_str().unwrap() + || value.to_str().unwrap() == forty_two_val.to_str().unwrap() => {} + + _ => panic!( + "K/V attribute set should use optimised representation, but got {:?}", + kv_attrs + ), + } +} + +#[test] +fn test_empty_attrs_iter() { + let attrs = NixAttrs::construct(0, vec![]).unwrap().unwrap(); + assert!(attrs.iter().next().is_none()); +} + +#[test] +fn test_kv_attrs_iter() { + let name_val = Value::from("name"); + let value_val = Value::from("value"); + let meaning_val = Value::from("meaning"); + let forty_two_val = Value::Integer(42); + + let kv_attrs = NixAttrs::construct( + 2, + vec![ + value_val, + forty_two_val.clone(), + name_val, + meaning_val.clone(), + ], + ) + .expect("constructing K/V pair attrs should succeed") + .unwrap(); + + let mut iter = kv_attrs.iter().collect::<Vec<_>>().into_iter(); + let (k, v) = iter.next().unwrap(); + assert!(k == *NAME_REF); + assert!(v.to_str().unwrap() == meaning_val.to_str().unwrap()); + let (k, v) = iter.next().unwrap(); + assert!(k == *VALUE_REF); + assert!(v.as_int().unwrap() == forty_two_val.as_int().unwrap()); + assert!(iter.next().is_none()); +} + +#[test] +fn test_map_attrs_iter() { + let attrs = NixAttrs::construct(1, vec![Value::from("key"), Value::from("value")]) + .expect("simple attr construction should succeed") + .unwrap(); + + let mut iter = attrs.iter().collect::<Vec<_>>().into_iter(); + let (k, v) = iter.next().unwrap(); + assert!(k == &NixString::from("key")); + assert_eq!(v.to_str().unwrap(), B("value")); + assert!(iter.next().is_none()); +} diff --git a/tvix/eval/src/value/builtin.rs b/tvix/eval/src/value/builtin.rs new file mode 100644 index 000000000000..346f06cb7748 --- /dev/null +++ b/tvix/eval/src/value/builtin.rs @@ -0,0 +1,137 @@ +//! This module implements the runtime representation of a Nix +//! builtin. +//! +//! Builtins are directly backed by Rust code operating on Nix values. + +use crate::vm::generators::Generator; + +use super::Value; + +use std::{ + fmt::{Debug, Display}, + rc::Rc, +}; + +/// Trait for closure types of builtins. +/// +/// Builtins are expected to yield a generator which can be run by the VM to +/// produce the final value. +/// +/// Implementors should use the builtins-macros to create these functions +/// instead of handling the argument-passing logic manually. +pub trait BuiltinGen: Fn(Vec<Value>) -> Generator {} +impl<F: Fn(Vec<Value>) -> Generator> BuiltinGen for F {} + +#[derive(Clone)] +pub struct BuiltinRepr { + name: &'static str, + /// Optional documentation for the builtin. + documentation: Option<&'static str>, + arg_count: usize, + + func: Rc<dyn BuiltinGen>, + + /// Partially applied function arguments. + partials: Vec<Value>, +} + +pub enum BuiltinResult { + /// Builtin was not ready to be called (arguments missing) and remains + /// partially applied. + Partial(Builtin), + + /// Builtin was called and constructed a generator that the VM must run. + Called(&'static str, Generator), +} + +/// Represents a single built-in function which directly executes Rust +/// code that operates on a Nix value. +/// +/// Builtins are the only functions in Nix that have varying arities +/// (for example, `hasAttr` has an arity of 2, but `isAttrs` an arity +/// of 1). To facilitate this generically, builtins expect to be +/// called with a vector of Nix values corresponding to their +/// arguments in order. +/// +/// Partially applied builtins act similar to closures in that they +/// "capture" the partially applied arguments, and are treated +/// specially when printing their representation etc. +#[derive(Clone)] +pub struct Builtin(Box<BuiltinRepr>); + +impl From<BuiltinRepr> for Builtin { + fn from(value: BuiltinRepr) -> Self { + Builtin(Box::new(value)) + } +} + +impl Builtin { + pub fn new<F: BuiltinGen + 'static>( + name: &'static str, + documentation: Option<&'static str>, + arg_count: usize, + func: F, + ) -> Self { + BuiltinRepr { + name, + documentation, + arg_count, + func: Rc::new(func), + partials: vec![], + } + .into() + } + + pub fn name(&self) -> &'static str { + self.0.name + } + + pub fn documentation(&self) -> Option<&'static str> { + self.0.documentation + } + + /// Apply an additional argument to the builtin. + /// After this, [`Builtin::call`] *must* be called, otherwise it may leave + /// the builtin in an incorrect state. + pub fn apply_arg(&mut self, arg: Value) { + self.0.partials.push(arg); + + debug_assert!( + self.0.partials.len() <= self.0.arg_count, + "Tvix bug: pushed too many arguments to builtin" + ); + } + + /// Attempt to call a builtin, which will produce a generator if it is fully + /// applied or return the builtin if it is partially applied. + pub fn call(self) -> BuiltinResult { + if self.0.partials.len() == self.0.arg_count { + BuiltinResult::Called(self.0.name, (self.0.func)(self.0.partials)) + } else { + BuiltinResult::Partial(self) + } + } +} + +impl Debug for Builtin { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "builtin[{}]", self.0.name) + } +} + +impl Display for Builtin { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + if !self.0.partials.is_empty() { + f.write_str("<PRIMOP-APP>") + } else { + f.write_str("<PRIMOP>") + } + } +} + +/// Builtins are uniquely identified by their name +impl PartialEq for Builtin { + fn eq(&self, other: &Self) -> bool { + self.0.name == other.0.name + } +} diff --git a/tvix/eval/src/value/function.rs b/tvix/eval/src/value/function.rs new file mode 100644 index 000000000000..7592e3d64164 --- /dev/null +++ b/tvix/eval/src/value/function.rs @@ -0,0 +1,112 @@ +//! This module implements the runtime representation of functions. +use std::{collections::BTreeMap, hash::Hash, rc::Rc}; + +use codemap::Span; +use smol_str::SmolStr; + +use crate::{chunk::Chunk, upvalues::Upvalues}; + +use super::NixString; + +#[derive(Clone, Debug, PartialEq)] +pub(crate) struct Formals { + /// Map from argument name, to whether that argument is required + pub(crate) arguments: BTreeMap<NixString, bool>, + + /// Do the formals of this function accept extra arguments + pub(crate) ellipsis: bool, + + /// The span of the formals themselves, to use to emit errors + pub(crate) span: Span, + + /// Optionally tracks a name for all function arguments (args@ style). + /// Used by toXML. + pub(crate) name: Option<String>, +} + +impl Formals { + /// Returns true if the given arg is a valid argument to these formals. + /// + /// This is true if it is either listed in the list of arguments, or the formals have an + /// ellipsis + pub(crate) fn contains<Q>(&self, arg: &Q) -> bool + where + Q: ?Sized + Hash + Ord + Eq, + NixString: std::borrow::Borrow<Q>, + { + self.ellipsis || self.arguments.contains_key(arg) + } +} + +/// The opcodes for a thunk or closure, plus the number of +/// non-executable opcodes which are allowed after an OpThunkClosure or +/// OpThunkSuspended referencing it. At runtime `Lambda` is usually wrapped +/// in `Rc` to avoid copying the `Chunk` it holds (which can be +/// quite large). +/// +/// In order to correctly reproduce cppnix's "pointer equality" +/// semantics it is important that we never clone a Lambda -- +/// use `Rc<Lambda>::clone()` instead. This struct deliberately +/// does not `derive(Clone)` in order to prevent this from being +/// done accidentally. +/// +#[derive(/* do not add Clone here */ Debug, Default)] +pub struct Lambda { + pub(crate) chunk: Chunk, + + /// Name of the function (equivalent to the name of the + /// identifier (e.g. a value in a let-expression or an attribute + /// set entry) it is located in). + pub(crate) name: Option<SmolStr>, + + /// Number of upvalues which the code in this Lambda closes + /// over, and which need to be initialised at + /// runtime. Information about the variables is emitted using + /// data-carrying opcodes (see [`crate::opcode::OpCode::DataStackIdx`]). + pub(crate) upvalue_count: usize, + pub(crate) formals: Option<Formals>, +} + +impl Lambda { + pub fn chunk(&mut self) -> &mut Chunk { + &mut self.chunk + } +} + +/// +/// In order to correctly reproduce cppnix's "pointer equality" +/// semantics it is important that we never clone a Lambda -- +/// use `Rc<Lambda>::clone()` instead. This struct deliberately +/// does not `derive(Clone)` in order to prevent this from being +/// done accidentally. +/// +#[derive(/* do not add Clone here */ Debug)] +pub struct Closure { + pub lambda: Rc<Lambda>, + pub upvalues: Rc<Upvalues>, +} + +impl Closure { + pub fn new(lambda: Rc<Lambda>) -> Self { + Self::new_with_upvalues( + Rc::new(Upvalues::with_capacity(lambda.upvalue_count)), + lambda, + ) + } + + pub fn new_with_upvalues(upvalues: Rc<Upvalues>, lambda: Rc<Lambda>) -> Self { + Closure { upvalues, lambda } + } + + pub fn chunk(&self) -> &Chunk { + &self.lambda.chunk + } + + pub fn lambda(&self) -> Rc<Lambda> { + self.lambda.clone() + } + + pub fn upvalues(&self) -> Rc<Upvalues> { + self.upvalues.clone() + } +} diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs new file mode 100644 index 000000000000..24a6bcaf6f21 --- /dev/null +++ b/tvix/eval/src/value/json.rs @@ -0,0 +1,154 @@ +/// Implementation of Value serialisation *to* JSON. +/// +/// This can not be implemented through standard serde-derive methods, +/// as there is internal Nix logic that must happen within the +/// serialisation methods. +use super::{CoercionKind, Value}; +use crate::errors::{CatchableErrorKind, ErrorKind}; +use crate::generators::{self, GenCo}; +use crate::NixContext; + +use bstr::ByteSlice; +use serde_json::value::to_value; +use serde_json::Value as Json; // name clash with *our* `Value` +use serde_json::{Map, Number}; + +impl Value { + /// Transforms the structure into a JSON + /// and accumulate all encountered context in the second's element + /// of the return type. + pub async fn into_contextful_json( + self, + co: &GenCo, + ) -> Result<Result<(Json, NixContext), CatchableErrorKind>, ErrorKind> { + let self_forced = generators::request_force(co, self).await; + let mut context = NixContext::new(); + + let value = match self_forced { + Value::Null => Json::Null, + Value::Bool(b) => Json::Bool(b), + Value::Integer(i) => Json::Number(Number::from(i)), + Value::Float(f) => to_value(f)?, + Value::String(s) => { + context.mimic(&s); + + Json::String(s.to_str()?.to_owned()) + } + + Value::Path(p) => { + let imported = generators::request_path_import(co, *p).await; + let path = imported.to_string_lossy().to_string(); + context = context.append(crate::NixContextElement::Plain(path.clone())); + Json::String(path) + } + + Value::List(l) => { + let mut out = vec![]; + + for val in l.into_iter() { + match generators::request_to_json(co, val).await { + Ok((v, ctx)) => { + context.extend(ctx.into_iter()); + out.push(v) + } + Err(cek) => return Ok(Err(cek)), + } + } + + Json::Array(out) + } + + Value::Attrs(attrs) => { + // Attribute sets with a callable `__toString` attribute + // serialise to the string-coerced version of the result of + // calling that. + if attrs.select("__toString").is_some() { + let span = generators::request_span(co).await; + match Value::Attrs(attrs) + .coerce_to_string_( + co, + CoercionKind { + strong: false, + import_paths: false, + }, + span, + ) + .await? + { + Value::Catchable(cek) => return Ok(Err(*cek)), + Value::String(s) => { + // We need a fresh context here because `__toString` will discard + // everything. + let mut fresh = NixContext::new(); + fresh.mimic(&s); + + return Ok(Ok((Json::String(s.to_str()?.to_owned()), fresh))); + } + _ => panic!("Value::coerce_to_string_() returned a non-string!"), + } + } + + // Attribute sets with an `outPath` attribute + // serialise to a JSON serialisation of that inner + // value (regardless of what it is!). + if let Some(out_path) = attrs.select("outPath") { + return Ok(generators::request_to_json(co, out_path.clone()).await); + } + + let mut out = Map::with_capacity(attrs.len()); + for (name, value) in attrs.into_iter_sorted() { + out.insert( + name.to_str()?.to_owned(), + match generators::request_to_json(co, value).await { + Ok((v, ctx)) => { + context.extend(ctx.into_iter()); + v + } + Err(cek) => return Ok(Err(cek)), + }, + ); + } + + Json::Object(out) + } + + Value::Catchable(c) => return Ok(Err(*c)), + + val @ Value::Closure(_) + | val @ Value::Thunk(_) + | val @ Value::Builtin(_) + | val @ Value::AttrNotFound + | val @ Value::Blueprint(_) + | val @ Value::DeferredUpvalue(_) + | val @ Value::UnresolvedPath(_) + | val @ Value::Json(..) + | val @ Value::FinaliseRequest(_) => { + return Err(ErrorKind::NotSerialisableToJson(val.type_of())) + } + }; + + Ok(Ok((value, context))) + } + + /// Generator version of the above, which wraps responses in + /// [`Value::Json`]. + pub(crate) async fn into_contextful_json_generator( + self, + co: GenCo, + ) -> Result<Value, ErrorKind> { + match self.into_contextful_json(&co).await? { + Err(cek) => Ok(Value::from(cek)), + Ok((json, ctx)) => Ok(Value::Json(Box::new((json, ctx)))), + } + } + + /// Transforms the structure into a JSON + /// All the accumulated context is ignored, use [`into_contextful_json`] + /// to obtain the resulting context of the JSON object. + pub async fn into_json( + self, + co: &GenCo, + ) -> Result<Result<Json, CatchableErrorKind>, ErrorKind> { + Ok(self.into_contextful_json(co).await?.map(|(json, _)| json)) + } +} diff --git a/tvix/eval/src/value/list.rs b/tvix/eval/src/value/list.rs new file mode 100644 index 000000000000..3e4b23a93f42 --- /dev/null +++ b/tvix/eval/src/value/list.rs @@ -0,0 +1,95 @@ +//! This module implements Nix lists. +use std::ops::Index; +use std::rc::Rc; + +use serde::Deserialize; + +use super::thunk::ThunkSet; +use super::TotalDisplay; +use super::Value; + +#[repr(transparent)] +#[derive(Clone, Debug, Deserialize)] +pub struct NixList(Rc<Vec<Value>>); + +impl TotalDisplay for NixList { + fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { + f.write_str("[ ")?; + + for v in self { + v.total_fmt(f, set)?; + f.write_str(" ")?; + } + + f.write_str("]") + } +} + +impl From<Vec<Value>> for NixList { + fn from(vs: Vec<Value>) -> Self { + Self(Rc::new(vs)) + } +} + +impl NixList { + pub fn len(&self) -> usize { + self.0.len() + } + + pub fn get(&self, i: usize) -> Option<&Value> { + self.0.get(i) + } + + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + pub fn construct(count: usize, stack_slice: Vec<Value>) -> Self { + debug_assert!( + count == stack_slice.len(), + "NixList::construct called with count == {}, but slice.len() == {}", + count, + stack_slice.len(), + ); + + NixList(Rc::new(stack_slice)) + } + + pub fn iter(&self) -> std::slice::Iter<Value> { + self.0.iter() + } + + pub fn ptr_eq(&self, other: &Self) -> bool { + Rc::ptr_eq(&self.0, &other.0) + } + + pub fn into_inner(self) -> Vec<Value> { + Rc::try_unwrap(self.0).unwrap_or_else(|rc| (*rc).clone()) + } +} + +impl IntoIterator for NixList { + type Item = Value; + type IntoIter = std::vec::IntoIter<Value>; + + fn into_iter(self) -> Self::IntoIter { + self.into_inner().into_iter() + } +} + +impl<'a> IntoIterator for &'a NixList { + type Item = &'a Value; + type IntoIter = std::slice::Iter<'a, Value>; + + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} + +impl Index<usize> for NixList { + type Output = Value; + + fn index(&self, index: usize) -> &Self::Output { + &self.0[index] + } +} diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs new file mode 100644 index 000000000000..2e78f20b49a0 --- /dev/null +++ b/tvix/eval/src/value/mod.rs @@ -0,0 +1,1086 @@ +//! This module implements the backing representation of runtime +//! values in the Nix language. +use std::cmp::Ordering; +use std::fmt::Display; +use std::num::{NonZeroI32, NonZeroUsize}; +use std::path::PathBuf; +use std::rc::Rc; + +use bstr::{BString, ByteVec}; +use codemap::Span; +use lexical_core::format::CXX_LITERAL; +use serde::Deserialize; + +#[cfg(feature = "arbitrary")] +mod arbitrary; +mod attrs; +mod builtin; +mod function; +mod json; +mod list; +mod path; +mod string; +mod thunk; + +use crate::errors::{CatchableErrorKind, ErrorKind}; +use crate::opcode::StackIdx; +use crate::vm::generators::{self, GenCo}; +use crate::AddContext; +pub use attrs::NixAttrs; +pub use builtin::{Builtin, BuiltinResult}; +pub(crate) use function::Formals; +pub use function::{Closure, Lambda}; +pub use list::NixList; +pub use path::canon_path; +pub use string::{NixContext, NixContextElement, NixString}; +pub use thunk::Thunk; + +pub use self::thunk::ThunkSet; + +use lazy_static::lazy_static; + +#[warn(variant_size_differences)] +#[derive(Clone, Debug, Deserialize)] +#[serde(untagged)] +pub enum Value { + Null, + Bool(bool), + Integer(i64), + Float(f64), + String(NixString), + + #[serde(skip)] + Path(Box<PathBuf>), + Attrs(Box<NixAttrs>), + List(NixList), + + #[serde(skip)] + Closure(Rc<Closure>), // must use Rc<Closure> here in order to get proper pointer equality + + #[serde(skip)] + Builtin(Builtin), + + // Internal values that, while they technically exist at runtime, + // are never returned to or created directly by users. + #[serde(skip_deserializing)] + Thunk(Thunk), + + // See [`compiler::compile_select_or()`] for explanation + #[serde(skip)] + AttrNotFound, + + // this can only occur in Chunk::Constants and nowhere else + #[serde(skip)] + Blueprint(Rc<Lambda>), + + #[serde(skip)] + DeferredUpvalue(StackIdx), + #[serde(skip)] + UnresolvedPath(Box<PathBuf>), + #[serde(skip)] + Json(Box<(serde_json::Value, NixContext)>), + + #[serde(skip)] + FinaliseRequest(bool), + + #[serde(skip)] + Catchable(Box<CatchableErrorKind>), +} + +impl From<CatchableErrorKind> for Value { + #[inline] + fn from(c: CatchableErrorKind) -> Value { + Value::Catchable(Box::new(c)) + } +} + +impl<V> From<Result<V, CatchableErrorKind>> for Value +where + Value: From<V>, +{ + #[inline] + fn from(v: Result<V, CatchableErrorKind>) -> Value { + match v { + Ok(v) => v.into(), + Err(e) => Value::Catchable(Box::new(e)), + } + } +} + +lazy_static! { + static ref WRITE_FLOAT_OPTIONS: lexical_core::WriteFloatOptions = + lexical_core::WriteFloatOptionsBuilder::new() + .trim_floats(true) + .round_mode(lexical_core::write_float_options::RoundMode::Round) + .positive_exponent_break(Some(NonZeroI32::new(5).unwrap())) + .max_significant_digits(Some(NonZeroUsize::new(6).unwrap())) + .build() + .unwrap(); +} + +// Helper macros to generate the to_*/as_* macros while accounting for +// thunks. + +/// Generate an `as_*` method returning a reference to the expected +/// type, or a type error. This only works for types that implement +/// `Copy`, as returning a reference to an inner thunk value is not +/// possible. + +/// Generate an `as_*/to_*` accessor method that returns either the +/// expected type, or a type error. +macro_rules! gen_cast { + ( $name:ident, $type:ty, $expected:expr, $variant:pat, $result:expr ) => { + pub fn $name(&self) -> Result<$type, ErrorKind> { + match self { + $variant => Ok($result), + Value::Thunk(thunk) => Self::$name(&thunk.value()), + other => Err(type_error($expected, &other)), + } + } + }; +} + +/// Generate an `as_*_mut/to_*_mut` accessor method that returns either the +/// expected type, or a type error. +macro_rules! gen_cast_mut { + ( $name:ident, $type:ty, $expected:expr, $variant:ident) => { + pub fn $name(&mut self) -> Result<&mut $type, ErrorKind> { + match self { + Value::$variant(x) => Ok(x), + other => Err(type_error($expected, &other)), + } + } + }; +} + +/// Generate an `is_*` type-checking method. +macro_rules! gen_is { + ( $name:ident, $variant:pat ) => { + pub fn $name(&self) -> bool { + match self { + $variant => true, + Value::Thunk(thunk) => Self::$name(&thunk.value()), + _ => false, + } + } + }; +} + +/// Describes what input types are allowed when coercing a `Value` to a string +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub struct CoercionKind { + /// If false only coerce already "stringly" types like strings and paths, but + /// also coerce sets that have a `__toString` attribute. In Tvix, this is + /// usually called a weak coercion. Equivalent to passing `false` as the + /// `coerceMore` argument of `EvalState::coerceToString` in C++ Nix. + /// + /// If true coerce all value types included by a weak coercion, but also + /// coerce `null`, booleans, integers, floats and lists of coercible types. + /// Consequently, we call this a strong coercion. Equivalent to passing + /// `true` as `coerceMore` in C++ Nix. + pub strong: bool, + + /// If `import_paths` is `true`, paths are imported into the store and their + /// store path is the result of the coercion (equivalent to the + /// `copyToStore` argument of `EvalState::coerceToString` in C++ Nix). + pub import_paths: bool, +} + +impl From<CoercionKind> for u8 { + fn from(k: CoercionKind) -> u8 { + k.strong as u8 | (k.import_paths as u8) << 1 + } +} + +impl From<u8> for CoercionKind { + fn from(byte: u8) -> Self { + CoercionKind { + strong: byte & 0x01 != 0, + import_paths: byte & 0x02 != 0, + } + } +} + +impl<T> From<T> for Value +where + T: Into<NixString>, +{ + fn from(t: T) -> Self { + Self::String(t.into()) + } +} + +/// Controls what kind of by-pointer equality comparison is allowed. +/// +/// See `//tvix/docs/value-pointer-equality.md` for details. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum PointerEquality { + /// Pointer equality not allowed at all. + ForbidAll, + + /// Pointer equality comparisons only allowed for nested values. + AllowNested, + + /// Pointer equality comparisons are allowed in all contexts. + AllowAll, +} + +impl Value { + /// Construct a [`Value::Attrs`] from a [`NixAttrs`]. + pub fn attrs(attrs: NixAttrs) -> Self { + Self::Attrs(Box::new(attrs)) + } + + /// Deeply forces a value, traversing e.g. lists and attribute sets and forcing + /// their contents, too. + /// + /// This is a generator function. + pub(super) async fn deep_force(self, co: GenCo, span: Span) -> Result<Value, ErrorKind> { + if let Some(v) = Self::deep_force_(self.clone(), co, span).await? { + Ok(v) + } else { + Ok(self) + } + } + + /// Returns Some(v) or None to indicate the returned value is myself + async fn deep_force_(myself: Value, co: GenCo, span: Span) -> Result<Option<Value>, ErrorKind> { + // This is a stack of values which still remain to be forced. + let mut vals = vec![myself]; + + let mut thunk_set: ThunkSet = Default::default(); + + loop { + let v = if let Some(v) = vals.pop() { + v + } else { + return Ok(None); + }; + + // Get rid of any top-level thunks, and bail out of self-recursive + // thunks. + let value = if let Value::Thunk(t) = &v { + if !thunk_set.insert(t) { + continue; + } + Thunk::force_(t.clone(), &co, span).await? + } else { + v + }; + + match value { + // Short-circuit on already evaluated values, or fail on internal values. + Value::Null + | Value::Bool(_) + | Value::Integer(_) + | Value::Float(_) + | Value::String(_) + | Value::Path(_) + | Value::Closure(_) + | Value::Builtin(_) => continue, + + Value::List(list) => { + for val in list.into_iter().rev() { + vals.push(val); + } + continue; + } + + Value::Attrs(attrs) => { + for (_, val) in attrs.into_iter().rev() { + vals.push(val); + } + continue; + } + + Value::Thunk(_) => panic!("Tvix bug: force_value() returned a thunk"), + + Value::Catchable(_) => return Ok(Some(value)), + + Value::AttrNotFound + | Value::Blueprint(_) + | Value::DeferredUpvalue(_) + | Value::UnresolvedPath(_) + | Value::Json(..) + | Value::FinaliseRequest(_) => panic!( + "Tvix bug: internal value left on stack: {}", + value.type_of() + ), + } + } + } + + pub async fn coerce_to_string( + self, + co: GenCo, + kind: CoercionKind, + span: Span, + ) -> Result<Value, ErrorKind> { + self.coerce_to_string_(&co, kind, span).await + } + + /// Coerce a `Value` to a string. See `CoercionKind` for a rundown of what + /// input types are accepted under what circumstances. + pub async fn coerce_to_string_( + self, + co: &GenCo, + kind: CoercionKind, + span: Span, + ) -> Result<Value, ErrorKind> { + let mut result = BString::default(); + let mut vals = vec![self]; + // Track if we are coercing the first value of a list to correctly emit + // separating white spaces. + let mut is_list_head = None; + // FIXME(raitobezarius): as per https://b.tvl.fyi/issues/364 + // we might be interested into more powerful context-related coercion kinds. + let mut context: NixContext = NixContext::new(); + + loop { + let value = if let Some(v) = vals.pop() { + v.force(co, span).await? + } else { + return Ok(Value::String(NixString::new_context_from(context, result))); + }; + let coerced: Result<BString, _> = match (value, kind) { + // coercions that are always done + (Value::String(mut s), _) => { + if let Some(ctx) = s.take_context() { + context.extend(ctx.into_iter()); + } + Ok((*s).into()) + } + + // TODO(sterni): Think about proper encoding handling here. This needs + // general consideration anyways, since one current discrepancy between + // C++ Nix and Tvix is that the former's strings are arbitrary byte + // sequences without NUL bytes, whereas Tvix only allows valid + // Unicode. See also b/189. + ( + Value::Path(p), + CoercionKind { + import_paths: true, .. + }, + ) => { + let imported = generators::request_path_import(co, *p).await; + // When we import a path from the evaluator, we must attach + // its original path as its context. + context = context.append(NixContextElement::Plain( + imported.to_string_lossy().to_string(), + )); + Ok(imported.into_os_string().into_encoded_bytes().into()) + } + ( + Value::Path(p), + CoercionKind { + import_paths: false, + .. + }, + ) => Ok(p.into_os_string().into_encoded_bytes().into()), + + // Attribute sets can be converted to strings if they either have an + // `__toString` attribute which holds a function that receives the + // set itself or an `outPath` attribute which should be a string. + // `__toString` is preferred. + (Value::Attrs(attrs), kind) => { + if let Some(to_string) = attrs.select("__toString") { + let callable = to_string.clone().force(co, span).await?; + + // Leave the attribute set on the stack as an argument + // to the function call. + generators::request_stack_push(co, Value::Attrs(attrs.clone())).await; + + // Call the callable ... + let result = generators::request_call(co, callable).await; + + // Recurse on the result, as attribute set coercion + // actually works recursively, e.g. you can even return + // /another/ set with a __toString attr. + vals.push(result); + continue; + } else if let Some(out_path) = attrs.select("outPath") { + vals.push(out_path.clone()); + continue; + } else { + return Err(ErrorKind::NotCoercibleToString { from: "set", kind }); + } + } + + // strong coercions + (Value::Null, CoercionKind { strong: true, .. }) + | (Value::Bool(false), CoercionKind { strong: true, .. }) => Ok("".into()), + (Value::Bool(true), CoercionKind { strong: true, .. }) => Ok("1".into()), + + (Value::Integer(i), CoercionKind { strong: true, .. }) => Ok(format!("{i}").into()), + (Value::Float(f), CoercionKind { strong: true, .. }) => { + // contrary to normal Display, coercing a float to a string will + // result in unconditional 6 decimal places + Ok(format!("{:.6}", f).into()) + } + + // Lists are coerced by coercing their elements and interspersing spaces + (Value::List(list), CoercionKind { strong: true, .. }) => { + for elem in list.into_iter().rev() { + vals.push(elem); + } + // In case we are coercing a list within a list we don't want + // to touch this. Since the algorithm is nonrecursive, the + // space would not have been created yet (due to continue). + if is_list_head.is_none() { + is_list_head = Some(true); + } + continue; + } + + (Value::Thunk(_), _) => panic!("Tvix bug: force returned unforced thunk"), + + val @ (Value::Closure(_), _) + | val @ (Value::Builtin(_), _) + | val @ (Value::Null, _) + | val @ (Value::Bool(_), _) + | val @ (Value::Integer(_), _) + | val @ (Value::Float(_), _) + | val @ (Value::List(_), _) => Err(ErrorKind::NotCoercibleToString { + from: val.0.type_of(), + kind, + }), + + (c @ Value::Catchable(_), _) => return Ok(c), + + (Value::AttrNotFound, _) + | (Value::Blueprint(_), _) + | (Value::DeferredUpvalue(_), _) + | (Value::UnresolvedPath(_), _) + | (Value::Json(..), _) + | (Value::FinaliseRequest(_), _) => { + panic!("tvix bug: .coerce_to_string() called on internal value") + } + }; + + if let Some(head) = is_list_head { + if !head { + result.push(b' '); + } else { + is_list_head = Some(false); + } + } + + result.push_str(&coerced?); + } + } + + pub(crate) async fn nix_eq_owned_genco( + self, + other: Value, + co: GenCo, + ptr_eq: PointerEquality, + span: Span, + ) -> Result<Value, ErrorKind> { + self.nix_eq(other, &co, ptr_eq, span).await + } + + /// Compare two Nix values for equality, forcing nested parts of the structure + /// as needed. + /// + /// This comparison needs to be invoked for nested values (e.g. in lists and + /// attribute sets) as well, which is done by suspending and asking the VM to + /// perform the nested comparison. + /// + /// The `top_level` parameter controls whether this invocation is the top-level + /// comparison, or a nested value comparison. See + /// `//tvix/docs/value-pointer-equality.md` + pub(crate) async fn nix_eq( + self, + other: Value, + co: &GenCo, + ptr_eq: PointerEquality, + span: Span, + ) -> Result<Value, ErrorKind> { + // this is a stack of ((v1,v2),peq) triples to be compared; + // after each triple is popped off of the stack, v1 is + // compared to v2 using peq-mode PointerEquality + let mut vals = vec![((self, other), ptr_eq)]; + + loop { + let ((a, b), ptr_eq) = if let Some(abp) = vals.pop() { + abp + } else { + // stack is empty, so comparison has succeeded + return Ok(Value::Bool(true)); + }; + let a = match a { + Value::Thunk(thunk) => { + // If both values are thunks, and thunk comparisons are allowed by + // pointer, do that and move on. + if ptr_eq == PointerEquality::AllowAll { + if let Value::Thunk(t1) = &b { + if t1.ptr_eq(&thunk) { + continue; + } + } + }; + + Thunk::force_(thunk, co, span).await? + } + + _ => a, + }; + + let b = b.force(co, span).await?; + + debug_assert!(!matches!(a, Value::Thunk(_))); + debug_assert!(!matches!(b, Value::Thunk(_))); + + let result = match (a, b) { + // Trivial comparisons + (c @ Value::Catchable(_), _) => return Ok(c), + (_, c @ Value::Catchable(_)) => return Ok(c), + (Value::Null, Value::Null) => true, + (Value::Bool(b1), Value::Bool(b2)) => b1 == b2, + (Value::String(s1), Value::String(s2)) => s1 == s2, + (Value::Path(p1), Value::Path(p2)) => p1 == p2, + + // Numerical comparisons (they work between float & int) + (Value::Integer(i1), Value::Integer(i2)) => i1 == i2, + (Value::Integer(i), Value::Float(f)) => i as f64 == f, + (Value::Float(f1), Value::Float(f2)) => f1 == f2, + (Value::Float(f), Value::Integer(i)) => i as f64 == f, + + // List comparisons + (Value::List(l1), Value::List(l2)) => { + if ptr_eq >= PointerEquality::AllowNested && l1.ptr_eq(&l2) { + continue; + } + + if l1.len() != l2.len() { + return Ok(Value::Bool(false)); + } + + vals.extend(l1.into_iter().rev().zip(l2.into_iter().rev()).zip( + std::iter::repeat(std::cmp::max(ptr_eq, PointerEquality::AllowNested)), + )); + continue; + } + + (_, Value::List(_)) | (Value::List(_), _) => return Ok(Value::Bool(false)), + + // Attribute set comparisons + (Value::Attrs(a1), Value::Attrs(a2)) => { + if ptr_eq >= PointerEquality::AllowNested && a1.ptr_eq(&a2) { + continue; + } + + // Special-case for derivation comparisons: If both attribute sets + // have `type = derivation`, compare them by `outPath`. + #[allow(clippy::single_match)] // might need more match arms later + match (a1.select("type"), a2.select("type")) { + (Some(v1), Some(v2)) => { + let s1 = v1.clone().force(co, span).await?; + if s1.is_catchable() { + return Ok(s1); + } + let s2 = v2.clone().force(co, span).await?; + if s2.is_catchable() { + return Ok(s2); + } + let s1 = s1.to_str(); + let s2 = s2.to_str(); + + if let (Ok(s1), Ok(s2)) = (s1, s2) { + if s1 == "derivation" && s2 == "derivation" { + // TODO(tazjin): are the outPaths really required, + // or should it fall through? + let out1 = a1 + .select_required("outPath") + .context("comparing derivations")? + .clone(); + + let out2 = a2 + .select_required("outPath") + .context("comparing derivations")? + .clone(); + + let out1 = out1.clone().force(co, span).await?; + let out2 = out2.clone().force(co, span).await?; + + if out1.is_catchable() { + return Ok(out1); + } + + if out2.is_catchable() { + return Ok(out2); + } + + let result = + out1.to_contextful_str()? == out2.to_contextful_str()?; + if !result { + return Ok(Value::Bool(false)); + } else { + continue; + } + } + } + } + _ => {} + }; + + if a1.len() != a2.len() { + return Ok(Value::Bool(false)); + } + + // note that it is important to be careful here with the + // order we push the keys and values in order to properly + // compare attrsets containing `throw` elements. + let iter1 = a1.into_iter_sorted().rev(); + let iter2 = a2.into_iter_sorted().rev(); + for ((k1, v1), (k2, v2)) in iter1.zip(iter2) { + vals.push(( + (v1, v2), + std::cmp::max(ptr_eq, PointerEquality::AllowNested), + )); + vals.push(( + (k1.into(), k2.into()), + std::cmp::max(ptr_eq, PointerEquality::AllowNested), + )); + } + continue; + } + + (Value::Attrs(_), _) | (_, Value::Attrs(_)) => return Ok(Value::Bool(false)), + + (Value::Closure(c1), Value::Closure(c2)) + if ptr_eq >= PointerEquality::AllowNested => + { + if Rc::ptr_eq(&c1, &c2) { + continue; + } else { + return Ok(Value::Bool(false)); + } + } + + // Everything else is either incomparable (e.g. internal types) or + // false. + _ => return Ok(Value::Bool(false)), + }; + if !result { + return Ok(Value::Bool(false)); + } + } + } + + pub fn type_of(&self) -> &'static str { + match self { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Integer(_) => "int", + Value::Float(_) => "float", + Value::String(_) => "string", + Value::Path(_) => "path", + Value::Attrs(_) => "set", + Value::List(_) => "list", + Value::Closure(_) | Value::Builtin(_) => "lambda", + + // Internal types. Note: These are only elaborated here + // because it makes debugging easier. If a user ever sees + // any of these strings, it's a bug. + Value::Thunk(_) => "internal[thunk]", + Value::AttrNotFound => "internal[attr_not_found]", + Value::Blueprint(_) => "internal[blueprint]", + Value::DeferredUpvalue(_) => "internal[deferred_upvalue]", + Value::UnresolvedPath(_) => "internal[unresolved_path]", + Value::Json(..) => "internal[json]", + Value::FinaliseRequest(_) => "internal[finaliser_sentinel]", + Value::Catchable(_) => "internal[catchable]", + } + } + + gen_cast!(as_bool, bool, "bool", Value::Bool(b), *b); + gen_cast!(as_int, i64, "int", Value::Integer(x), *x); + gen_cast!(as_float, f64, "float", Value::Float(x), *x); + + /// Cast the current value into a **context-less** string. + /// If you wanted to cast it into a potentially contextful string, + /// you have to explicitly use `to_contextful_str`. + /// Contextful strings are special, they should not be obtained + /// everytime you want a string. + pub fn to_str(&self) -> Result<NixString, ErrorKind> { + match self { + Value::String(s) if !s.has_context() => Ok((*s).clone()), + Value::Thunk(thunk) => Self::to_str(&thunk.value()), + other => Err(type_error("contextless strings", other)), + } + } + + gen_cast!( + to_contextful_str, + NixString, + "contextful string", + Value::String(s), + (*s).clone() + ); + gen_cast!(to_path, Box<PathBuf>, "path", Value::Path(p), p.clone()); + gen_cast!(to_attrs, Box<NixAttrs>, "set", Value::Attrs(a), a.clone()); + gen_cast!(to_list, NixList, "list", Value::List(l), l.clone()); + gen_cast!( + as_closure, + Rc<Closure>, + "lambda", + Value::Closure(c), + c.clone() + ); + + gen_cast_mut!(as_list_mut, NixList, "list", List); + + gen_is!(is_path, Value::Path(_)); + gen_is!(is_number, Value::Integer(_) | Value::Float(_)); + gen_is!(is_bool, Value::Bool(_)); + gen_is!(is_attrs, Value::Attrs(_)); + gen_is!(is_catchable, Value::Catchable(_)); + + /// Returns `true` if the value is a [`Thunk`]. + /// + /// [`Thunk`]: Value::Thunk + pub fn is_thunk(&self) -> bool { + matches!(self, Self::Thunk(..)) + } + + /// Compare `self` against other using (fallible) Nix ordering semantics. + /// + /// The function is intended to be used from within other generator + /// functions or `gen!` blocks. + pub async fn nix_cmp_ordering( + self, + other: Self, + co: GenCo, + span: Span, + ) -> Result<Result<Ordering, CatchableErrorKind>, ErrorKind> { + Self::nix_cmp_ordering_(self, other, co, span).await + } + + async fn nix_cmp_ordering_( + myself: Self, + other: Self, + co: GenCo, + span: Span, + ) -> Result<Result<Ordering, CatchableErrorKind>, ErrorKind> { + // this is a stack of ((v1,v2),peq) triples to be compared; + // after each triple is popped off of the stack, v1 is + // compared to v2 using peq-mode PointerEquality + let mut vals = vec![((myself, other), PointerEquality::ForbidAll)]; + + loop { + let ((mut a, mut b), ptr_eq) = if let Some(abp) = vals.pop() { + abp + } else { + // stack is empty, so they are equal + return Ok(Ok(Ordering::Equal)); + }; + if ptr_eq == PointerEquality::AllowAll { + if a.clone() + .nix_eq(b.clone(), &co, PointerEquality::AllowAll, span) + .await? + .as_bool()? + { + continue; + } + a = a.force(&co, span).await?; + b = b.force(&co, span).await?; + } + let result = match (a, b) { + (Value::Catchable(c), _) => return Ok(Err(*c)), + (_, Value::Catchable(c)) => return Ok(Err(*c)), + // same types + (Value::Integer(i1), Value::Integer(i2)) => i1.cmp(&i2), + (Value::Float(f1), Value::Float(f2)) => f1.total_cmp(&f2), + (Value::String(s1), Value::String(s2)) => s1.cmp(&s2), + (Value::List(l1), Value::List(l2)) => { + let max = l1.len().max(l2.len()); + for j in 0..max { + let i = max - 1 - j; + if i >= l2.len() { + vals.push(((1.into(), 0.into()), PointerEquality::ForbidAll)); + } else if i >= l1.len() { + vals.push(((0.into(), 1.into()), PointerEquality::ForbidAll)); + } else { + vals.push(((l1[i].clone(), l2[i].clone()), PointerEquality::AllowAll)); + } + } + continue; + } + + // different types + (Value::Integer(i1), Value::Float(f2)) => (i1 as f64).total_cmp(&f2), + (Value::Float(f1), Value::Integer(i2)) => f1.total_cmp(&(i2 as f64)), + + // unsupported types + (lhs, rhs) => { + return Err(ErrorKind::Incomparable { + lhs: lhs.type_of(), + rhs: rhs.type_of(), + }) + } + }; + if result != Ordering::Equal { + return Ok(Ok(result)); + } + } + } + + // TODO(amjoseph): de-asyncify this (when called directly by the VM) + pub async fn force(self, co: &GenCo, span: Span) -> Result<Value, ErrorKind> { + if let Value::Thunk(thunk) = self { + // TODO(amjoseph): use #[tailcall::mutual] + return Thunk::force_(thunk, co, span).await; + } + Ok(self) + } + + // need two flavors, because async + pub async fn force_owned_genco(self, co: GenCo, span: Span) -> Result<Value, ErrorKind> { + if let Value::Thunk(thunk) = self { + // TODO(amjoseph): use #[tailcall::mutual] + return Thunk::force_(thunk, &co, span).await; + } + Ok(self) + } + + /// Explain a value in a human-readable way, e.g. by presenting + /// the docstrings of functions if present. + pub fn explain(&self) -> String { + match self { + Value::Null => "the 'null' value".into(), + Value::Bool(b) => format!("the boolean value '{}'", b), + Value::Integer(i) => format!("the integer '{}'", i), + Value::Float(f) => format!("the float '{}'", f), + Value::String(s) if s.has_context() => format!("the contextful string '{}'", s), + Value::String(s) => format!("the contextless string '{}'", s), + Value::Path(p) => format!("the path '{}'", p.to_string_lossy()), + Value::Attrs(attrs) => format!("a {}-item attribute set", attrs.len()), + Value::List(list) => format!("a {}-item list", list.len()), + + Value::Closure(f) => { + if let Some(name) = &f.lambda.name { + format!("the user-defined Nix function '{}'", name) + } else { + "a user-defined Nix function".to_string() + } + } + + Value::Builtin(b) => { + let mut out = format!("the builtin function '{}'", b.name()); + if let Some(docs) = b.documentation() { + out.push_str("\n\n"); + out.push_str(docs); + } + out + } + + // TODO: handle suspended thunks with a different explanation instead of panicking + Value::Thunk(t) => t.value().explain(), + + Value::Catchable(_) => "a catchable failure".into(), + + Value::AttrNotFound + | Value::Blueprint(_) + | Value::DeferredUpvalue(_) + | Value::UnresolvedPath(_) + | Value::Json(..) + | Value::FinaliseRequest(_) => "an internal Tvix evaluator value".into(), + } + } + + /// Constructs a thunk that will be evaluated lazily at runtime. This lets + /// users of Tvix implement their own lazy builtins and so on. + pub fn suspended_native_thunk(native: Box<dyn Fn() -> Result<Value, ErrorKind>>) -> Self { + Value::Thunk(Thunk::new_suspended_native(native)) + } +} + +trait TotalDisplay { + fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result; +} + +impl Display for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.total_fmt(f, &mut Default::default()) + } +} + +/// Emulates the C++-Nix style formatting of floats, which diverges +/// significantly from Rust's native float formatting. +fn total_fmt_float<F: std::fmt::Write>(num: f64, mut f: F) -> std::fmt::Result { + let mut buf = [b'0'; lexical_core::BUFFER_SIZE]; + let mut s = lexical_core::write_with_options::<f64, { CXX_LITERAL }>( + num, + &mut buf, + &WRITE_FLOAT_OPTIONS, + ); + + // apply some postprocessing on the buffer. If scientific + // notation is used (we see an `e`), and the next character is + // a digit, add the missing `+` sign.) + let mut new_s = Vec::with_capacity(s.len()); + + if s.contains(&b'e') { + for (i, c) in s.iter().enumerate() { + // encountered `e` + if c == &b'e' { + // next character is a digit (so no negative exponent) + if s.len() > i && s[i + 1].is_ascii_digit() { + // copy everything from the start up to (including) the e + new_s.extend_from_slice(&s[0..=i]); + // add the missing '+' + new_s.push(b'+'); + // check for the remaining characters. + // If it's only one, we need to prepend a trailing zero + if s.len() == i + 2 { + new_s.push(b'0'); + } + new_s.extend_from_slice(&s[i + 1..]); + break; + } + } + } + + // if we modified the scientific notation, flip the reference + if !new_s.is_empty() { + s = &mut new_s + } + } else if s.contains(&b'.') { + // else, if this is not scientific notation, and there's a + // decimal point, make sure we really drop trailing zeroes. + // In some cases, lexical_core doesn't. + for (i, c) in s.iter().enumerate() { + // at `.`` + if c == &b'.' { + // trim zeroes from the right side. + let frac = String::from_utf8_lossy(&s[i + 1..]); + let frac_no_trailing_zeroes = frac.trim_end_matches('0'); + + if frac.len() != frac_no_trailing_zeroes.len() { + // we managed to strip something, construct new_s + if frac_no_trailing_zeroes.is_empty() { + // if frac_no_trailing_zeroes is empty, the fractional part was all zeroes, so we can drop the decimal point as well + new_s.extend_from_slice(&s[0..=i - 1]); + } else { + // else, assemble the rest of the string + new_s.extend_from_slice(&s[0..=i]); + new_s.extend_from_slice(frac_no_trailing_zeroes.as_bytes()); + } + + // flip the reference + s = &mut new_s; + break; + } + } + } + } + + write!(f, "{}", String::from_utf8_lossy(s)) +} + +impl TotalDisplay for Value { + fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { + match self { + Value::Null => f.write_str("null"), + Value::Bool(true) => f.write_str("true"), + Value::Bool(false) => f.write_str("false"), + Value::Integer(num) => write!(f, "{}", num), + Value::String(s) => s.fmt(f), + Value::Path(p) => p.display().fmt(f), + Value::Attrs(attrs) => attrs.total_fmt(f, set), + Value::List(list) => list.total_fmt(f, set), + // TODO: fancy REPL display with position + Value::Closure(_) => f.write_str("<LAMBDA>"), + Value::Builtin(builtin) => builtin.fmt(f), + + // Nix prints floats with a maximum precision of 5 digits + // only. Except when it decides to use scientific notation + // (with a + after the `e`, and zero-padded to 0 digits) + Value::Float(num) => total_fmt_float(*num, f), + + // internal types + Value::AttrNotFound => f.write_str("internal[not found]"), + Value::Blueprint(_) => f.write_str("internal[blueprint]"), + Value::DeferredUpvalue(_) => f.write_str("internal[deferred_upvalue]"), + Value::UnresolvedPath(_) => f.write_str("internal[unresolved_path]"), + Value::Json(..) => f.write_str("internal[json]"), + Value::FinaliseRequest(_) => f.write_str("internal[finaliser_sentinel]"), + + // Delegate thunk display to the type, as it must handle + // the case of already evaluated or cyclic thunks. + Value::Thunk(t) => t.total_fmt(f, set), + Value::Catchable(_) => panic!("total_fmt() called on a CatchableErrorKind"), + } + } +} + +impl From<bool> for Value { + fn from(b: bool) -> Self { + Value::Bool(b) + } +} + +impl From<i64> for Value { + fn from(i: i64) -> Self { + Self::Integer(i) + } +} + +impl From<f64> for Value { + fn from(i: f64) -> Self { + Self::Float(i) + } +} + +impl From<PathBuf> for Value { + fn from(path: PathBuf) -> Self { + Self::Path(Box::new(path)) + } +} + +fn type_error(expected: &'static str, actual: &Value) -> ErrorKind { + ErrorKind::TypeError { + expected, + actual: actual.type_of(), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::mem::size_of; + + #[test] + fn size() { + assert_eq!(size_of::<Value>(), 16); + } + + mod floats { + use crate::value::total_fmt_float; + + #[test] + fn format_float() { + let ff = [ + (0f64, "0"), + (1.0f64, "1"), + (-0.01, "-0.01"), + (5e+22, "5e+22"), + (1e6, "1e+06"), + (-2E-2, "-0.02"), + (6.626e-34, "6.626e-34"), + (9_224_617.445_991_227, "9.22462e+06"), + ]; + for (n, expected) in ff.iter() { + let mut buf = String::new(); + let res = total_fmt_float(*n, &mut buf); + assert!(res.is_ok()); + assert_eq!( + expected, &buf, + "{} should be formatted as {}, but got {}", + n, expected, &buf + ); + } + } + } +} diff --git a/tvix/eval/src/value/path.rs b/tvix/eval/src/value/path.rs new file mode 100644 index 000000000000..ad526a8746f8 --- /dev/null +++ b/tvix/eval/src/value/path.rs @@ -0,0 +1,14 @@ +use path_clean::PathClean; +use std::path::PathBuf; + +/// This function should match the behavior of canonPath() in +/// src/libutil/util.cc of cppnix. Currently it does not match that +/// behavior; it uses the `path_clean` library which is based on the +/// Go standard library +/// +/// TODO: make this match the behavior of cppnix +/// TODO: write tests for this + +pub fn canon_path(path: PathBuf) -> PathBuf { + path.clean() +} diff --git a/tvix/eval/src/value/string/context.rs b/tvix/eval/src/value/string/context.rs new file mode 100644 index 000000000000..e1c04735ddde --- /dev/null +++ b/tvix/eval/src/value/string/context.rs @@ -0,0 +1,161 @@ +use rustc_hash::FxHashSet; +use serde::Serialize; + +use super::NixString; + +#[derive(Clone, Debug, Serialize, Hash, PartialEq, Eq)] +pub enum NixContextElement { + /// A plain store path (e.g. source files copied to the store) + Plain(String), + + /// Single output of a derivation, represented by its name and its derivation path. + Single { name: String, derivation: String }, + + /// A reference to a complete derivation + /// including its source and its binary closure. + /// It is used for the `drvPath` attribute context. + /// The referred string is the store path to + /// the derivation path. + Derivation(String), +} + +/// Nix context strings representation in Tvix. This tracks a set of different kinds of string +/// dependencies that we can come across during manipulation of our language primitives, mostly +/// strings. There's some simple algebra of context strings and how they propagate w.r.t. primitive +/// operations, e.g. concatenation, interpolation and other string operations. +#[repr(transparent)] +#[derive(Clone, Debug, Serialize, Default)] +pub struct NixContext(FxHashSet<NixContextElement>); + +impl From<NixContextElement> for NixContext { + fn from(value: NixContextElement) -> Self { + let mut set = FxHashSet::default(); + set.insert(value); + Self(set) + } +} + +impl From<FxHashSet<NixContextElement>> for NixContext { + fn from(value: FxHashSet<NixContextElement>) -> Self { + Self(value) + } +} + +impl<const N: usize> From<[NixContextElement; N]> for NixContext { + fn from(value: [NixContextElement; N]) -> Self { + let mut set = FxHashSet::default(); + for elt in value { + set.insert(elt); + } + Self(set) + } +} + +impl NixContext { + /// Creates an empty context that can be populated + /// and passed to form a contextful [NixString], albeit + /// if the context is concretly empty, the resulting [NixString] + /// will be contextless. + pub fn new() -> Self { + Self::default() + } + + /// For internal consumers, we let people observe + /// if the [NixContext] is actually empty or not + /// to decide whether they want to skip the allocation + /// of a full blown [HashSet]. + pub(crate) fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Consumes a new [NixContextElement] and add it if not already + /// present in this context. + pub fn append(mut self, other: NixContextElement) -> Self { + self.0.insert(other); + self + } + + /// Extends the existing context with more context elements. + pub fn extend<T>(&mut self, iter: T) + where + T: IntoIterator<Item = NixContextElement>, + { + self.0.extend(iter) + } + + /// Copies from another [NixString] its context strings + /// in this context. + pub fn mimic(&mut self, other: &NixString) { + if let Some(context) = other.context() { + self.extend(context.iter().cloned()); + } + } + + /// Iterates over "plain" context elements, e.g. sources imported + /// in the store without more information, i.e. `toFile` or coerced imported paths. + /// It yields paths to the store. + pub fn iter_plain(&self) -> impl Iterator<Item = &str> { + self.iter().filter_map(|elt| { + if let NixContextElement::Plain(s) = elt { + Some(s.as_str()) + } else { + None + } + }) + } + + /// Iterates over "full derivations" context elements, e.g. something + /// referring to their `drvPath`, i.e. their full sources and binary closure. + /// It yields derivation paths. + pub fn iter_derivation(&self) -> impl Iterator<Item = &str> { + self.iter().filter_map(|elt| { + if let NixContextElement::Derivation(s) = elt { + Some(s.as_str()) + } else { + None + } + }) + } + + /// Iterates over "single" context elements, e.g. single derived paths, + /// or also known as the single output of a given derivation. + /// The first element of the tuple is the output name + /// and the second element is the derivation path. + pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { + self.iter().filter_map(|elt| { + if let NixContextElement::Single { name, derivation } = elt { + Some((name.as_str(), derivation.as_str())) + } else { + None + } + }) + } + + /// Iterates over any element of the context. + pub fn iter(&self) -> impl Iterator<Item = &NixContextElement> { + self.0.iter() + } + + /// Produces a list of owned references to this current context, + /// no matter its type. + pub fn to_owned_references(self) -> Vec<String> { + self.0 + .into_iter() + .map(|ctx| match ctx { + NixContextElement::Derivation(drv_path) => drv_path, + NixContextElement::Plain(store_path) => store_path, + NixContextElement::Single { derivation, .. } => derivation, + }) + .collect() + } +} + +impl IntoIterator for NixContext { + type Item = NixContextElement; + + type IntoIter = std::collections::hash_set::IntoIter<NixContextElement>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} diff --git a/tvix/eval/src/value/string/mod.rs b/tvix/eval/src/value/string/mod.rs new file mode 100644 index 000000000000..5bcb4786b283 --- /dev/null +++ b/tvix/eval/src/value/string/mod.rs @@ -0,0 +1,879 @@ +//! This module implements Nix language strings. +//! +//! See [`NixString`] for more information about the internals of string values + +use bstr::{BStr, BString, ByteSlice, Chars}; +use nohash_hasher::BuildNoHashHasher; +use rnix::ast; +#[cfg(feature = "no_leak")] +use rustc_hash::FxHashSet; +use rustc_hash::FxHasher; +use std::alloc::dealloc; +use std::alloc::{alloc, handle_alloc_error, Layout}; +use std::borrow::{Borrow, Cow}; +use std::cell::RefCell; +use std::ffi::c_void; +use std::fmt::{self, Debug, Display}; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use std::ptr::{self, NonNull}; +use std::slice; + +use serde::de::{Deserializer, Visitor}; +use serde::Deserialize; + +mod context; + +pub use context::{NixContext, NixContextElement}; + +/// This type is never instantiated, but serves to document the memory layout of the actual heap +/// allocation for Nix strings. +#[allow(dead_code)] +struct NixStringInner { + /// The string context, if any. Note that this is boxed to take advantage of the null pointer + /// niche, otherwise this field ends up being very large: + /// + /// ```notrust + /// >> std::mem::size_of::<Option<HashSet<String>>>() + /// 48 + /// + /// >> std::mem::size_of::<Option<Box<HashSet<String>>>>() + /// 8 + /// ``` + context: Option<Box<NixContext>>, + /// The length of the data, stored *inline in the allocation* + length: usize, + /// The actual data for the string itself. Will always be `length` bytes long + data: [u8], +} + +#[allow(clippy::zst_offset)] +impl NixStringInner { + /// Construct a [`Layout`] for a nix string allocation with the given length. + /// + /// Returns a tuple of: + /// 1. The layout itself. + /// 2. The offset of [`Self::length`] within the allocation, assuming the allocation starts at 0 + /// 3. The offset of the data array within the allocation, assuming the allocation starts at 0 + fn layout(len: usize) -> (Layout, usize, usize) { + let layout = Layout::new::<Option<Box<NixContext>>>(); + let (layout, len_offset) = layout.extend(Layout::new::<usize>()).unwrap(); + let (layout, data_offset) = layout.extend(Layout::array::<u8>(len).unwrap()).unwrap(); + (layout, len_offset, data_offset) + } + + /// Returns the [`Layout`] for an *already-allocated* nix string, loading the length from the + /// pointer. + /// + /// Returns a tuple of: + /// 1. The layout itself. + /// 2. The offset of [`Self::length`] within the allocation, assuming the allocation starts at 0 + /// 3. The offset of the data array within the allocation, assuming the allocation starts at 0 + /// + /// # Safety + /// + /// This function must only be called on a pointer that has been properly initialized with + /// [`Self::alloc`]. The data buffer may not necessarily be initialized + unsafe fn layout_of(this: NonNull<c_void>) -> (Layout, usize, usize) { + let layout = Layout::new::<Option<Box<NixContext>>>(); + let (_, len_offset) = layout.extend(Layout::new::<usize>()).unwrap(); + // SAFETY: Layouts are linear, so even though we haven't involved data at all yet, we know + // the len_offset is a valid offset into the second field of the allocation + let len = *(this.as_ptr().add(len_offset) as *const usize); + Self::layout(len) + } + + /// Allocate an *uninitialized* nix string with the given length. Writes the length to the + /// length value in the pointer, but leaves both context and data uninitialized + /// + /// This function is safe to call (as constructing pointers of any sort of validity is always + /// safe in Rust) but it is unsafe to use the resulting pointer to do anything other than + /// + /// 1. Read the length + /// 2. Write the context + /// 3. Write the data + /// + /// until the string is fully initialized + fn alloc(len: usize) -> NonNull<c_void> { + let (layout, len_offset, _data_offset) = Self::layout(len); + debug_assert_ne!(layout.size(), 0); + unsafe { + // SAFETY: Layout has non-zero size, since the layout of the context and the + // layout of the len both have non-zero size + let ptr = alloc(layout); + + if let Some(this) = NonNull::new(ptr as *mut _) { + // SAFETY: We've allocated with a layout that causes the len_offset to be in-bounds + // and writeable, and if the allocation succeeded it won't wrap + ((this.as_ptr() as *mut u8).add(len_offset) as *mut usize).write(len); + debug_assert_eq!(Self::len(this), len); + this + } else { + handle_alloc_error(layout); + } + } + } + + /// Deallocate the Nix string at the given pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn dealloc(this: NonNull<c_void>) { + let (layout, _, _) = Self::layout_of(this); + // SAFETY: okay because of the safety guarantees of this method + dealloc(this.as_ptr() as *mut u8, layout) + } + + /// Return the length of the Nix string at the given pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn len(this: NonNull<c_void>) -> usize { + let (_, len_offset, _) = Self::layout_of(this); + // SAFETY: As long as the safety guarantees of this method are upheld, we've allocated with + // a layout that causes the len_offset to be in-bounds and writeable, and if the allocation + // succeeded it won't wrap + *(this.as_ptr().add(len_offset) as *const usize) + } + + /// Return a pointer to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn context_ptr(this: NonNull<c_void>) -> *mut Option<Box<NixContext>> { + // SAFETY: The context is the first field in the layout of the allocation + this.as_ptr() as *mut Option<Box<NixContext>> + } + + /// Construct a shared reference to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`NonNull::as_ref`] for more. + unsafe fn context_ref<'a>(this: NonNull<c_void>) -> &'a Option<Box<NixContext>> { + Self::context_ptr(this).as_ref().unwrap() + } + + /// Construct a mutable reference to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`NonNull::as_mut`] for more. + unsafe fn context_mut<'a>(this: NonNull<c_void>) -> &'a mut Option<Box<NixContext>> { + Self::context_ptr(this).as_mut().unwrap() + } + + /// Return a pointer to the data array within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn data_ptr(this: NonNull<c_void>) -> *mut u8 { + let (_, _, data_offset) = Self::layout_of(this); + // SAFETY: data is the third field in the layout of the allocation + this.as_ptr().add(data_offset) as *mut u8 + } + + /// Construct a shared reference to the data slice within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the data array has been properly initialized (by writing to the + /// pointer returned from [`Self::data_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`slice::from_raw_parts`] for more. + unsafe fn data_slice<'a>(this: NonNull<c_void>) -> &'a [u8] { + let len = Self::len(this); + let data = Self::data_ptr(this); + slice::from_raw_parts(data, len) + } + + /// Construct a mutable reference to the data slice within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the data array has been properly initialized (by writing to the + /// pointer returned from [`Self::data_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`slice::from_raw_parts_mut`] for more. + #[allow(dead_code)] + unsafe fn data_slice_mut<'a>(this: NonNull<c_void>) -> &'a mut [u8] { + let len = Self::len(this); + let data = Self::data_ptr(this); + slice::from_raw_parts_mut(data, len) + } + + /// Clone the Nix string pointed to by this pointer, and return a pointer to a new Nix string + /// containing the same data and context. + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]), and the data array has been properly + /// initialized (by writing to the pointer returned from [`Self::data_ptr`]). + unsafe fn clone(this: NonNull<c_void>) -> NonNull<c_void> { + let (layout, _, _) = Self::layout_of(this); + let ptr = alloc(layout); + if let Some(new) = NonNull::new(ptr as *mut _) { + ptr::copy_nonoverlapping(this.as_ptr(), new.as_ptr(), layout.size()); + Self::context_ptr(new).write(Self::context_ref(this).clone()); + new + } else { + handle_alloc_error(layout); + } + } +} + +#[derive(Default)] +struct InternerInner { + #[allow(clippy::disallowed_types)] // Not using the default hasher + map: std::collections::HashMap<u64, NonNull<c_void>, BuildNoHashHasher<u64>>, + #[cfg(feature = "no_leak")] + #[allow(clippy::disallowed_types)] // Not using the default hasher + interned_strings: FxHashSet<NonNull<c_void>>, +} + +unsafe impl Send for InternerInner {} + +fn hash<T>(s: T) -> u64 +where + T: Hash, +{ + let mut hasher = FxHasher::default(); + s.hash(&mut hasher); + hasher.finish() +} + +impl InternerInner { + pub fn intern(&mut self, s: &[u8]) -> NixString { + let hash = hash(s); + if let Some(s) = self.map.get(&hash) { + return NixString(*s); + } + + let string = NixString::new_inner(s, None); + self.map.insert(hash, string.0); + #[cfg(feature = "no_leak")] + self.interned_strings.insert(string.0); + string + } +} + +#[derive(Default)] +struct Interner(RefCell<InternerInner>); + +impl Interner { + pub fn intern(&self, s: &[u8]) -> NixString { + self.0.borrow_mut().intern(s) + } + + #[cfg(feature = "no_leak")] + pub fn is_interned_string(&self, string: &NixString) -> bool { + self.0.borrow().interned_strings.contains(&string.0) + } +} + +thread_local! { + static INTERNER: Interner = Interner::default(); +} + +/// Nix string values +/// +/// # Internals +/// +/// For performance reasons (to keep allocations small, and to avoid indirections), [`NixString`] is +/// represented as a single *thin* pointer to a packed data structure containing the +/// [context][NixContext] and the string data itself (which is a raw byte array, to match the Nix +/// string semantics that allow any array of bytes to be represented by a string). + +/// This memory representation is documented in [`NixStringInner`], but since Rust prefers to deal +/// with slices via *fat pointers* (pointers that include the length in the *pointer*, not in the +/// heap allocation), we have to do mostly manual layout management and allocation for this +/// representation. See the documentation for the methods of [`NixStringInner`] for more information +pub struct NixString(NonNull<c_void>); + +unsafe impl Send for NixString {} +unsafe impl Sync for NixString {} + +impl Drop for NixString { + #[cfg(not(feature = "no_leak"))] + fn drop(&mut self) { + if self.context().is_some() { + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of dealloc + unsafe { + NixStringInner::dealloc(self.0); + } + } + } + + #[cfg(feature = "no_leak")] + fn drop(&mut self) { + if INTERNER.with(|i| i.is_interned_string(self)) { + return; + } + + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of dealloc + unsafe { + NixStringInner::dealloc(self.0); + } + } +} + +impl Clone for NixString { + fn clone(&self) -> Self { + if cfg!(feature = "no_leak") || self.context().is_some() { + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of clone + unsafe { Self(NixStringInner::clone(self.0)) } + } else { + // SAFETY: + // + // - NixStrings are never mutated + // - NixStrings are never freed + Self(self.0) + } + } +} + +impl Debug for NixString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(ctx) = self.context() { + f.debug_struct("NixString") + .field("context", ctx) + .field("data", &self.as_bstr()) + .finish() + } else { + write!(f, "{:?}", self.as_bstr()) + } + } +} + +impl PartialEq for NixString { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 || self.as_bstr() == other.as_bstr() + } +} + +impl Eq for NixString {} + +impl PartialEq<&[u8]> for NixString { + fn eq(&self, other: &&[u8]) -> bool { + **self == **other + } +} + +impl PartialEq<&str> for NixString { + fn eq(&self, other: &&str) -> bool { + **self == other.as_bytes() + } +} + +impl PartialOrd for NixString { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for NixString { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + if self.0 == other.0 { + return std::cmp::Ordering::Equal; + } + self.as_bstr().cmp(other.as_bstr()) + } +} + +impl From<Box<BStr>> for NixString { + fn from(value: Box<BStr>) -> Self { + Self::new(&value, None) + } +} + +impl From<BString> for NixString { + fn from(value: BString) -> Self { + Self::new(&value, None) + } +} + +impl From<&BStr> for NixString { + fn from(value: &BStr) -> Self { + value.to_owned().into() + } +} + +impl From<&[u8]> for NixString { + fn from(value: &[u8]) -> Self { + Self::from(value.to_owned()) + } +} + +impl From<Vec<u8>> for NixString { + fn from(value: Vec<u8>) -> Self { + value.into_boxed_slice().into() + } +} + +impl From<Box<[u8]>> for NixString { + fn from(value: Box<[u8]>) -> Self { + Self::new(&value, None) + } +} + +impl From<&str> for NixString { + fn from(s: &str) -> Self { + s.as_bytes().into() + } +} + +impl From<String> for NixString { + fn from(s: String) -> Self { + s.into_bytes().into() + } +} + +impl<T> From<(T, Option<Box<NixContext>>)> for NixString +where + NixString: From<T>, +{ + fn from((s, ctx): (T, Option<Box<NixContext>>)) -> Self { + Self::new(NixString::from(s).as_ref(), ctx) + } +} + +impl From<Box<str>> for NixString { + fn from(s: Box<str>) -> Self { + s.into_boxed_bytes().into() + } +} + +impl From<ast::Ident> for NixString { + fn from(ident: ast::Ident) -> Self { + ident.ident_token().unwrap().text().into() + } +} + +impl<'a> From<&'a NixString> for &'a BStr { + fn from(s: &'a NixString) -> Self { + s.as_bstr() + } +} + +// No impl From<NixString> for String, that one quotes. + +impl From<NixString> for BString { + fn from(s: NixString) -> Self { + s.as_bstr().to_owned() + } +} + +impl AsRef<[u8]> for NixString { + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl Borrow<BStr> for NixString { + fn borrow(&self) -> &BStr { + self.as_bstr() + } +} + +impl Hash for NixString { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.as_bstr().hash(state) + } +} + +impl<'de> Deserialize<'de> for NixString { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct StringVisitor; + + impl<'de> Visitor<'de> for StringVisitor { + type Value = NixString; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a valid Nix string") + } + + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(v.into()) + } + + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(v.into()) + } + } + + deserializer.deserialize_string(StringVisitor) + } +} + +impl Deref for NixString { + type Target = BStr; + + fn deref(&self) -> &Self::Target { + self.as_bstr() + } +} + +#[cfg(feature = "arbitrary")] +mod arbitrary { + use super::*; + use proptest::prelude::{any_with, Arbitrary}; + use proptest::strategy::{BoxedStrategy, Strategy}; + + impl Arbitrary for NixString { + type Parameters = <String as Arbitrary>::Parameters; + + type Strategy = BoxedStrategy<Self>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + any_with::<String>(args).prop_map(Self::from).boxed() + } + } +} + +/// Set non-scientifically. TODO(aspen): think more about what this should be +const INTERN_THRESHOLD: usize = 32; + +impl NixString { + fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self { + debug_assert!( + !context.as_deref().is_some_and(NixContext::is_empty), + "BUG: initialized with empty context" + ); + + if !cfg!(feature = "no_leak") /* It's only safe to intern if we leak strings, since there's + * nothing yet preventing interned strings from getting freed + * (and then used by other copies) otherwise + */ + && contents.len() <= INTERN_THRESHOLD + && context.is_none() + { + return INTERNER.with(|i| i.intern(contents)); + } + + Self::new_inner(contents, context) + } + + fn new_inner(contents: &[u8], context: Option<Box<NixContext>>) -> Self { + // SAFETY: We're always fully initializing a NixString here: + // + // 1. NixStringInner::alloc sets up the len for us + // 2. We set the context, using ptr::write to make sure that the uninitialized memory isn't + // read or dropped + // 3. We set the data, using copy_from_nonoverlapping to make sure that the uninitialized + // memory isn't read or dropped + // + // Only *then* can we construct a NixString + unsafe { + let inner = NixStringInner::alloc(contents.len()); + NixStringInner::context_ptr(inner).write(context); + NixStringInner::data_ptr(inner) + .copy_from_nonoverlapping(contents.as_ptr(), contents.len()); + Self(inner) + } + } + + pub fn new_inherit_context_from<T>(other: &NixString, new_contents: T) -> Self + where + NixString: From<T>, + { + Self::new( + Self::from(new_contents).as_ref(), + other.context().map(|c| Box::new(c.clone())), + ) + } + + pub fn new_context_from<T>(context: NixContext, contents: T) -> Self + where + NixString: From<T>, + { + Self::new( + Self::from(contents).as_ref(), + if context.is_empty() { + None + } else { + Some(Box::new(context)) + }, + ) + } + + pub fn as_bstr(&self) -> &BStr { + BStr::new(self.as_bytes()) + } + + pub fn as_bytes(&self) -> &[u8] { + // SAFETY: There's no way to construct an uninitialized NixString (see the SAFETY comment in + // `new`) + unsafe { NixStringInner::data_slice(self.0) } + } + + pub fn into_bstring(self) -> BString { + self.as_bstr().to_owned() + } + + /// Return a displayable representation of the string as an + /// identifier. + /// + /// This is used when printing out strings used as e.g. attribute + /// set keys, as those are only escaped in the presence of special + /// characters. + pub fn ident_str(&self) -> Cow<str> { + let escaped = match self.to_str_lossy() { + Cow::Borrowed(s) => nix_escape_string(s), + Cow::Owned(s) => nix_escape_string(&s).into_owned().into(), + }; + match escaped { + // A borrowed string is unchanged and can be returned as + // is. + Cow::Borrowed(_) => { + if is_valid_nix_identifier(&escaped) && !is_keyword(&escaped) { + escaped + } else { + Cow::Owned(format!("\"{}\"", escaped)) + } + } + + // An owned string has escapes, and needs the outer quotes + // for display. + Cow::Owned(s) => Cow::Owned(format!("\"{}\"", s)), + } + } + + pub fn concat(&self, other: &Self) -> Self { + let mut s = self.to_vec(); + s.extend(&(***other)); + + let context = [self.context(), other.context()] + .into_iter() + .flatten() + .fold(NixContext::new(), |mut acc_ctx, new_ctx| { + // TODO: consume new_ctx? + acc_ctx.extend(new_ctx.iter().cloned()); + acc_ctx + }); + Self::new_context_from(context, s) + } + + pub(crate) fn context(&self) -> Option<&NixContext> { + // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY + // comment in `new`). + // + // Also, we're using the same lifetime and mutability as self, to fit the + // pointer-to-reference conversion rules + let context = unsafe { NixStringInner::context_ref(self.0).as_deref() }; + + debug_assert!( + !context.is_some_and(NixContext::is_empty), + "BUG: empty context" + ); + + context + } + + pub(crate) fn context_mut(&mut self) -> &mut Option<Box<NixContext>> { + // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY + // comment in `new`). + // + // Also, we're using the same lifetime and mutability as self, to fit the + // pointer-to-reference conversion rules + let context = unsafe { NixStringInner::context_mut(self.0) }; + + debug_assert!( + !context.as_deref().is_some_and(NixContext::is_empty), + "BUG: empty context" + ); + + context + } + + /// Iterates over all context elements. + /// See [iter_plain], [iter_derivation], [iter_single_outputs]. + pub fn iter_context(&self) -> impl Iterator<Item = &NixContext> { + self.context().into_iter() + } + + /// Iterates over "plain" context elements, e.g. sources imported + /// in the store without more information, i.e. `toFile` or coerced imported paths. + /// It yields paths to the store. + pub fn iter_ctx_plain(&self) -> impl Iterator<Item = &str> { + self.iter_context().flat_map(|context| context.iter_plain()) + } + + /// Iterates over "full derivations" context elements, e.g. something + /// referring to their `drvPath`, i.e. their full sources and binary closure. + /// It yields derivation paths. + pub fn iter_ctx_derivation(&self) -> impl Iterator<Item = &str> { + return self + .iter_context() + .flat_map(|context| context.iter_derivation()); + } + + /// Iterates over "single" context elements, e.g. single derived paths, + /// or also known as the single output of a given derivation. + /// The first element of the tuple is the output name + /// and the second element is the derivation path. + pub fn iter_ctx_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { + return self + .iter_context() + .flat_map(|context| context.iter_single_outputs()); + } + + /// Returns whether this Nix string possess a context or not. + pub fn has_context(&self) -> bool { + self.context().is_some() + } + + /// This clears the context of the string, returning + /// the removed dependency tracking information. + pub fn take_context(&mut self) -> Option<Box<NixContext>> { + self.context_mut().take() + } + + /// This clears the context of that string, losing + /// all dependency tracking information. + pub fn clear_context(&mut self) { + let _ = self.take_context(); + } + + pub fn chars(&self) -> Chars<'_> { + self.as_bstr().chars() + } +} + +fn nix_escape_char(ch: char, next: Option<&char>) -> Option<&'static str> { + match (ch, next) { + ('\\', _) => Some("\\\\"), + ('"', _) => Some("\\\""), + ('\n', _) => Some("\\n"), + ('\t', _) => Some("\\t"), + ('\r', _) => Some("\\r"), + ('$', Some('{')) => Some("\\$"), + _ => None, + } +} + +/// Return true if this string is a keyword -- character strings +/// which lexically match the "identifier" production but are not +/// parsed as identifiers. See also cppnix commit +/// b72bc4a972fe568744d98b89d63adcd504cb586c. +fn is_keyword(s: &str) -> bool { + matches!( + s, + "if" | "then" | "else" | "assert" | "with" | "let" | "in" | "rec" | "inherit" + ) +} + +/// Return true if this string can be used as an identifier in Nix. +fn is_valid_nix_identifier(s: &str) -> bool { + // adapted from rnix-parser's tokenizer.rs + let mut chars = s.chars(); + match chars.next() { + Some('a'..='z' | 'A'..='Z' | '_') => (), + _ => return false, + } + for c in chars { + match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => (), + _ => return false, + } + } + true +} + +/// Escape a Nix string for display, as most user-visible representation +/// are escaped strings. +/// +/// Note that this does not add the outer pair of surrounding quotes. +fn nix_escape_string(input: &str) -> Cow<str> { + let mut iter = input.char_indices().peekable(); + + while let Some((i, c)) = iter.next() { + if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) { + let mut escaped = String::with_capacity(input.len()); + escaped.push_str(&input[..i]); + escaped.push_str(esc); + + // In theory we calculate how many bytes it takes to represent `esc` + // in UTF-8 and use that for the offset. It is, however, safe to + // assume that to be 1, as all characters that can be escaped in a + // Nix string are ASCII. + let mut inner_iter = input[i + 1..].chars().peekable(); + while let Some(c) = inner_iter.next() { + match nix_escape_char(c, inner_iter.peek()) { + Some(esc) => escaped.push_str(esc), + None => escaped.push(c), + } + } + + return Cow::Owned(escaped); + } + } + + Cow::Borrowed(input) +} + +impl Display for NixString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("\"")?; + f.write_str(&nix_escape_string(&self.to_str_lossy()))?; + f.write_str("\"") + } +} + +#[cfg(all(test, feature = "arbitrary"))] +mod tests { + use test_strategy::proptest; + + use super::*; + + use crate::properties::{eq_laws, hash_laws, ord_laws}; + + #[test] + fn size() { + assert_eq!(std::mem::size_of::<NixString>(), 8); + } + + #[proptest] + fn clone_strings(s: NixString) { + drop(s.clone()) + } + + eq_laws!(NixString); + hash_laws!(NixString); + ord_laws!(NixString); +} diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs new file mode 100644 index 000000000000..4b915019d47c --- /dev/null +++ b/tvix/eval/src/value/thunk.rs @@ -0,0 +1,427 @@ +//! This module implements the runtime representation of Thunks. +//! +//! Thunks are a special kind of Nix value, similar to a 0-argument +//! closure that yields some value. Thunks are used to implement the +//! lazy evaluation behaviour of Nix: +//! +//! Whenever the compiler determines that an expression should be +//! evaluated lazily, it creates a thunk instead of compiling the +//! expression value directly. At any point in the runtime where the +//! actual value of a thunk is required, it is "forced", meaning that +//! the encompassing computation takes place and the thunk takes on +//! its new value. +//! +//! Thunks have interior mutability to be able to memoise their +//! computation. Once a thunk is evaluated, its internal +//! representation becomes the result of the expression. It is legal +//! for the runtime to replace a thunk object directly with its value +//! object, but when forcing a thunk, the runtime *must* mutate the +//! memoisable slot. + +use rustc_hash::FxHashSet; +use std::{ + cell::{Ref, RefCell, RefMut}, + fmt::Debug, + rc::Rc, +}; + +use crate::{ + errors::ErrorKind, + opcode::Op, + upvalues::Upvalues, + value::Closure, + vm::generators::{self, GenCo}, + Value, +}; + +use super::{Lambda, TotalDisplay}; +use codemap::Span; + +/// Internal representation of a suspended native thunk. +struct SuspendedNative(Box<dyn Fn() -> Result<Value, ErrorKind>>); + +impl Debug for SuspendedNative { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "SuspendedNative({:p})", self.0) + } +} + +/// Internal representation of the different states of a thunk. +/// +/// Upvalues must be finalised before leaving the initial state +/// (Suspended or RecursiveClosure). The [`value()`] function may +/// not be called until the thunk is in the final state (Evaluated). +#[derive(Debug)] +enum ThunkRepr { + /// Thunk is closed over some values, suspended and awaiting + /// execution. + Suspended { + lambda: Rc<Lambda>, + upvalues: Rc<Upvalues>, + span: Span, + }, + + /// Thunk is a suspended native computation. + Native(SuspendedNative), + + /// Thunk currently under-evaluation; encountering a blackhole + /// value means that infinite recursion has occured. + Blackhole { + /// Span at which the thunk was first forced. + forced_at: Span, + + /// Span at which the thunk was originally suspended. + suspended_at: Option<Span>, + + /// Span of the first instruction of the actual code inside + /// the thunk. + content_span: Option<Span>, + }, + + // TODO(amjoseph): consider changing `Value` to `Rc<Value>` to avoid + // expensive clone()s in Thunk::force(). + /// Fully evaluated thunk. + Evaluated(Value), +} + +impl ThunkRepr { + fn debug_repr(&self) -> String { + match self { + ThunkRepr::Evaluated(v) => format!("thunk(val|{})", v), + ThunkRepr::Blackhole { .. } => "thunk(blackhole)".to_string(), + ThunkRepr::Native(_) => "thunk(native)".to_string(), + ThunkRepr::Suspended { lambda, .. } => format!("thunk({:p})", *lambda), + } + } + + /// Return the Value within a fully-evaluated ThunkRepr; panics + /// if the thunk is not fully-evaluated. + fn expect(self) -> Value { + match self { + ThunkRepr::Evaluated(value) => value, + ThunkRepr::Blackhole { .. } => panic!("Thunk::expect() called on a black-holed thunk"), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) => { + panic!("Thunk::expect() called on a suspended thunk") + } + } + } + + fn expect_ref(&self) -> &Value { + match self { + ThunkRepr::Evaluated(value) => value, + ThunkRepr::Blackhole { .. } => panic!("Thunk::expect() called on a black-holed thunk"), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) => { + panic!("Thunk::expect() called on a suspended thunk") + } + } + } + + pub fn is_forced(&self) -> bool { + match self { + ThunkRepr::Evaluated(Value::Thunk(_)) => false, + ThunkRepr::Evaluated(_) => true, + _ => false, + } + } +} + +/// A thunk is created for any value which requires non-strict +/// evaluation due to self-reference or lazy semantics (or both). +/// Every reference cycle involving `Value`s will contain at least +/// one `Thunk`. +#[derive(Clone, Debug)] +pub struct Thunk(Rc<RefCell<ThunkRepr>>); + +impl Thunk { + pub fn new_closure(lambda: Rc<Lambda>) -> Self { + Thunk(Rc::new(RefCell::new(ThunkRepr::Evaluated(Value::Closure( + Rc::new(Closure { + upvalues: Rc::new(Upvalues::with_capacity(lambda.upvalue_count)), + lambda: lambda.clone(), + }), + ))))) + } + + pub fn new_suspended(lambda: Rc<Lambda>, span: Span) -> Self { + Thunk(Rc::new(RefCell::new(ThunkRepr::Suspended { + upvalues: Rc::new(Upvalues::with_capacity(lambda.upvalue_count)), + lambda: lambda.clone(), + span, + }))) + } + + pub fn new_suspended_native(native: Box<dyn Fn() -> Result<Value, ErrorKind>>) -> Self { + Thunk(Rc::new(RefCell::new(ThunkRepr::Native(SuspendedNative( + native, + ))))) + } + + /// Helper function to create a [`Thunk`] that calls a function given as the + /// [`Value`] `callee` with the argument `arg` when it is forced. This is + /// particularly useful in builtin implementations if the result of calling + /// a function does not need to be forced immediately, because e.g. it is + /// stored in an attribute set. + pub fn new_suspended_call(callee: Value, arg: Value, span: Span) -> Self { + let mut lambda = Lambda::default(); + + let arg_idx = lambda.chunk().push_constant(arg); + let f_idx = lambda.chunk().push_constant(callee); + + // This is basically a recreation of compile_apply(): + // We need to push the argument onto the stack and then the function. + // The function (not the argument) needs to be forced before calling. + lambda.chunk.push_op(Op::Constant, span); + lambda.chunk.push_uvarint(arg_idx.0 as u64); + lambda.chunk.push_op(Op::Constant, span); + lambda.chunk.push_uvarint(f_idx.0 as u64); + lambda.chunk.push_op(Op::Force, span); + lambda.chunk.push_op(Op::Call, span); + + // Inform the VM that the chunk has ended + lambda.chunk.push_op(Op::Return, span); + + Thunk(Rc::new(RefCell::new(ThunkRepr::Suspended { + upvalues: Rc::new(Upvalues::with_capacity(0)), + lambda: Rc::new(lambda), + span, + }))) + } + + fn prepare_blackhole(&self, forced_at: Span) -> ThunkRepr { + match &*self.0.borrow() { + ThunkRepr::Suspended { span, lambda, .. } => ThunkRepr::Blackhole { + forced_at, + suspended_at: Some(*span), + content_span: Some(lambda.chunk.first_span()), + }, + + _ => ThunkRepr::Blackhole { + forced_at, + suspended_at: None, + content_span: None, + }, + } + } + + pub async fn force(myself: Thunk, co: GenCo, span: Span) -> Result<Value, ErrorKind> { + Self::force_(myself, &co, span).await + } + pub async fn force_(mut myself: Thunk, co: &GenCo, span: Span) -> Result<Value, ErrorKind> { + // This vector of "thunks which point to the thunk-being-forced", to + // be updated along with it, is necessary in order to write this + // function in iterative (and later, mutual-tail-call) form. + let mut also_update: Vec<Rc<RefCell<ThunkRepr>>> = vec![]; + + loop { + // If the current thunk is already fully evaluated, return its evaluated + // value. The VM will continue running the code that landed us here. + if myself.is_forced() { + let val = myself.unwrap_or_clone(); + for other_thunk in also_update.into_iter() { + other_thunk.replace(ThunkRepr::Evaluated(val.clone())); + } + return Ok(val); + } + + // Begin evaluation of this thunk by marking it as a blackhole, meaning + // that any other forcing frame encountering this thunk before its + // evaluation is completed detected an evaluation cycle. + let inner = myself.0.replace(myself.prepare_blackhole(span)); + + match inner { + // If there was already a blackhole in the thunk, this is an + // evaluation cycle. + ThunkRepr::Blackhole { + forced_at, + suspended_at, + content_span, + } => { + return Err(ErrorKind::InfiniteRecursion { + first_force: forced_at, + suspended_at, + content_span, + }) + } + + // If there is a native function stored in the thunk, evaluate it + // and replace this thunk's representation with the result. + ThunkRepr::Native(native) => { + let value = native.0()?; + myself.0.replace(ThunkRepr::Evaluated(value)); + continue; + } + + // When encountering a suspended thunk, request that the VM enters + // it and produces the result. + ThunkRepr::Suspended { + lambda, + upvalues, + span, + } => { + // TODO(amjoseph): use #[tailcall::mutual] here. This can + // be turned into a tailcall to vm::execute_bytecode() by + // passing `also_update` to it. + let value = generators::request_enter_lambda(co, lambda, upvalues, span).await; + myself.0.replace(ThunkRepr::Evaluated(value)); + continue; + } + + // nested thunks -- try to flatten before forcing + ThunkRepr::Evaluated(Value::Thunk(inner_thunk)) => { + match Rc::try_unwrap(inner_thunk.0) { + Ok(refcell) => { + // we are the only reference to the inner thunk, + // so steal it + myself.0.replace(refcell.into_inner()); + continue; + } + Err(rc) => { + let inner_thunk = Thunk(rc); + if inner_thunk.is_forced() { + // tail call to force the inner thunk; note that + // this means the outer thunk remains unforced + // even after calling force() on it; however the + // next time it is forced we will be one + // thunk-forcing closer to it being + // fully-evaluated. + myself + .0 + .replace(ThunkRepr::Evaluated(inner_thunk.value().clone())); + continue; + } + also_update.push(myself.0.clone()); + myself = inner_thunk; + continue; + } + } + } + + ThunkRepr::Evaluated(val) => { + return Ok(val); + } + } + } + } + + pub fn finalise(&self, stack: &[Value]) { + self.upvalues_mut().resolve_deferred_upvalues(stack); + } + + pub fn is_evaluated(&self) -> bool { + matches!(*self.0.borrow(), ThunkRepr::Evaluated(_)) + } + + pub fn is_suspended(&self) -> bool { + matches!( + *self.0.borrow(), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) + ) + } + + /// Returns true if forcing this thunk will not change it. + pub fn is_forced(&self) -> bool { + self.0.borrow().is_forced() + } + + /// Returns a reference to the inner evaluated value of a thunk. + /// It is an error to call this on a thunk that has not been + /// forced, or is not otherwise known to be fully evaluated. + // Note: Due to the interior mutability of thunks this is + // difficult to represent in the type system without impacting the + // API too much. + pub fn value(&self) -> Ref<Value> { + Ref::map(self.0.borrow(), |thunk| match thunk { + ThunkRepr::Evaluated(value) => value, + ThunkRepr::Blackhole { .. } => panic!("Thunk::value called on a black-holed thunk"), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) => { + panic!("Thunk::value called on a suspended thunk") + } + }) + } + + /// Returns the inner evaluated value of a thunk, cloning it if + /// the Rc has more than one strong reference. It is an error + /// to call this on a thunk that has not been forced, or is not + /// otherwise known to be fully evaluated. + fn unwrap_or_clone(self) -> Value { + match Rc::try_unwrap(self.0) { + Ok(refcell) => refcell.into_inner().expect(), + Err(rc) => Ref::map(rc.borrow(), |thunkrepr| thunkrepr.expect_ref()).clone(), + } + } + + pub fn upvalues(&self) -> Ref<'_, Upvalues> { + Ref::map(self.0.borrow(), |thunk| match thunk { + ThunkRepr::Suspended { upvalues, .. } => upvalues.as_ref(), + ThunkRepr::Evaluated(Value::Closure(c)) => &c.upvalues, + _ => panic!("upvalues() on non-suspended thunk"), + }) + } + + pub fn upvalues_mut(&self) -> RefMut<'_, Upvalues> { + RefMut::map(self.0.borrow_mut(), |thunk| match thunk { + ThunkRepr::Suspended { upvalues, .. } => Rc::get_mut(upvalues).unwrap(), + ThunkRepr::Evaluated(Value::Closure(c)) => Rc::get_mut( + &mut Rc::get_mut(c).unwrap().upvalues, + ) + .expect( + "upvalues_mut() was called on a thunk which already had multiple references to it", + ), + thunk => panic!("upvalues() on non-suspended thunk: {thunk:?}"), + }) + } + + /// Do not use this without first reading and understanding + /// `tvix/docs/value-pointer-equality.md`. + pub(crate) fn ptr_eq(&self, other: &Self) -> bool { + if Rc::ptr_eq(&self.0, &other.0) { + return true; + } + match &*self.0.borrow() { + ThunkRepr::Evaluated(Value::Closure(c1)) => match &*other.0.borrow() { + ThunkRepr::Evaluated(Value::Closure(c2)) => Rc::ptr_eq(c1, c2), + _ => false, + }, + _ => false, + } + } + + /// Helper function to format thunks in observer output. + pub(crate) fn debug_repr(&self) -> String { + self.0.borrow().debug_repr() + } +} + +impl TotalDisplay for Thunk { + fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { + if !set.insert(self) { + return f.write_str("<CYCLE>"); + } + + match &*self.0.borrow() { + ThunkRepr::Evaluated(v) => v.total_fmt(f, set), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) => f.write_str("<CODE>"), + other => write!(f, "internal[{}]", other.debug_repr()), + } + } +} + +/// A wrapper type for tracking which thunks have already been seen +/// in a context. This is necessary for printing and deeply forcing +/// cyclic non-diverging data structures like `rec { f = [ f ]; }`. +/// This is separate from the ThunkRepr::Blackhole mechanism, which +/// detects diverging data structures like `(rec { f = f; }).f`. +/// +/// The inner `HashSet` is not available on the outside, as it would be +/// potentially unsafe to interact with the pointers in the set. +#[derive(Default)] +pub struct ThunkSet(FxHashSet<*const ThunkRepr>); + +impl ThunkSet { + /// Check whether the given thunk has already been seen. Will mark the thunk + /// as seen otherwise. + pub fn insert(&mut self, thunk: &Thunk) -> bool { + let ptr: *const ThunkRepr = thunk.0.as_ptr(); + self.0.insert(ptr) + } +} |