diff options
Diffstat (limited to 'tvix/eval/src/value')
-rw-r--r-- | tvix/eval/src/value/arbitrary.rs | 48 | ||||
-rw-r--r-- | tvix/eval/src/value/attrs.rs | 390 | ||||
-rw-r--r-- | tvix/eval/src/value/attrs/tests.rs | 93 | ||||
-rw-r--r-- | tvix/eval/src/value/builtin.rs | 119 | ||||
-rw-r--r-- | tvix/eval/src/value/function.rs | 52 | ||||
-rw-r--r-- | tvix/eval/src/value/json.rs | 154 | ||||
-rw-r--r-- | tvix/eval/src/value/list.rs | 99 | ||||
-rw-r--r-- | tvix/eval/src/value/mod.rs | 1150 | ||||
-rw-r--r-- | tvix/eval/src/value/string.rs | 822 | ||||
-rw-r--r-- | tvix/eval/src/value/thunk.rs | 381 |
10 files changed, 2376 insertions, 932 deletions
diff --git a/tvix/eval/src/value/arbitrary.rs b/tvix/eval/src/value/arbitrary.rs index cd7629cfb9..bf53f4fcb2 100644 --- a/tvix/eval/src/value/arbitrary.rs +++ b/tvix/eval/src/value/arbitrary.rs @@ -1,9 +1,10 @@ //! Support for configurable generation of arbitrary nix values +use imbl::proptest::{ord_map, vector}; use proptest::{prelude::*, strategy::BoxedStrategy}; use std::ffi::OsString; -use super::{NixAttrs, NixList, NixString, Value}; +use super::{attrs::AttrsRep, NixAttrs, NixList, NixString, Value}; #[derive(Clone)] pub enum Parameters { @@ -25,6 +26,39 @@ impl Default for Parameters { } } +impl Arbitrary for NixAttrs { + type Parameters = Parameters; + type Strategy = BoxedStrategy<Self>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + prop_oneof![ + // Empty attrs representation + Just(Self(AttrsRep::Empty)), + // KV representation (name/value pairs) + ( + any_with::<Value>(args.clone()), + any_with::<Value>(args.clone()) + ) + .prop_map(|(name, value)| Self(AttrsRep::KV { name, value })), + // Map representation + ord_map(NixString::arbitrary(), Value::arbitrary_with(args), 0..100) + .prop_map(|map| Self(AttrsRep::Im(map))) + ] + .boxed() + } +} + +impl Arbitrary for NixList { + type Parameters = <Value as Arbitrary>::Parameters; + type Strategy = BoxedStrategy<Self>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + vector(<Value as Arbitrary>::arbitrary_with(args), 0..100) + .prop_map(NixList::from) + .boxed() + } +} + impl Arbitrary for Value { type Parameters = Parameters; type Strategy = BoxedStrategy<Self>; @@ -58,21 +92,15 @@ fn leaf_value() -> impl Strategy<Value = Value> { any::<i64>().prop_map(Integer), any::<f64>().prop_map(Float), any::<NixString>().prop_map(String), - any::<OsString>().prop_map(|s| Path(s.into())), + any::<OsString>().prop_map(|s| Path(Box::new(s.into()))), ] } fn non_internal_value() -> impl Strategy<Value = Value> { leaf_value().prop_recursive(3, 5, 5, |inner| { prop_oneof![ - any_with::<NixAttrs>(( - Default::default(), - Default::default(), - Parameters::Strategy(inner.clone()) - )) - .prop_map(Value::attrs), - any_with::<NixList>((Default::default(), Parameters::Strategy(inner))) - .prop_map(Value::List) + NixAttrs::arbitrary_with(Parameters::Strategy(inner.clone())).prop_map(Value::attrs), + any_with::<NixList>(Parameters::Strategy(inner)).prop_map(Value::List) ] }) } diff --git a/tvix/eval/src/value/attrs.rs b/tvix/eval/src/value/attrs.rs index ecce34fb4a..33259c8058 100644 --- a/tvix/eval/src/value/attrs.rs +++ b/tvix/eval/src/value/attrs.rs @@ -5,25 +5,38 @@ //! //! Due to this, construction and management of attribute sets has //! some peculiarities that are encapsulated within this module. -use std::collections::btree_map; -use std::collections::BTreeMap; +use std::borrow::Borrow; use std::iter::FromIterator; -use crate::errors::ErrorKind; -use crate::vm::VM; +use bstr::BStr; +use imbl::{ordmap, OrdMap}; +use lazy_static::lazy_static; +use serde::de::{Deserializer, Error, Visitor}; +use serde::Deserialize; use super::string::NixString; use super::thunk::ThunkSet; use super::TotalDisplay; use super::Value; +use crate::errors::ErrorKind; +use crate::CatchableErrorKind; + +lazy_static! { + static ref NAME_S: NixString = "name".into(); + static ref NAME_REF: &'static NixString = &NAME_S; + static ref VALUE_S: NixString = "value".into(); + static ref VALUE_REF: &'static NixString = &VALUE_S; +} #[cfg(test)] mod tests; -#[derive(Clone, Debug)] -enum AttrsRep { +#[derive(Clone, Debug, Deserialize, Default)] +pub(super) enum AttrsRep { + #[default] Empty, - Map(BTreeMap<NixString, Value>), + + Im(OrdMap<NixString, Value>), /// Warning: this represents a **two**-attribute attrset, with /// attribute names "name" and "value", like `{name="foo"; @@ -34,60 +47,61 @@ enum AttrsRep { }, } -impl Default for AttrsRep { - fn default() -> Self { - AttrsRep::Empty - } -} - impl AttrsRep { /// Retrieve reference to a mutable map inside of an attrs, /// optionally changing the representation if required. - fn map_mut(&mut self) -> &mut BTreeMap<NixString, Value> { + fn map_mut(&mut self) -> &mut OrdMap<NixString, Value> { match self { - AttrsRep::Map(m) => m, + AttrsRep::Im(m) => m, AttrsRep::Empty => { - *self = AttrsRep::Map(BTreeMap::new()); + *self = AttrsRep::Im(OrdMap::new()); self.map_mut() } AttrsRep::KV { name, value } => { - *self = AttrsRep::Map(BTreeMap::from([ - (NixString::NAME, name.clone()), - (NixString::VALUE, value.clone()), - ])); + *self = AttrsRep::Im(ordmap! { + NAME_S.clone() => name.clone(), + VALUE_S.clone() => value.clone() + }); + self.map_mut() } } } - fn select(&self, key: &str) -> Option<&Value> { + fn select(&self, key: &BStr) -> Option<&Value> { match self { AttrsRep::Empty => None, - AttrsRep::KV { name, value } => match key { - "name" => Some(name), - "value" => Some(value), + AttrsRep::KV { name, value } => match &**key { + b"name" => Some(name), + b"value" => Some(value), _ => None, }, - AttrsRep::Map(map) => map.get(&key.into()), + AttrsRep::Im(map) => map.get(key), } } - fn contains(&self, key: &str) -> bool { + fn contains(&self, key: &BStr) -> bool { match self { AttrsRep::Empty => false, AttrsRep::KV { .. } => key == "name" || key == "value", - AttrsRep::Map(map) => map.contains_key(&key.into()), + AttrsRep::Im(map) => map.contains_key(key), } } } #[repr(transparent)] #[derive(Clone, Debug, Default)] -pub struct NixAttrs(AttrsRep); +pub struct NixAttrs(pub(super) AttrsRep); + +impl From<OrdMap<NixString, Value>> for NixAttrs { + fn from(map: OrdMap<NixString, Value>) -> Self { + NixAttrs(AttrsRep::Im(map)) + } +} impl<K, V> FromIterator<(K, V)> for NixAttrs where @@ -98,11 +112,7 @@ where where T: IntoIterator<Item = (K, V)>, { - NixAttrs(AttrsRep::Map( - iter.into_iter() - .map(|(k, v)| (k.into(), v.into())) - .collect(), - )) + NixAttrs(AttrsRep::Im(iter.into_iter().collect())) } } @@ -121,7 +131,7 @@ impl TotalDisplay for NixAttrs { f.write_str("; ")?; } - AttrsRep::Map(map) => { + AttrsRep::Im(map) => { for (name, value) in map { write!(f, "{} = ", name.ident_str())?; value.total_fmt(f, set)?; @@ -136,32 +146,38 @@ impl TotalDisplay for NixAttrs { } } -#[cfg(feature = "arbitrary")] -mod arbitrary { - use super::*; - - use proptest::prelude::*; - use proptest::prop_oneof; - use proptest::strategy::{BoxedStrategy, Just, Strategy}; - - impl Arbitrary for NixAttrs { - type Parameters = <BTreeMap<NixString, Value> as Arbitrary>::Parameters; - - type Strategy = BoxedStrategy<Self>; - - fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { - prop_oneof![ - Just(Self(AttrsRep::Empty)), - ( - any_with::<Value>(args.2.clone()), - any_with::<Value>(args.2.clone()) - ) - .prop_map(|(name, value)| Self(AttrsRep::KV { name, value })), - any_with::<BTreeMap<NixString, Value>>(args) - .prop_map(|map| Self(AttrsRep::Map(map))) - ] - .boxed() +impl<'de> Deserialize<'de> for NixAttrs { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct MapVisitor; + + impl<'de> Visitor<'de> for MapVisitor { + type Value = NixAttrs; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a valid Nix attribute set") + } + + fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error> + where + A: serde::de::MapAccess<'de>, + { + let mut stack_array = Vec::with_capacity(map.size_hint().unwrap_or(0) * 2); + + while let Some((key, value)) = map.next_entry()? { + stack_array.push(key); + stack_array.push(value); + } + + Ok(NixAttrs::construct(stack_array.len() / 2, stack_array) + .map_err(A::Error::custom)? + .expect("Catchable values are unreachable here")) + } } + + deserializer.deserialize_map(MapVisitor) } } @@ -170,6 +186,17 @@ impl NixAttrs { Self(AttrsRep::Empty) } + /// Compare two attribute sets by pointer equality. Only makes + /// sense for some attribute set reprsentations, i.e. returning + /// `false` does not mean that the two attribute sets do not have + /// equal *content*. + pub fn ptr_eq(&self, other: &Self) -> bool { + match (&self.0, &other.0) { + (AttrsRep::Im(lhs), AttrsRep::Im(rhs)) => lhs.ptr_eq(rhs), + _ => false, + } + } + /// Return an attribute set containing the merge of the two /// provided sets. Keys from the `other` set have precedence. pub fn update(self, other: Self) -> Self { @@ -183,44 +210,41 @@ impl NixAttrs { // Explicitly handle all branches instead of falling // through, to ensure that we get at least some compiler // errors if variants are modified. - (AttrsRep::Map(_), AttrsRep::Map(_)) - | (AttrsRep::Map(_), AttrsRep::KV { .. }) - | (AttrsRep::KV { .. }, AttrsRep::Map(_)) => {} + (AttrsRep::Im(_), AttrsRep::Im(_)) + | (AttrsRep::Im(_), AttrsRep::KV { .. }) + | (AttrsRep::KV { .. }, AttrsRep::Im(_)) => {} }; // Slightly more advanced, but still optimised updates match (self.0, other.0) { - (AttrsRep::Map(mut m), AttrsRep::KV { name, value }) => { - m.insert(NixString::NAME, name); - m.insert(NixString::VALUE, value); - NixAttrs(AttrsRep::Map(m)) + (AttrsRep::Im(mut m), AttrsRep::KV { name, value }) => { + m.insert(NAME_S.clone(), name); + m.insert(VALUE_S.clone(), value); + NixAttrs(AttrsRep::Im(m)) } - (AttrsRep::KV { name, value }, AttrsRep::Map(mut m)) => { - match m.entry(NixString::NAME) { - btree_map::Entry::Vacant(e) => { + (AttrsRep::KV { name, value }, AttrsRep::Im(mut m)) => { + match m.entry(NAME_S.clone()) { + imbl::ordmap::Entry::Vacant(e) => { e.insert(name); } - btree_map::Entry::Occupied(_) => { /* name from `m` has precedence */ } + imbl::ordmap::Entry::Occupied(_) => { /* name from `m` has precedence */ } }; - match m.entry(NixString::VALUE) { - btree_map::Entry::Vacant(e) => { + match m.entry(VALUE_S.clone()) { + imbl::ordmap::Entry::Vacant(e) => { e.insert(value); } - btree_map::Entry::Occupied(_) => { /* value from `m` has precedence */ } + imbl::ordmap::Entry::Occupied(_) => { /* value from `m` has precedence */ } }; - NixAttrs(AttrsRep::Map(m)) + NixAttrs(AttrsRep::Im(m)) } // Plain merge of maps. - (AttrsRep::Map(mut m1), AttrsRep::Map(mut m2)) => { - m1.append(&mut m2); - NixAttrs(AttrsRep::Map(m1)) - } + (AttrsRep::Im(m1), AttrsRep::Im(m2)) => NixAttrs(AttrsRep::Im(m2.union(m1))), // Cases handled above by the borrowing match: _ => unreachable!(), @@ -230,33 +254,52 @@ impl NixAttrs { /// Return the number of key-value entries in an attrset. pub fn len(&self) -> usize { match &self.0 { - AttrsRep::Map(map) => map.len(), + AttrsRep::Im(map) => map.len(), AttrsRep::Empty => 0, AttrsRep::KV { .. } => 2, } } + pub fn is_empty(&self) -> bool { + match &self.0 { + AttrsRep::Im(map) => map.is_empty(), + AttrsRep::Empty => true, + AttrsRep::KV { .. } => false, + } + } + /// Select a value from an attribute set by key. - pub fn select(&self, key: &str) -> Option<&Value> { - self.0.select(key) + pub fn select<K>(&self, key: &K) -> Option<&Value> + where + K: Borrow<BStr> + ?Sized, + { + self.0.select(key.borrow()) } /// Select a required value from an attribute set by key, return /// an `AttributeNotFound` error if it is missing. - pub fn select_required(&self, key: &str) -> Result<&Value, ErrorKind> { + pub fn select_required<K>(&self, key: &K) -> Result<&Value, ErrorKind> + where + K: Borrow<BStr> + ?Sized, + { self.select(key) - .ok_or_else(|| ErrorKind::AttributeNotFound { name: key.into() }) + .ok_or_else(|| ErrorKind::AttributeNotFound { + name: key.borrow().to_string(), + }) } - pub fn contains(&self, key: &str) -> bool { - self.0.contains(key) + pub fn contains<'a, K: 'a>(&self, key: K) -> bool + where + &'a BStr: From<K>, + { + self.0.contains(key.into()) } /// Construct an iterator over all the key-value pairs in the attribute set. #[allow(clippy::needless_lifetimes)] pub fn iter<'a>(&'a self) -> Iter<KeyValue<'a>> { Iter(match &self.0 { - AttrsRep::Map(map) => KeyValue::Map(map.iter()), + AttrsRep::Im(map) => KeyValue::Im(map.iter()), AttrsRep::Empty => KeyValue::Empty, AttrsRep::KV { @@ -270,23 +313,15 @@ impl NixAttrs { }) } - pub fn into_iter(self) -> IntoIter { - match self.0 { - AttrsRep::Empty => IntoIter(IntoIterRepr::Empty), - AttrsRep::KV { name, value } => IntoIter(IntoIterRepr::Finite( - vec![ - (NixString::NAME_REF.clone(), name), - (NixString::VALUE_REF.clone(), value), - ] - .into_iter(), - )), - AttrsRep::Map(map) => IntoIter(IntoIterRepr::Map(map.into_iter())), - } + /// Same as iter(), but marks call sites which rely on the + /// iteration being lexicographic. + pub fn iter_sorted(&self) -> Iter<KeyValue<'_>> { + self.iter() } - /// Same as into_iter(), but marks call sites which rely on the + /// Same as [IntoIterator::into_iter], but marks call sites which rely on the /// iteration being lexicographic. - pub fn into_iter_sorted(self) -> IntoIter { + pub fn into_iter_sorted(self) -> OwnedAttrsIterator { self.into_iter() } @@ -294,14 +329,17 @@ impl NixAttrs { pub fn keys(&self) -> Keys { Keys(match &self.0 { AttrsRep::Empty => KeysInner::Empty, - AttrsRep::Map(m) => KeysInner::Map(m.keys()), + AttrsRep::Im(m) => KeysInner::Im(m.keys()), AttrsRep::KV { .. } => KeysInner::KV(IterKV::default()), }) } /// Implement construction logic of an attribute set, to encapsulate /// logic about attribute set optimisations inside of this module. - pub fn construct(count: usize, mut stack_slice: Vec<Value>) -> Result<Self, ErrorKind> { + pub fn construct( + count: usize, + mut stack_slice: Vec<Value>, + ) -> Result<Result<Self, CatchableErrorKind>, ErrorKind> { debug_assert!( stack_slice.len() == count * 2, "construct_attrs called with count == {}, but slice.len() == {}", @@ -311,18 +349,17 @@ impl NixAttrs { // Optimisation: Empty attribute set if count == 0 { - return Ok(NixAttrs(AttrsRep::Empty)); + return Ok(Ok(NixAttrs(AttrsRep::Empty))); } // Optimisation: KV pattern if count == 2 { if let Some(kv) = attempt_optimise_kv(&mut stack_slice) { - return Ok(kv); + return Ok(Ok(kv)); } } - // TODO(tazjin): extend_reserve(count) (rust#72631) - let mut attrs = NixAttrs(AttrsRep::Map(BTreeMap::new())); + let mut attrs = NixAttrs(AttrsRep::Im(OrdMap::new())); for _ in 0..count { let value = stack_slice.pop().unwrap(); @@ -338,11 +375,13 @@ impl NixAttrs { continue; } + Value::Catchable(err) => return Ok(Err(*err)), + other => return Err(ErrorKind::InvalidAttributeName(other)), } } - Ok(attrs) + Ok(Ok(attrs)) } /// Construct an optimized "KV"-style attribute set given the value for the @@ -350,70 +389,19 @@ impl NixAttrs { pub(crate) fn from_kv(name: Value, value: Value) -> Self { NixAttrs(AttrsRep::KV { name, value }) } +} - /// Compare `self` against `other` for equality using Nix equality semantics - pub fn nix_eq(&self, other: &Self, vm: &mut VM) -> Result<bool, ErrorKind> { - match (&self.0, &other.0) { - (AttrsRep::Empty, AttrsRep::Empty) => Ok(true), - - // It is possible to create an empty attribute set that - // has Map representation like so: ` { ${null} = 1; }`. - // - // Preventing this would incur a cost on all attribute set - // construction (we'd have to check the actual number of - // elements after key construction). In practice this - // probably does not happen, so it's better to just bite - // the bullet and implement this branch. - (AttrsRep::Empty, AttrsRep::Map(map)) | (AttrsRep::Map(map), AttrsRep::Empty) => { - Ok(map.is_empty()) - } - - // Other specialised representations (KV ...) definitely - // do not match `Empty`. - (AttrsRep::Empty, _) | (_, AttrsRep::Empty) => Ok(false), - - ( - AttrsRep::KV { - name: n1, - value: v1, - }, - AttrsRep::KV { - name: n2, - value: v2, - }, - ) => Ok(n1.nix_eq(n2, vm)? && v1.nix_eq(v2, vm)?), - - (AttrsRep::Map(map), AttrsRep::KV { name, value }) - | (AttrsRep::KV { name, value }, AttrsRep::Map(map)) => { - if map.len() != 2 { - return Ok(false); - } - - if let (Some(m_name), Some(m_value)) = - (map.get(&NixString::NAME), map.get(&NixString::VALUE)) - { - return Ok(name.nix_eq(m_name, vm)? && value.nix_eq(m_value, vm)?); - } - - Ok(false) - } - - (AttrsRep::Map(m1), AttrsRep::Map(m2)) => { - if m1.len() != m2.len() { - return Ok(false); - } +impl IntoIterator for NixAttrs { + type Item = (NixString, Value); + type IntoIter = OwnedAttrsIterator; - for (k, v1) in m1 { - if let Some(v2) = m2.get(k) { - if !v1.nix_eq(v2, vm)? { - return Ok(false); - } - } else { - return Ok(false); - } - } - Ok(true) - } + fn into_iter(self) -> Self::IntoIter { + match self.0 { + AttrsRep::Empty => OwnedAttrsIterator(IntoIterRepr::Empty), + AttrsRep::KV { name, value } => OwnedAttrsIterator(IntoIterRepr::Finite( + vec![(NAME_REF.clone(), name), (VALUE_REF.clone(), value)].into_iter(), + )), + AttrsRep::Im(map) => OwnedAttrsIterator(IntoIterRepr::Im(map.into_iter())), } } } @@ -434,17 +422,8 @@ impl NixAttrs { fn attempt_optimise_kv(slice: &mut [Value]) -> Option<NixAttrs> { let (name_idx, value_idx) = { match (&slice[2], &slice[0]) { - (Value::String(s1), Value::String(s2)) - if (*s1 == NixString::NAME && *s2 == NixString::VALUE) => - { - (3, 1) - } - - (Value::String(s1), Value::String(s2)) - if (*s1 == NixString::VALUE && *s2 == NixString::NAME) => - { - (1, 3) - } + (Value::String(s1), Value::String(s2)) if (*s1 == *NAME_S && *s2 == *VALUE_S) => (3, 1), + (Value::String(s1), Value::String(s2)) if (*s1 == *VALUE_S && *s2 == *NAME_S) => (1, 3), // Technically this branch lets type errors pass, // but they will be caught during normal attribute @@ -463,11 +442,11 @@ fn attempt_optimise_kv(slice: &mut [Value]) -> Option<NixAttrs> { /// checking against duplicate keys. fn set_attr(attrs: &mut NixAttrs, key: NixString, value: Value) -> Result<(), ErrorKind> { match attrs.0.map_mut().entry(key) { - btree_map::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey { - key: entry.key().as_str().to_string(), + imbl::ordmap::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey { + key: entry.key().to_string(), }), - btree_map::Entry::Vacant(entry) => { + imbl::ordmap::Entry::Vacant(entry) => { entry.insert(value); Ok(()) } @@ -496,7 +475,6 @@ impl IterKV { /// Iterator representation over the keys *and* values of an attribute /// set. -#[derive(Debug)] pub enum KeyValue<'a> { Empty, @@ -506,7 +484,7 @@ pub enum KeyValue<'a> { at: IterKV, }, - Map(btree_map::Iter<'a, NixString, Value>), + Im(imbl::ordmap::Iter<'a, NixString, Value>), } /// Iterator over a Nix attribute set. @@ -520,18 +498,18 @@ impl<'a> Iterator for Iter<KeyValue<'a>> { fn next(&mut self) -> Option<Self::Item> { match &mut self.0 { - KeyValue::Map(inner) => inner.next(), + KeyValue::Im(inner) => inner.next(), KeyValue::Empty => None, KeyValue::KV { name, value, at } => match at { IterKV::Name => { at.next(); - Some((NixString::NAME_REF, name)) + Some((&NAME_REF, name)) } IterKV::Value => { at.next(); - Some((NixString::VALUE_REF, value)) + Some((&VALUE_REF, value)) } IterKV::Done => None, @@ -545,7 +523,7 @@ impl<'a> ExactSizeIterator for Iter<KeyValue<'a>> { match &self.0 { KeyValue::Empty => 0, KeyValue::KV { .. } => 2, - KeyValue::Map(inner) => inner.len(), + KeyValue::Im(inner) => inner.len(), } } } @@ -553,7 +531,7 @@ impl<'a> ExactSizeIterator for Iter<KeyValue<'a>> { enum KeysInner<'a> { Empty, KV(IterKV), - Map(btree_map::Keys<'a, NixString, Value>), + Im(imbl::ordmap::Keys<'a, NixString, Value>), } pub struct Keys<'a>(KeysInner<'a>); @@ -566,14 +544,14 @@ impl<'a> Iterator for Keys<'a> { KeysInner::Empty => None, KeysInner::KV(at @ IterKV::Name) => { at.next(); - Some(NixString::NAME_REF) + Some(&NAME_REF) } KeysInner::KV(at @ IterKV::Value) => { at.next(); - Some(NixString::VALUE_REF) + Some(&VALUE_REF) } KeysInner::KV(IterKV::Done) => None, - KeysInner::Map(m) => m.next(), + KeysInner::Im(m) => m.next(), } } } @@ -593,7 +571,7 @@ impl<'a> ExactSizeIterator for Keys<'a> { match &self.0 { KeysInner::Empty => 0, KeysInner::KV(_) => 2, - KeysInner::Map(m) => m.len(), + KeysInner::Im(m) => m.len(), } } } @@ -602,30 +580,42 @@ impl<'a> ExactSizeIterator for Keys<'a> { pub enum IntoIterRepr { Empty, Finite(std::vec::IntoIter<(NixString, Value)>), - Map(std::collections::btree_map::IntoIter<NixString, Value>), + Im(imbl::ordmap::ConsumingIter<(NixString, Value)>), } +/// Wrapper type which hides the internal implementation details from +/// users. #[repr(transparent)] -pub struct IntoIter(IntoIterRepr); +pub struct OwnedAttrsIterator(IntoIterRepr); -impl Iterator for IntoIter { +impl Iterator for OwnedAttrsIterator { type Item = (NixString, Value); fn next(&mut self) -> Option<Self::Item> { match &mut self.0 { IntoIterRepr::Empty => None, - IntoIterRepr::Map(inner) => inner.next(), IntoIterRepr::Finite(inner) => inner.next(), + IntoIterRepr::Im(inner) => inner.next(), } } } -impl ExactSizeIterator for IntoIter { +impl ExactSizeIterator for OwnedAttrsIterator { fn len(&self) -> usize { match &self.0 { IntoIterRepr::Empty => 0, - IntoIterRepr::Map(inner) => inner.len(), IntoIterRepr::Finite(inner) => inner.len(), + IntoIterRepr::Im(inner) => inner.len(), + } + } +} + +impl DoubleEndedIterator for OwnedAttrsIterator { + fn next_back(&mut self) -> Option<Self::Item> { + match &mut self.0 { + IntoIterRepr::Empty => None, + IntoIterRepr::Finite(inner) => inner.next_back(), + IntoIterRepr::Im(inner) => inner.next_back(), } } } diff --git a/tvix/eval/src/value/attrs/tests.rs b/tvix/eval/src/value/attrs/tests.rs index 65d3c8d7ca..534b78a00d 100644 --- a/tvix/eval/src/value/attrs/tests.rs +++ b/tvix/eval/src/value/attrs/tests.rs @@ -1,45 +1,12 @@ -use super::*; - -mod nix_eq { - use crate::observer::NoOpObserver; - - use super::*; - use proptest::prelude::ProptestConfig; - use test_strategy::proptest; - - #[proptest(ProptestConfig { cases: 2, ..Default::default() })] - fn reflexive(x: NixAttrs) { - let mut observer = NoOpObserver {}; - let mut vm = VM::new(Default::default(), &mut observer); +use bstr::B; - assert!(x.nix_eq(&x, &mut vm).unwrap()) - } - - #[proptest(ProptestConfig { cases: 2, ..Default::default() })] - fn symmetric(x: NixAttrs, y: NixAttrs) { - let mut observer = NoOpObserver {}; - let mut vm = VM::new(Default::default(), &mut observer); - - assert_eq!( - x.nix_eq(&y, &mut vm).unwrap(), - y.nix_eq(&x, &mut vm).unwrap() - ) - } - - #[proptest(ProptestConfig { cases: 2, ..Default::default() })] - fn transitive(x: NixAttrs, y: NixAttrs, z: NixAttrs) { - let mut observer = NoOpObserver {}; - let mut vm = VM::new(Default::default(), &mut observer); - - if x.nix_eq(&y, &mut vm).unwrap() && y.nix_eq(&z, &mut vm).unwrap() { - assert!(x.nix_eq(&z, &mut vm).unwrap()) - } - } -} +use super::*; #[test] fn test_empty_attrs() { - let attrs = NixAttrs::construct(0, vec![]).expect("empty attr construction should succeed"); + let attrs = NixAttrs::construct(0, vec![]) + .expect("empty attr construction should succeed") + .unwrap(); assert!( matches!(attrs, NixAttrs(AttrsRep::Empty)), @@ -49,23 +16,21 @@ fn test_empty_attrs() { #[test] fn test_simple_attrs() { - let attrs = NixAttrs::construct( - 1, - vec![Value::String("key".into()), Value::String("value".into())], - ) - .expect("simple attr construction should succeed"); + let attrs = NixAttrs::construct(1, vec![Value::from("key"), Value::from("value")]) + .expect("simple attr construction should succeed") + .unwrap(); assert!( - matches!(attrs, NixAttrs(AttrsRep::Map(_))), + matches!(attrs, NixAttrs(AttrsRep::Im(_))), "simple attribute set should use map representation", ) } #[test] fn test_kv_attrs() { - let name_val = Value::String("name".into()); - let value_val = Value::String("value".into()); - let meaning_val = Value::String("meaning".into()); + let name_val = Value::from("name"); + let value_val = Value::from("value"); + let meaning_val = Value::from("meaning"); let forty_two_val = Value::Integer(42); let kv_attrs = NixAttrs::construct( @@ -77,7 +42,8 @@ fn test_kv_attrs() { meaning_val.clone(), ], ) - .expect("constructing K/V pair attrs should succeed"); + .expect("constructing K/V pair attrs should succeed") + .unwrap(); match kv_attrs { NixAttrs(AttrsRep::KV { name, value }) @@ -93,15 +59,15 @@ fn test_kv_attrs() { #[test] fn test_empty_attrs_iter() { - let attrs = NixAttrs::construct(0, vec![]).unwrap(); + let attrs = NixAttrs::construct(0, vec![]).unwrap().unwrap(); assert!(attrs.iter().next().is_none()); } #[test] fn test_kv_attrs_iter() { - let name_val = Value::String("name".into()); - let value_val = Value::String("value".into()); - let meaning_val = Value::String("meaning".into()); + let name_val = Value::from("name"); + let value_val = Value::from("value"); + let meaning_val = Value::from("meaning"); let forty_two_val = Value::Integer(42); let kv_attrs = NixAttrs::construct( @@ -113,33 +79,28 @@ fn test_kv_attrs_iter() { meaning_val.clone(), ], ) - .expect("constructing K/V pair attrs should succeed"); + .expect("constructing K/V pair attrs should succeed") + .unwrap(); - let mut iter = kv_attrs - .iter() - .collect::<Vec<_>>() - .into_iter() - .map(|(k, v)| (k, v)); + let mut iter = kv_attrs.iter().collect::<Vec<_>>().into_iter(); let (k, v) = iter.next().unwrap(); - assert!(k == NixString::NAME_REF); + assert!(k == *NAME_REF); assert!(v.to_str().unwrap() == meaning_val.to_str().unwrap()); let (k, v) = iter.next().unwrap(); - assert!(k == NixString::VALUE_REF); + assert!(k == *VALUE_REF); assert!(v.as_int().unwrap() == forty_two_val.as_int().unwrap()); assert!(iter.next().is_none()); } #[test] fn test_map_attrs_iter() { - let attrs = NixAttrs::construct( - 1, - vec![Value::String("key".into()), Value::String("value".into())], - ) - .expect("simple attr construction should succeed"); + let attrs = NixAttrs::construct(1, vec![Value::from("key"), Value::from("value")]) + .expect("simple attr construction should succeed") + .unwrap(); let mut iter = attrs.iter().collect::<Vec<_>>().into_iter(); let (k, v) = iter.next().unwrap(); assert!(k == &NixString::from("key")); - assert!(v.to_str().unwrap().as_str() == "value"); + assert_eq!(v.to_str().unwrap(), B("value")); assert!(iter.next().is_none()); } diff --git a/tvix/eval/src/value/builtin.rs b/tvix/eval/src/value/builtin.rs index bb14265181..346f06cb77 100644 --- a/tvix/eval/src/value/builtin.rs +++ b/tvix/eval/src/value/builtin.rs @@ -3,7 +3,7 @@ //! //! Builtins are directly backed by Rust code operating on Nix values. -use crate::{errors::ErrorKind, vm::VM}; +use crate::vm::generators::Generator; use super::Value; @@ -12,25 +12,36 @@ use std::{ rc::Rc, }; -/// Trait for closure types of builtins implemented directly by -/// backing Rust code. +/// Trait for closure types of builtins. /// -/// Builtins declare their arity and are passed a vector with the -/// right number of arguments. Additionally, as they might have to -/// force the evaluation of thunks, they are passed a reference to the -/// current VM which they can use for forcing a value. +/// Builtins are expected to yield a generator which can be run by the VM to +/// produce the final value. /// -/// Errors returned from a builtin will be annotated with the location -/// of the call to the builtin. -pub trait BuiltinFn: Fn(Vec<Value>, &mut VM) -> Result<Value, ErrorKind> {} -impl<F: Fn(Vec<Value>, &mut VM) -> Result<Value, ErrorKind>> BuiltinFn for F {} - -/// Description of a single argument passed to a builtin -pub struct BuiltinArgument { - /// Whether the argument should be forced before the underlying builtin function is called - pub strict: bool, - /// The name of the argument, to be used in docstrings and error messages - pub name: &'static str, +/// Implementors should use the builtins-macros to create these functions +/// instead of handling the argument-passing logic manually. +pub trait BuiltinGen: Fn(Vec<Value>) -> Generator {} +impl<F: Fn(Vec<Value>) -> Generator> BuiltinGen for F {} + +#[derive(Clone)] +pub struct BuiltinRepr { + name: &'static str, + /// Optional documentation for the builtin. + documentation: Option<&'static str>, + arg_count: usize, + + func: Rc<dyn BuiltinGen>, + + /// Partially applied function arguments. + partials: Vec<Value>, +} + +pub enum BuiltinResult { + /// Builtin was not ready to be called (arguments missing) and remains + /// partially applied. + Partial(Builtin), + + /// Builtin was called and constructed a generator that the VM must run. + Called(&'static str, Generator), } /// Represents a single built-in function which directly executes Rust @@ -46,74 +57,74 @@ pub struct BuiltinArgument { /// "capture" the partially applied arguments, and are treated /// specially when printing their representation etc. #[derive(Clone)] -pub struct Builtin { - name: &'static str, - /// Array of arguments to the builtin. - arguments: &'static [BuiltinArgument], - /// Optional documentation for the builtin. - documentation: Option<&'static str>, - func: Rc<dyn BuiltinFn>, +pub struct Builtin(Box<BuiltinRepr>); - /// Partially applied function arguments. - partials: Vec<Value>, +impl From<BuiltinRepr> for Builtin { + fn from(value: BuiltinRepr) -> Self { + Builtin(Box::new(value)) + } } impl Builtin { - pub fn new<F: BuiltinFn + 'static>( + pub fn new<F: BuiltinGen + 'static>( name: &'static str, - arguments: &'static [BuiltinArgument], documentation: Option<&'static str>, + arg_count: usize, func: F, ) -> Self { - Builtin { + BuiltinRepr { name, - arguments, documentation, + arg_count, func: Rc::new(func), partials: vec![], } + .into() } pub fn name(&self) -> &'static str { - self.name + self.0.name } pub fn documentation(&self) -> Option<&'static str> { - self.documentation + self.0.documentation } - /// Apply an additional argument to the builtin, which will either - /// lead to execution of the function or to returning a partial - /// builtin. - pub fn apply(mut self, vm: &mut VM, arg: Value) -> Result<Value, ErrorKind> { - self.partials.push(arg); - - if self.partials.len() == self.arguments.len() { - for (idx, BuiltinArgument { strict, .. }) in self.arguments.iter().enumerate() { - if *strict { - self.partials[idx].force(vm)?; - } - } - return (self.func)(self.partials, vm); - } + /// Apply an additional argument to the builtin. + /// After this, [`Builtin::call`] *must* be called, otherwise it may leave + /// the builtin in an incorrect state. + pub fn apply_arg(&mut self, arg: Value) { + self.0.partials.push(arg); + + debug_assert!( + self.0.partials.len() <= self.0.arg_count, + "Tvix bug: pushed too many arguments to builtin" + ); + } - // Function is not yet ready to be called. - Ok(Value::Builtin(self)) + /// Attempt to call a builtin, which will produce a generator if it is fully + /// applied or return the builtin if it is partially applied. + pub fn call(self) -> BuiltinResult { + if self.0.partials.len() == self.0.arg_count { + BuiltinResult::Called(self.0.name, (self.0.func)(self.0.partials)) + } else { + BuiltinResult::Partial(self) + } } } impl Debug for Builtin { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "builtin[{}]", self.name) + write!(f, "builtin[{}]", self.0.name) } } impl Display for Builtin { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - if !self.partials.is_empty() { - f.write_str("<<primop-app>>") + if !self.0.partials.is_empty() { + f.write_str("<PRIMOP-APP>") } else { - f.write_str("<<primop>>") + f.write_str("<PRIMOP>") } } } @@ -121,6 +132,6 @@ impl Display for Builtin { /// Builtins are uniquely identified by their name impl PartialEq for Builtin { fn eq(&self, other: &Self) -> bool { - self.name == other.name + self.0.name == other.0.name } } diff --git a/tvix/eval/src/value/function.rs b/tvix/eval/src/value/function.rs index 7a21223e04..7592e3d641 100644 --- a/tvix/eval/src/value/function.rs +++ b/tvix/eval/src/value/function.rs @@ -1,5 +1,5 @@ //! This module implements the runtime representation of functions. -use std::{collections::HashMap, hash::Hash, rc::Rc}; +use std::{collections::BTreeMap, hash::Hash, rc::Rc}; use codemap::Span; use smol_str::SmolStr; @@ -11,13 +11,17 @@ use super::NixString; #[derive(Clone, Debug, PartialEq)] pub(crate) struct Formals { /// Map from argument name, to whether that argument is required - pub(crate) arguments: HashMap<NixString, bool>, + pub(crate) arguments: BTreeMap<NixString, bool>, /// Do the formals of this function accept extra arguments pub(crate) ellipsis: bool, /// The span of the formals themselves, to use to emit errors pub(crate) span: Span, + + /// Optionally tracks a name for all function arguments (args@ style). + /// Used by toXML. + pub(crate) name: Option<String>, } impl Formals { @@ -27,10 +31,10 @@ impl Formals { /// ellipsis pub(crate) fn contains<Q>(&self, arg: &Q) -> bool where - Q: ?Sized + Hash + Eq, + Q: ?Sized + Hash + Ord + Eq, NixString: std::borrow::Borrow<Q>, { - self.ellipsis || self.arguments.contains_key(&arg) + self.ellipsis || self.arguments.contains_key(arg) } } @@ -39,7 +43,14 @@ impl Formals { /// OpThunkSuspended referencing it. At runtime `Lambda` is usually wrapped /// in `Rc` to avoid copying the `Chunk` it holds (which can be /// quite large). -#[derive(Debug, Default)] +/// +/// In order to correctly reproduce cppnix's "pointer equality" +/// semantics it is important that we never clone a Lambda -- +/// use `Rc<Lambda>::clone()` instead. This struct deliberately +/// does not `derive(Clone)` in order to prevent this from being +/// done accidentally. +/// +#[derive(/* do not add Clone here */ Debug, Default)] pub struct Lambda { pub(crate) chunk: Chunk, @@ -51,7 +62,7 @@ pub struct Lambda { /// Number of upvalues which the code in this Lambda closes /// over, and which need to be initialised at /// runtime. Information about the variables is emitted using - /// data-carrying opcodes (see [`OpCode::DataStackIdx`]). + /// data-carrying opcodes (see [`crate::opcode::OpCode::DataStackIdx`]). pub(crate) upvalue_count: usize, pub(crate) formals: Option<Formals>, } @@ -62,13 +73,17 @@ impl Lambda { } } -#[derive(Clone, Debug)] +/// +/// In order to correctly reproduce cppnix's "pointer equality" +/// semantics it is important that we never clone a Lambda -- +/// use `Rc<Lambda>::clone()` instead. This struct deliberately +/// does not `derive(Clone)` in order to prevent this from being +/// done accidentally. +/// +#[derive(/* do not add Clone here */ Debug)] pub struct Closure { pub lambda: Rc<Lambda>, pub upvalues: Rc<Upvalues>, - /// true if all upvalues have been realised - #[cfg(debug_assertions)] - pub is_finalised: bool, } impl Closure { @@ -79,19 +94,8 @@ impl Closure { ) } - /// Do not call this function unless you have read - /// `tvix/docs/value-pointer-equality.md` carefully. - pub fn ptr_eq(&self, other: &Self) -> bool { - Rc::ptr_eq(&self.lambda, &other.lambda) && Rc::ptr_eq(&self.upvalues, &other.upvalues) - } - pub fn new_with_upvalues(upvalues: Rc<Upvalues>, lambda: Rc<Lambda>) -> Self { - Closure { - upvalues, - lambda, - #[cfg(debug_assertions)] - is_finalised: true, - } + Closure { upvalues, lambda } } pub fn chunk(&self) -> &Chunk { @@ -102,7 +106,7 @@ impl Closure { self.lambda.clone() } - pub fn upvalues(&self) -> &Upvalues { - &self.upvalues + pub fn upvalues(&self) -> Rc<Upvalues> { + self.upvalues.clone() } } diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs new file mode 100644 index 0000000000..24a6bcaf6f --- /dev/null +++ b/tvix/eval/src/value/json.rs @@ -0,0 +1,154 @@ +/// Implementation of Value serialisation *to* JSON. +/// +/// This can not be implemented through standard serde-derive methods, +/// as there is internal Nix logic that must happen within the +/// serialisation methods. +use super::{CoercionKind, Value}; +use crate::errors::{CatchableErrorKind, ErrorKind}; +use crate::generators::{self, GenCo}; +use crate::NixContext; + +use bstr::ByteSlice; +use serde_json::value::to_value; +use serde_json::Value as Json; // name clash with *our* `Value` +use serde_json::{Map, Number}; + +impl Value { + /// Transforms the structure into a JSON + /// and accumulate all encountered context in the second's element + /// of the return type. + pub async fn into_contextful_json( + self, + co: &GenCo, + ) -> Result<Result<(Json, NixContext), CatchableErrorKind>, ErrorKind> { + let self_forced = generators::request_force(co, self).await; + let mut context = NixContext::new(); + + let value = match self_forced { + Value::Null => Json::Null, + Value::Bool(b) => Json::Bool(b), + Value::Integer(i) => Json::Number(Number::from(i)), + Value::Float(f) => to_value(f)?, + Value::String(s) => { + context.mimic(&s); + + Json::String(s.to_str()?.to_owned()) + } + + Value::Path(p) => { + let imported = generators::request_path_import(co, *p).await; + let path = imported.to_string_lossy().to_string(); + context = context.append(crate::NixContextElement::Plain(path.clone())); + Json::String(path) + } + + Value::List(l) => { + let mut out = vec![]; + + for val in l.into_iter() { + match generators::request_to_json(co, val).await { + Ok((v, ctx)) => { + context.extend(ctx.into_iter()); + out.push(v) + } + Err(cek) => return Ok(Err(cek)), + } + } + + Json::Array(out) + } + + Value::Attrs(attrs) => { + // Attribute sets with a callable `__toString` attribute + // serialise to the string-coerced version of the result of + // calling that. + if attrs.select("__toString").is_some() { + let span = generators::request_span(co).await; + match Value::Attrs(attrs) + .coerce_to_string_( + co, + CoercionKind { + strong: false, + import_paths: false, + }, + span, + ) + .await? + { + Value::Catchable(cek) => return Ok(Err(*cek)), + Value::String(s) => { + // We need a fresh context here because `__toString` will discard + // everything. + let mut fresh = NixContext::new(); + fresh.mimic(&s); + + return Ok(Ok((Json::String(s.to_str()?.to_owned()), fresh))); + } + _ => panic!("Value::coerce_to_string_() returned a non-string!"), + } + } + + // Attribute sets with an `outPath` attribute + // serialise to a JSON serialisation of that inner + // value (regardless of what it is!). + if let Some(out_path) = attrs.select("outPath") { + return Ok(generators::request_to_json(co, out_path.clone()).await); + } + + let mut out = Map::with_capacity(attrs.len()); + for (name, value) in attrs.into_iter_sorted() { + out.insert( + name.to_str()?.to_owned(), + match generators::request_to_json(co, value).await { + Ok((v, ctx)) => { + context.extend(ctx.into_iter()); + v + } + Err(cek) => return Ok(Err(cek)), + }, + ); + } + + Json::Object(out) + } + + Value::Catchable(c) => return Ok(Err(*c)), + + val @ Value::Closure(_) + | val @ Value::Thunk(_) + | val @ Value::Builtin(_) + | val @ Value::AttrNotFound + | val @ Value::Blueprint(_) + | val @ Value::DeferredUpvalue(_) + | val @ Value::UnresolvedPath(_) + | val @ Value::Json(..) + | val @ Value::FinaliseRequest(_) => { + return Err(ErrorKind::NotSerialisableToJson(val.type_of())) + } + }; + + Ok(Ok((value, context))) + } + + /// Generator version of the above, which wraps responses in + /// [`Value::Json`]. + pub(crate) async fn into_contextful_json_generator( + self, + co: GenCo, + ) -> Result<Value, ErrorKind> { + match self.into_contextful_json(&co).await? { + Err(cek) => Ok(Value::from(cek)), + Ok((json, ctx)) => Ok(Value::Json(Box::new((json, ctx)))), + } + } + + /// Transforms the structure into a JSON + /// All the accumulated context is ignored, use [`into_contextful_json`] + /// to obtain the resulting context of the JSON object. + pub async fn into_json( + self, + co: &GenCo, + ) -> Result<Result<Json, CatchableErrorKind>, ErrorKind> { + Ok(self.into_contextful_json(co).await?.map(|(json, _)| json)) + } +} diff --git a/tvix/eval/src/value/list.rs b/tvix/eval/src/value/list.rs index 2604b935ed..2b8b3de28d 100644 --- a/tvix/eval/src/value/list.rs +++ b/tvix/eval/src/value/list.rs @@ -1,23 +1,24 @@ //! This module implements Nix lists. -use std::ops::Deref; +use std::ops::Index; use std::rc::Rc; -use crate::errors::ErrorKind; -use crate::vm::VM; +use imbl::{vector, Vector}; + +use serde::Deserialize; use super::thunk::ThunkSet; use super::TotalDisplay; use super::Value; #[repr(transparent)] -#[derive(Clone, Debug)] -pub struct NixList(Rc<Vec<Value>>); +#[derive(Clone, Debug, Deserialize)] +pub struct NixList(Rc<Vector<Value>>); impl TotalDisplay for NixList { fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { f.write_str("[ ")?; - for v in self.0.as_ref() { + for v in self { v.total_fmt(f, set)?; f.write_str(" ")?; } @@ -26,42 +27,13 @@ impl TotalDisplay for NixList { } } -impl From<Rc<Vec<Value>>> for NixList { - fn from(v: Rc<Vec<Value>>) -> Self { - Self(v) - } -} - -impl From<Vec<Value>> for NixList { - fn from(vs: Vec<Value>) -> Self { +impl From<Vector<Value>> for NixList { + fn from(vs: Vector<Value>) -> Self { Self(Rc::new(vs)) } } -#[cfg(feature = "arbitrary")] -mod arbitrary { - use proptest::{ - prelude::{any_with, Arbitrary}, - strategy::{BoxedStrategy, Strategy}, - }; - - use super::*; - - impl Arbitrary for NixList { - type Parameters = <Vec<Value> as Arbitrary>::Parameters; - type Strategy = BoxedStrategy<Self>; - - fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { - any_with::<Rc<Vec<Value>>>(args).prop_map(Self).boxed() - } - } -} - impl NixList { - pub fn new() -> Self { - Self(Rc::new(vec![])) - } - pub fn len(&self) -> usize { self.0.len() } @@ -70,6 +42,10 @@ impl NixList { self.0.get(i) } + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + pub fn construct(count: usize, stack_slice: Vec<Value>) -> Self { debug_assert!( count == stack_slice.len(), @@ -78,10 +54,10 @@ impl NixList { stack_slice.len(), ); - NixList(Rc::new(stack_slice)) + NixList(Rc::new(Vector::from_iter(stack_slice))) } - pub fn iter(&self) -> std::slice::Iter<Value> { + pub fn iter(&self) -> vector::Iter<Value> { self.0.iter() } @@ -89,57 +65,38 @@ impl NixList { Rc::ptr_eq(&self.0, &other.0) } - /// Compare `self` against `other` for equality using Nix equality semantics - pub fn nix_eq(&self, other: &Self, vm: &mut VM) -> Result<bool, ErrorKind> { - if self.ptr_eq(other) { - return Ok(true); - } - if self.len() != other.len() { - return Ok(false); - } - - for (v1, v2) in self.iter().zip(other.iter()) { - if !v1.nix_eq(v2, vm)? { - return Ok(false); - } - } - - Ok(true) - } - - /// force each element of the list (shallowly), making it safe to call .get().value() - pub fn force_elements(&self, vm: &mut VM) -> Result<(), ErrorKind> { - self.iter().try_for_each(|v| v.force(vm).map(|_| ())) + pub fn into_inner(self) -> Vector<Value> { + Rc::try_unwrap(self.0).unwrap_or_else(|rc| (*rc).clone()) } - pub fn into_vec(self) -> Vec<Value> { - crate::unwrap_or_clone_rc(self.0) + #[deprecated(note = "callers should avoid constructing from Vec")] + pub fn from_vec(vs: Vec<Value>) -> Self { + Self(Rc::new(Vector::from_iter(vs))) } } impl IntoIterator for NixList { type Item = Value; - type IntoIter = std::vec::IntoIter<Value>; + type IntoIter = imbl::vector::ConsumingIter<Value>; - fn into_iter(self) -> std::vec::IntoIter<Value> { - self.into_vec().into_iter() + fn into_iter(self) -> Self::IntoIter { + self.into_inner().into_iter() } } impl<'a> IntoIterator for &'a NixList { type Item = &'a Value; - - type IntoIter = std::slice::Iter<'a, Value>; + type IntoIter = imbl::vector::Iter<'a, Value>; fn into_iter(self) -> Self::IntoIter { self.0.iter() } } -impl Deref for NixList { - type Target = Vec<Value>; +impl Index<usize> for NixList { + type Output = Value; - fn deref(&self) -> &Self::Target { - &self.0 + fn index(&self, index: usize) -> &Self::Output { + &self.0[index] } } diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index 2bca9e6d32..dfad0cd839 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -1,61 +1,122 @@ //! This module implements the backing representation of runtime //! values in the Nix language. use std::cmp::Ordering; -use std::ops::Deref; +use std::fmt::Display; +use std::num::{NonZeroI32, NonZeroUsize}; use std::path::PathBuf; use std::rc::Rc; -use std::{cell::Ref, fmt::Display}; + +use bstr::{BString, ByteVec}; +use lexical_core::format::CXX_LITERAL; +use serde::Deserialize; #[cfg(feature = "arbitrary")] mod arbitrary; mod attrs; mod builtin; mod function; +mod json; mod list; mod path; mod string; mod thunk; -use crate::errors::ErrorKind; +use crate::errors::{CatchableErrorKind, ErrorKind}; use crate::opcode::StackIdx; -use crate::vm::VM; +use crate::spans::LightSpan; +use crate::vm::generators::{self, GenCo}; +use crate::AddContext; pub use attrs::NixAttrs; -pub use builtin::{Builtin, BuiltinArgument}; +pub use builtin::{Builtin, BuiltinResult}; pub(crate) use function::Formals; pub use function::{Closure, Lambda}; pub use list::NixList; pub use path::canon_path; -pub use string::NixString; +pub use string::{NixContext, NixContextElement, NixString}; pub use thunk::Thunk; -use self::thunk::ThunkSet; +pub use self::thunk::ThunkSet; + +use lazy_static::lazy_static; #[warn(variant_size_differences)] -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Deserialize)] +#[serde(untagged)] pub enum Value { Null, Bool(bool), Integer(i64), Float(f64), String(NixString), - Path(PathBuf), - Attrs(Rc<NixAttrs>), + + #[serde(skip)] + Path(Box<PathBuf>), + Attrs(Box<NixAttrs>), List(NixList), - Closure(Closure), + + #[serde(skip)] + Closure(Rc<Closure>), // must use Rc<Closure> here in order to get proper pointer equality + + #[serde(skip)] Builtin(Builtin), // Internal values that, while they technically exist at runtime, // are never returned to or created directly by users. + #[serde(skip_deserializing)] Thunk(Thunk), // See [`compiler::compile_select_or()`] for explanation + #[serde(skip)] AttrNotFound, // this can only occur in Chunk::Constants and nowhere else + #[serde(skip)] Blueprint(Rc<Lambda>), + #[serde(skip)] DeferredUpvalue(StackIdx), - UnresolvedPath(PathBuf), + #[serde(skip)] + UnresolvedPath(Box<PathBuf>), + #[serde(skip)] + Json(Box<(serde_json::Value, NixContext)>), + + #[serde(skip)] + FinaliseRequest(bool), + + #[serde(skip)] + // TODO(tazjin): why is this in a Box? + Catchable(Box<CatchableErrorKind>), +} + +impl From<CatchableErrorKind> for Value { + #[inline] + fn from(c: CatchableErrorKind) -> Value { + Value::Catchable(Box::new(c)) + } +} + +impl<V> From<Result<V, CatchableErrorKind>> for Value +where + Value: From<V>, +{ + #[inline] + fn from(v: Result<V, CatchableErrorKind>) -> Value { + match v { + Ok(v) => v.into(), + Err(e) => Value::Catchable(Box::new(e)), + } + } +} + +lazy_static! { + static ref WRITE_FLOAT_OPTIONS: lexical_core::WriteFloatOptions = + lexical_core::WriteFloatOptionsBuilder::new() + .trim_floats(true) + .round_mode(lexical_core::write_float_options::RoundMode::Round) + .positive_exponent_break(Some(NonZeroI32::new(5).unwrap())) + .max_significant_digits(Some(NonZeroUsize::new(6).unwrap())) + .build() + .unwrap(); } // Helper macros to generate the to_*/as_* macros while accounting for @@ -107,38 +168,23 @@ macro_rules! gen_is { } /// Describes what input types are allowed when coercing a `Value` to a string -#[derive(Clone, Copy, PartialEq, Debug)] -pub enum CoercionKind { - /// Force thunks, but perform no other coercions. - ThunksOnly, - /// Only coerce already "stringly" types like strings and paths, but also - /// coerce sets that have a `__toString` attribute. Equivalent to - /// `!coerceMore` in C++ Nix. - Weak, - /// Coerce all value types included by `Weak`, but also coerce `null`, - /// booleans, integers, floats and lists of coercible types. Equivalent to - /// `coerceMore` in C++ Nix. - Strong, -} - -/// A reference to a [`Value`] returned by a call to [`Value::force`], whether the value was -/// originally a thunk or not. -/// -/// Implements [`Deref`] to [`Value`], so can generally be used as a [`Value`] -pub(crate) enum ForceResult<'a> { - ForcedThunk(Ref<'a, Value>), - Immediate(&'a Value), -} - -impl<'a> Deref for ForceResult<'a> { - type Target = Value; - - fn deref(&self) -> &Self::Target { - match self { - ForceResult::ForcedThunk(r) => r, - ForceResult::Immediate(v) => v, - } - } +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub struct CoercionKind { + /// If false only coerce already "stringly" types like strings and paths, but + /// also coerce sets that have a `__toString` attribute. In Tvix, this is + /// usually called a weak coercion. Equivalent to passing `false` as the + /// `coerceMore` argument of `EvalState::coerceToString` in C++ Nix. + /// + /// If true coerce all value types included by a weak coercion, but also + /// coerce `null`, booleans, integers, floats and lists of coercible types. + /// Consequently, we call this a strong coercion. Equivalent to passing + /// `true` as `coerceMore` in C++ Nix. + pub strong: bool, + + /// If `import_paths` is `true`, paths are imported into the store and their + /// store path is the result of the coercion (equivalent to the + /// `copyToStore` argument of `EvalState::coerceToString` in C++ Nix). + pub import_paths: bool, } impl<T> From<T> for Value @@ -154,125 +200,463 @@ where impl Value { /// Construct a [`Value::Attrs`] from a [`NixAttrs`]. pub fn attrs(attrs: NixAttrs) -> Self { - Self::Attrs(Rc::new(attrs)) + Self::Attrs(Box::new(attrs)) } } +/// Controls what kind of by-pointer equality comparison is allowed. +/// +/// See `//tvix/docs/value-pointer-equality.md` for details. +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum PointerEquality { + /// Pointer equality not allowed at all. + ForbidAll, + + /// Pointer equality comparisons only allowed for nested values. + AllowNested, + + /// Pointer equality comparisons are allowed in all contexts. + AllowAll, +} + impl Value { + /// Deeply forces a value, traversing e.g. lists and attribute sets and forcing + /// their contents, too. + /// + /// This is a generator function. + pub(super) async fn deep_force(self, co: GenCo, span: LightSpan) -> Result<Value, ErrorKind> { + if let Some(v) = Self::deep_force_(self.clone(), co, span).await? { + Ok(v) + } else { + Ok(self) + } + } + + /// Returns Some(v) or None to indicate the returned value is myself + async fn deep_force_( + myself: Value, + co: GenCo, + span: LightSpan, + ) -> Result<Option<Value>, ErrorKind> { + // This is a stack of values which still remain to be forced. + let mut vals = vec![myself]; + + let mut thunk_set: ThunkSet = Default::default(); + + loop { + let v = if let Some(v) = vals.pop() { + v + } else { + return Ok(None); + }; + + // Get rid of any top-level thunks, and bail out of self-recursive + // thunks. + let value = if let Value::Thunk(t) = &v { + if !thunk_set.insert(t) { + continue; + } + Thunk::force_(t.clone(), &co, span.clone()).await? + } else { + v + }; + + match value { + // Short-circuit on already evaluated values, or fail on internal values. + Value::Null + | Value::Bool(_) + | Value::Integer(_) + | Value::Float(_) + | Value::String(_) + | Value::Path(_) + | Value::Closure(_) + | Value::Builtin(_) => continue, + + Value::List(list) => { + for val in list.into_iter().rev() { + vals.push(val); + } + continue; + } + + Value::Attrs(attrs) => { + for (_, val) in attrs.into_iter().rev() { + vals.push(val); + } + continue; + } + + Value::Thunk(_) => panic!("Tvix bug: force_value() returned a thunk"), + + Value::Catchable(_) => return Ok(Some(value)), + + Value::AttrNotFound + | Value::Blueprint(_) + | Value::DeferredUpvalue(_) + | Value::UnresolvedPath(_) + | Value::Json(..) + | Value::FinaliseRequest(_) => panic!( + "Tvix bug: internal value left on stack: {}", + value.type_of() + ), + } + } + } + + pub async fn coerce_to_string( + self, + co: GenCo, + kind: CoercionKind, + span: LightSpan, + ) -> Result<Value, ErrorKind> { + self.coerce_to_string_(&co, kind, span).await + } + /// Coerce a `Value` to a string. See `CoercionKind` for a rundown of what /// input types are accepted under what circumstances. - pub fn coerce_to_string( - &self, + pub async fn coerce_to_string_( + self, + co: &GenCo, kind: CoercionKind, - vm: &mut VM, - ) -> Result<NixString, ErrorKind> { - // TODO: eventually, this will need to handle string context and importing - // files into the Nix store depending on what context the coercion happens in - if let Value::Thunk(t) = self { - t.force(vm)?; - } + span: LightSpan, + ) -> Result<Value, ErrorKind> { + let mut result = BString::default(); + let mut vals = vec![self]; + // Track if we are coercing the first value of a list to correctly emit + // separating white spaces. + let mut is_list_head = None; + // FIXME(raitobezarius): as per https://b.tvl.fyi/issues/364 + // we might be interested into more powerful context-related coercion kinds. + let mut context: NixContext = NixContext::new(); + + loop { + let value = if let Some(v) = vals.pop() { + v.force(co, span.clone()).await? + } else { + return Ok(Value::String(NixString::new_context_from(context, result))); + }; + let coerced: Result<BString, _> = match (value, kind) { + // coercions that are always done + (Value::String(mut s), _) => { + if let Some(ctx) = s.take_context() { + context.extend(ctx.into_iter()); + } + Ok((*s).into()) + } - match (self, kind) { - // deal with thunks - (Value::Thunk(t), _) => t.value().coerce_to_string(kind, vm), + // TODO(sterni): Think about proper encoding handling here. This needs + // general consideration anyways, since one current discrepancy between + // C++ Nix and Tvix is that the former's strings are arbitrary byte + // sequences without NUL bytes, whereas Tvix only allows valid + // Unicode. See also b/189. + ( + Value::Path(p), + CoercionKind { + import_paths: true, .. + }, + ) => { + let imported = generators::request_path_import(co, *p).await; + // When we import a path from the evaluator, we must attach + // its original path as its context. + context = context.append(NixContextElement::Plain( + imported.to_string_lossy().to_string(), + )); + Ok(imported.into_os_string().into_encoded_bytes().into()) + } + ( + Value::Path(p), + CoercionKind { + import_paths: false, + .. + }, + ) => Ok(p.into_os_string().into_encoded_bytes().into()), + + // Attribute sets can be converted to strings if they either have an + // `__toString` attribute which holds a function that receives the + // set itself or an `outPath` attribute which should be a string. + // `__toString` is preferred. + (Value::Attrs(attrs), kind) => { + if let Some(to_string) = attrs.select("__toString") { + let callable = to_string.clone().force(co, span.clone()).await?; + + // Leave the attribute set on the stack as an argument + // to the function call. + generators::request_stack_push(co, Value::Attrs(attrs.clone())).await; + + // Call the callable ... + let result = generators::request_call(co, callable).await; + + // Recurse on the result, as attribute set coercion + // actually works recursively, e.g. you can even return + // /another/ set with a __toString attr. + vals.push(result); + continue; + } else if let Some(out_path) = attrs.select("outPath") { + vals.push(out_path.clone()); + continue; + } else { + return Err(ErrorKind::NotCoercibleToString { from: "set", kind }); + } + } + + // strong coercions + (Value::Null, CoercionKind { strong: true, .. }) + | (Value::Bool(false), CoercionKind { strong: true, .. }) => Ok("".into()), + (Value::Bool(true), CoercionKind { strong: true, .. }) => Ok("1".into()), + + (Value::Integer(i), CoercionKind { strong: true, .. }) => Ok(format!("{i}").into()), + (Value::Float(f), CoercionKind { strong: true, .. }) => { + // contrary to normal Display, coercing a float to a string will + // result in unconditional 6 decimal places + Ok(format!("{:.6}", f).into()) + } - // coercions that are always done - (Value::String(s), _) => Ok(s.clone()), + // Lists are coerced by coercing their elements and interspersing spaces + (Value::List(list), CoercionKind { strong: true, .. }) => { + for elem in list.into_iter().rev() { + vals.push(elem); + } + // In case we are coercing a list within a list we don't want + // to touch this. Since the algorithm is nonrecursive, the + // space would not have been created yet (due to continue). + if is_list_head.is_none() { + is_list_head = Some(true); + } + continue; + } + + (Value::Thunk(_), _) => panic!("Tvix bug: force returned unforced thunk"), + + val @ (Value::Closure(_), _) + | val @ (Value::Builtin(_), _) + | val @ (Value::Null, _) + | val @ (Value::Bool(_), _) + | val @ (Value::Integer(_), _) + | val @ (Value::Float(_), _) + | val @ (Value::List(_), _) => Err(ErrorKind::NotCoercibleToString { + from: val.0.type_of(), + kind, + }), + + (c @ Value::Catchable(_), _) => return Ok(c), + + (Value::AttrNotFound, _) + | (Value::Blueprint(_), _) + | (Value::DeferredUpvalue(_), _) + | (Value::UnresolvedPath(_), _) + | (Value::Json(..), _) + | (Value::FinaliseRequest(_), _) => { + panic!("tvix bug: .coerce_to_string() called on internal value") + } + }; - // TODO(sterni): Think about proper encoding handling here. This needs - // general consideration anyways, since one current discrepancy between - // C++ Nix and Tvix is that the former's strings are arbitrary byte - // sequences without NUL bytes, whereas Tvix only allows valid - // Unicode. See also b/189. - (Value::Path(p), kind) if kind != CoercionKind::ThunksOnly => { - Ok(p.to_string_lossy().into_owned().into()) + if let Some(head) = is_list_head { + if !head { + result.push(b' '); + } else { + is_list_head = Some(false); + } } - // Attribute sets can be converted to strings if they either have an - // `__toString` attribute which holds a function that receives the - // set itself or an `outPath` attribute which should be a string. - // `__toString` is preferred. - (Value::Attrs(attrs), kind) if kind != CoercionKind::ThunksOnly => { - match (attrs.select("__toString"), attrs.select("outPath")) { - (None, None) => Err(ErrorKind::NotCoercibleToString { from: "set", kind }), - - (Some(f), _) => { - // use a closure here to deal with the thunk borrow we need to do below - let call_to_string = |value: &Value, vm: &mut VM| { - // Leave self on the stack as an argument to the function call. - vm.push(self.clone()); - vm.call_value(value)?; - let result = vm.pop(); - - match result { - Value::String(s) => Ok(s), - // Attribute set coercion actually works - // recursively, e.g. you can even return - // /another/ set with a __toString attr. - _ => result.coerce_to_string(kind, vm), - } - }; + result.push_str(&coerced?); + } + } - if let Value::Thunk(t) = f { - t.force(vm)?; - let guard = t.value(); - call_to_string(&*guard, vm) - } else { - call_to_string(f, vm) + pub(crate) async fn nix_eq_owned_genco( + self, + other: Value, + co: GenCo, + ptr_eq: PointerEquality, + span: LightSpan, + ) -> Result<Value, ErrorKind> { + self.nix_eq(other, &co, ptr_eq, span).await + } + + /// Compare two Nix values for equality, forcing nested parts of the structure + /// as needed. + /// + /// This comparison needs to be invoked for nested values (e.g. in lists and + /// attribute sets) as well, which is done by suspending and asking the VM to + /// perform the nested comparison. + /// + /// The `top_level` parameter controls whether this invocation is the top-level + /// comparison, or a nested value comparison. See + /// `//tvix/docs/value-pointer-equality.md` + pub(crate) async fn nix_eq( + self, + other: Value, + co: &GenCo, + ptr_eq: PointerEquality, + span: LightSpan, + ) -> Result<Value, ErrorKind> { + // this is a stack of ((v1,v2),peq) triples to be compared; + // after each triple is popped off of the stack, v1 is + // compared to v2 using peq-mode PointerEquality + let mut vals = vec![((self, other), ptr_eq)]; + + loop { + let ((a, b), ptr_eq) = if let Some(abp) = vals.pop() { + abp + } else { + // stack is empty, so comparison has succeeded + return Ok(Value::Bool(true)); + }; + let a = match a { + Value::Thunk(thunk) => { + // If both values are thunks, and thunk comparisons are allowed by + // pointer, do that and move on. + if ptr_eq == PointerEquality::AllowAll { + if let Value::Thunk(t1) = &b { + if t1.ptr_eq(&thunk) { + continue; + } } + }; + + Thunk::force_(thunk, co, span.clone()).await? + } + + _ => a, + }; + + let b = b.force(co, span.clone()).await?; + + debug_assert!(!matches!(a, Value::Thunk(_))); + debug_assert!(!matches!(b, Value::Thunk(_))); + + let result = match (a, b) { + // Trivial comparisons + (c @ Value::Catchable(_), _) => return Ok(c), + (_, c @ Value::Catchable(_)) => return Ok(c), + (Value::Null, Value::Null) => true, + (Value::Bool(b1), Value::Bool(b2)) => b1 == b2, + (Value::String(s1), Value::String(s2)) => s1 == s2, + (Value::Path(p1), Value::Path(p2)) => p1 == p2, + + // Numerical comparisons (they work between float & int) + (Value::Integer(i1), Value::Integer(i2)) => i1 == i2, + (Value::Integer(i), Value::Float(f)) => i as f64 == f, + (Value::Float(f1), Value::Float(f2)) => f1 == f2, + (Value::Float(f), Value::Integer(i)) => i as f64 == f, + + // List comparisons + (Value::List(l1), Value::List(l2)) => { + if ptr_eq >= PointerEquality::AllowNested && l1.ptr_eq(&l2) { + continue; } - // Similarly to `__toString` we also coerce recursively for `outPath` - (None, Some(s)) => s.coerce_to_string(kind, vm), + if l1.len() != l2.len() { + return Ok(Value::Bool(false)); + } + + vals.extend(l1.into_iter().rev().zip(l2.into_iter().rev()).zip( + std::iter::repeat(std::cmp::max(ptr_eq, PointerEquality::AllowNested)), + )); + continue; } - } - // strong coercions - (Value::Null, CoercionKind::Strong) | (Value::Bool(false), CoercionKind::Strong) => { - Ok("".into()) - } - (Value::Bool(true), CoercionKind::Strong) => Ok("1".into()), + (_, Value::List(_)) | (Value::List(_), _) => return Ok(Value::Bool(false)), - (Value::Integer(i), CoercionKind::Strong) => Ok(format!("{i}").into()), - (Value::Float(f), CoercionKind::Strong) => { - // contrary to normal Display, coercing a float to a string will - // result in unconditional 6 decimal places - Ok(format!("{:.6}", f).into()) - } + // Attribute set comparisons + (Value::Attrs(a1), Value::Attrs(a2)) => { + if ptr_eq >= PointerEquality::AllowNested && a1.ptr_eq(&a2) { + continue; + } - // Lists are coerced by coercing their elements and interspersing spaces - (Value::List(l), CoercionKind::Strong) => { - // TODO(sterni): use intersperse when it becomes available? - // https://github.com/rust-lang/rust/issues/79524 - l.iter() - .map(|v| v.coerce_to_string(kind, vm)) - .reduce(|acc, string| { - let a = acc?; - let s = &string?; - Ok(a.concat(&" ".into()).concat(s)) - }) - // None from reduce indicates empty iterator - .unwrap_or_else(|| Ok("".into())) - } + // Special-case for derivation comparisons: If both attribute sets + // have `type = derivation`, compare them by `outPath`. + #[allow(clippy::single_match)] // might need more match arms later + match (a1.select("type"), a2.select("type")) { + (Some(v1), Some(v2)) => { + let s1 = v1.clone().force(co, span.clone()).await?; + if s1.is_catchable() { + return Ok(s1); + } + let s2 = v2.clone().force(co, span.clone()).await?; + if s2.is_catchable() { + return Ok(s2); + } + let s1 = s1.to_str(); + let s2 = s2.to_str(); + + if let (Ok(s1), Ok(s2)) = (s1, s2) { + if s1 == "derivation" && s2 == "derivation" { + // TODO(tazjin): are the outPaths really required, + // or should it fall through? + let out1 = a1 + .select_required("outPath") + .context("comparing derivations")? + .clone(); + + let out2 = a2 + .select_required("outPath") + .context("comparing derivations")? + .clone(); + + let out1 = out1.clone().force(co, span.clone()).await?; + let out2 = out2.clone().force(co, span.clone()).await?; + + if out1.is_catchable() { + return Ok(out1); + } + + if out2.is_catchable() { + return Ok(out2); + } + + let result = + out1.to_contextful_str()? == out2.to_contextful_str()?; + if !result { + return Ok(Value::Bool(false)); + } else { + continue; + } + } + } + } + _ => {} + }; + + if a1.len() != a2.len() { + return Ok(Value::Bool(false)); + } - (Value::Path(_), _) - | (Value::Attrs(_), _) - | (Value::Closure(_), _) - | (Value::Builtin(_), _) - | (Value::Null, _) - | (Value::Bool(_), _) - | (Value::Integer(_), _) - | (Value::Float(_), _) - | (Value::List(_), _) => Err(ErrorKind::NotCoercibleToString { - from: self.type_of(), - kind, - }), - - (Value::AttrNotFound, _) - | (Value::Blueprint(_), _) - | (Value::DeferredUpvalue(_), _) - | (Value::UnresolvedPath(_), _) => { - panic!("tvix bug: .coerce_to_string() called on internal value") + // note that it is important to be careful here with the + // order we push the keys and values in order to properly + // compare attrsets containing `throw` elements. + let iter1 = a1.into_iter_sorted().rev(); + let iter2 = a2.into_iter_sorted().rev(); + for ((k1, v1), (k2, v2)) in iter1.zip(iter2) { + vals.push(( + (v1, v2), + std::cmp::max(ptr_eq, PointerEquality::AllowNested), + )); + vals.push(( + (k1.into(), k2.into()), + std::cmp::max(ptr_eq, PointerEquality::AllowNested), + )); + } + continue; + } + + (Value::Attrs(_), _) | (_, Value::Attrs(_)) => return Ok(Value::Bool(false)), + + (Value::Closure(c1), Value::Closure(c2)) + if ptr_eq >= PointerEquality::AllowNested => + { + if Rc::ptr_eq(&c1, &c2) { + continue; + } else { + return Ok(Value::Bool(false)); + } + } + + // Everything else is either incomparable (e.g. internal types) or + // false. + _ => return Ok(Value::Bool(false)), + }; + if !result { + return Ok(Value::Bool(false)); } } } @@ -289,141 +673,212 @@ impl Value { Value::List(_) => "list", Value::Closure(_) | Value::Builtin(_) => "lambda", - // Internal types - Value::Thunk(_) - | Value::AttrNotFound - | Value::Blueprint(_) - | Value::DeferredUpvalue(_) - | Value::UnresolvedPath(_) => "internal", + // Internal types. Note: These are only elaborated here + // because it makes debugging easier. If a user ever sees + // any of these strings, it's a bug. + Value::Thunk(_) => "internal[thunk]", + Value::AttrNotFound => "internal[attr_not_found]", + Value::Blueprint(_) => "internal[blueprint]", + Value::DeferredUpvalue(_) => "internal[deferred_upvalue]", + Value::UnresolvedPath(_) => "internal[unresolved_path]", + Value::Json(..) => "internal[json]", + Value::FinaliseRequest(_) => "internal[finaliser_sentinel]", + Value::Catchable(_) => "internal[catchable]", } } gen_cast!(as_bool, bool, "bool", Value::Bool(b), *b); gen_cast!(as_int, i64, "int", Value::Integer(x), *x); gen_cast!(as_float, f64, "float", Value::Float(x), *x); - gen_cast!(to_str, NixString, "string", Value::String(s), s.clone()); - gen_cast!(to_attrs, Rc<NixAttrs>, "set", Value::Attrs(a), a.clone()); + + /// Cast the current value into a **context-less** string. + /// If you wanted to cast it into a potentially contextful string, + /// you have to explicitly use `to_contextful_str`. + /// Contextful strings are special, they should not be obtained + /// everytime you want a string. + pub fn to_str(&self) -> Result<NixString, ErrorKind> { + match self { + Value::String(s) if !s.has_context() => Ok((*s).clone()), + Value::Thunk(thunk) => Self::to_str(&thunk.value()), + other => Err(type_error("contextless strings", other)), + } + } + + gen_cast!( + to_contextful_str, + NixString, + "contextful string", + Value::String(s), + (*s).clone() + ); + gen_cast!(to_path, Box<PathBuf>, "path", Value::Path(p), p.clone()); + gen_cast!(to_attrs, Box<NixAttrs>, "set", Value::Attrs(a), a.clone()); gen_cast!(to_list, NixList, "list", Value::List(l), l.clone()); - gen_cast!(to_closure, Closure, "lambda", Value::Closure(c), c.clone()); + gen_cast!( + as_closure, + Rc<Closure>, + "lambda", + Value::Closure(c), + c.clone() + ); gen_cast_mut!(as_list_mut, NixList, "list", List); gen_is!(is_path, Value::Path(_)); gen_is!(is_number, Value::Integer(_) | Value::Float(_)); gen_is!(is_bool, Value::Bool(_)); + gen_is!(is_attrs, Value::Attrs(_)); + gen_is!(is_catchable, Value::Catchable(_)); - /// Compare `self` against `other` for equality using Nix equality semantics. + /// Returns `true` if the value is a [`Thunk`]. /// - /// Takes a reference to the `VM` to allow forcing thunks during comparison - pub fn nix_eq(&self, other: &Self, vm: &mut VM) -> Result<bool, ErrorKind> { - match (self, other) { - // Trivial comparisons - (Value::Null, Value::Null) => Ok(true), - (Value::Bool(b1), Value::Bool(b2)) => Ok(b1 == b2), - (Value::String(s1), Value::String(s2)) => Ok(s1 == s2), - (Value::Path(p1), Value::Path(p2)) => Ok(p1 == p2), - - // Numerical comparisons (they work between float & int) - (Value::Integer(i1), Value::Integer(i2)) => Ok(i1 == i2), - (Value::Integer(i), Value::Float(f)) => Ok(*i as f64 == *f), - (Value::Float(f1), Value::Float(f2)) => Ok(f1 == f2), - (Value::Float(f), Value::Integer(i)) => Ok(*i as f64 == *f), - - (Value::Attrs(_), Value::Attrs(_)) - | (Value::List(_), Value::List(_)) - | (Value::Thunk(_), _) - | (_, Value::Thunk(_)) => Ok(vm.nix_eq(self.clone(), other.clone(), false)?), - - // Everything else is either incomparable (e.g. internal - // types) or false. - _ => Ok(false), - } + /// [`Thunk`]: Value::Thunk + pub fn is_thunk(&self) -> bool { + matches!(self, Self::Thunk(..)) } /// Compare `self` against other using (fallible) Nix ordering semantics. - pub fn nix_cmp(&self, other: &Self, vm: &mut VM) -> Result<Option<Ordering>, ErrorKind> { - match (self, other) { - // same types - (Value::Integer(i1), Value::Integer(i2)) => Ok(i1.partial_cmp(i2)), - (Value::Float(f1), Value::Float(f2)) => Ok(f1.partial_cmp(f2)), - (Value::String(s1), Value::String(s2)) => Ok(s1.partial_cmp(s2)), - (Value::List(l1), Value::List(l2)) => { - for i in 0.. { - if i == l2.len() { - return Ok(Some(Ordering::Greater)); - } else if i == l1.len() { - return Ok(Some(Ordering::Less)); - } else if !vm.nix_eq(l1[i].clone(), l2[i].clone(), true)? { - return l1[i].force(vm)?.nix_cmp(&*l2[i].force(vm)?, vm); + /// + /// The function is intended to be used from within other generator + /// functions or `gen!` blocks. + pub async fn nix_cmp_ordering( + self, + other: Self, + co: GenCo, + span: LightSpan, + ) -> Result<Result<Ordering, CatchableErrorKind>, ErrorKind> { + Self::nix_cmp_ordering_(self, other, co, span).await + } + + async fn nix_cmp_ordering_( + myself: Self, + other: Self, + co: GenCo, + span: LightSpan, + ) -> Result<Result<Ordering, CatchableErrorKind>, ErrorKind> { + // this is a stack of ((v1,v2),peq) triples to be compared; + // after each triple is popped off of the stack, v1 is + // compared to v2 using peq-mode PointerEquality + let mut vals = vec![((myself, other), PointerEquality::ForbidAll)]; + + loop { + let ((mut a, mut b), ptr_eq) = if let Some(abp) = vals.pop() { + abp + } else { + // stack is empty, so they are equal + return Ok(Ok(Ordering::Equal)); + }; + if ptr_eq == PointerEquality::AllowAll { + if a.clone() + .nix_eq(b.clone(), &co, PointerEquality::AllowAll, span.clone()) + .await? + .as_bool()? + { + continue; + } + a = a.force(&co, span.clone()).await?; + b = b.force(&co, span.clone()).await?; + } + let result = match (a, b) { + (Value::Catchable(c), _) => return Ok(Err(*c)), + (_, Value::Catchable(c)) => return Ok(Err(*c)), + // same types + (Value::Integer(i1), Value::Integer(i2)) => i1.cmp(&i2), + (Value::Float(f1), Value::Float(f2)) => f1.total_cmp(&f2), + (Value::String(s1), Value::String(s2)) => s1.cmp(&s2), + (Value::List(l1), Value::List(l2)) => { + let max = l1.len().max(l2.len()); + for j in 0..max { + let i = max - 1 - j; + if i >= l2.len() { + vals.push(((1.into(), 0.into()), PointerEquality::ForbidAll)); + } else if i >= l1.len() { + vals.push(((0.into(), 1.into()), PointerEquality::ForbidAll)); + } else { + vals.push(((l1[i].clone(), l2[i].clone()), PointerEquality::AllowAll)); + } } + continue; } - unreachable!() - } + // different types + (Value::Integer(i1), Value::Float(f2)) => (i1 as f64).total_cmp(&f2), + (Value::Float(f1), Value::Integer(i2)) => f1.total_cmp(&(i2 as f64)), - // different types - (Value::Integer(i1), Value::Float(f2)) => Ok((*i1 as f64).partial_cmp(f2)), - (Value::Float(f1), Value::Integer(i2)) => Ok(f1.partial_cmp(&(*i2 as f64))), + // unsupported types + (lhs, rhs) => { + return Err(ErrorKind::Incomparable { + lhs: lhs.type_of(), + rhs: rhs.type_of(), + }) + } + }; + if result != Ordering::Equal { + return Ok(Ok(result)); + } + } + } - // unsupported types - (lhs, rhs) => Err(ErrorKind::Incomparable { - lhs: lhs.type_of(), - rhs: rhs.type_of(), - }), + // TODO(amjoseph): de-asyncify this (when called directly by the VM) + pub async fn force(self, co: &GenCo, span: LightSpan) -> Result<Value, ErrorKind> { + if let Value::Thunk(thunk) = self { + // TODO(amjoseph): use #[tailcall::mutual] + return Thunk::force_(thunk, co, span).await; } + Ok(self) } - /// Ensure `self` is forced if it is a thunk, and return a reference to the resulting value. - pub(crate) fn force(&self, vm: &mut VM) -> Result<ForceResult, ErrorKind> { - match self { - Self::Thunk(thunk) => { - thunk.force(vm)?; - Ok(ForceResult::ForcedThunk(thunk.value())) - } - _ => Ok(ForceResult::Immediate(self)), + // need two flavors, because async + pub async fn force_owned_genco(self, co: GenCo, span: LightSpan) -> Result<Value, ErrorKind> { + if let Value::Thunk(thunk) = self { + // TODO(amjoseph): use #[tailcall::mutual] + return Thunk::force_(thunk, &co, span).await; } + Ok(self) } - /// Ensure `self` is *deeply* forced, including all recursive sub-values - pub(crate) fn deep_force( - &self, - vm: &mut VM, - thunk_set: &mut ThunkSet, - ) -> Result<(), ErrorKind> { + /// Explain a value in a human-readable way, e.g. by presenting + /// the docstrings of functions if present. + pub fn explain(&self) -> String { match self { - Value::Null - | Value::Bool(_) - | Value::Integer(_) - | Value::Float(_) - | Value::String(_) - | Value::Path(_) - | Value::Closure(_) - | Value::Builtin(_) - | Value::AttrNotFound - | Value::Blueprint(_) - | Value::DeferredUpvalue(_) - | Value::UnresolvedPath(_) => Ok(()), - Value::Attrs(a) => { - for (_, v) in a.iter() { - v.deep_force(vm, thunk_set)?; + Value::Null => "the 'null' value".into(), + Value::Bool(b) => format!("the boolean value '{}'", b), + Value::Integer(i) => format!("the integer '{}'", i), + Value::Float(f) => format!("the float '{}'", f), + Value::String(s) if s.has_context() => format!("the contextful string '{}'", s), + Value::String(s) => format!("the contextless string '{}'", s), + Value::Path(p) => format!("the path '{}'", p.to_string_lossy()), + Value::Attrs(attrs) => format!("a {}-item attribute set", attrs.len()), + Value::List(list) => format!("a {}-item list", list.len()), + + Value::Closure(f) => { + if let Some(name) = &f.lambda.name { + format!("the user-defined Nix function '{}'", name) + } else { + "a user-defined Nix function".to_string() } - Ok(()) } - Value::List(l) => { - for val in l { - val.deep_force(vm, thunk_set)?; + + Value::Builtin(b) => { + let mut out = format!("the builtin function '{}'", b.name()); + if let Some(docs) = b.documentation() { + out.push_str("\n\n"); + out.push_str(docs); } - Ok(()) + out } - Value::Thunk(thunk) => { - if !thunk_set.insert(thunk) { - return Ok(()); - } - thunk.force(vm)?; - let value = thunk.value().clone(); - value.deep_force(vm, thunk_set) - } + // TODO: handle suspended thunks with a different explanation instead of panicking + Value::Thunk(t) => t.value().explain(), + + Value::Catchable(_) => "a catchable failure".into(), + + Value::AttrNotFound + | Value::Blueprint(_) + | Value::DeferredUpvalue(_) + | Value::UnresolvedPath(_) + | Value::Json(..) + | Value::FinaliseRequest(_) => "an internal Tvix evaluator value".into(), } } } @@ -438,6 +893,79 @@ impl Display for Value { } } +/// Emulates the C++-Nix style formatting of floats, which diverges +/// significantly from Rust's native float formatting. +fn total_fmt_float<F: std::fmt::Write>(num: f64, mut f: F) -> std::fmt::Result { + let mut buf = [b'0'; lexical_core::BUFFER_SIZE]; + let mut s = lexical_core::write_with_options::<f64, { CXX_LITERAL }>( + num, + &mut buf, + &WRITE_FLOAT_OPTIONS, + ); + + // apply some postprocessing on the buffer. If scientific + // notation is used (we see an `e`), and the next character is + // a digit, add the missing `+` sign.) + let mut new_s = Vec::with_capacity(s.len()); + + if s.contains(&b'e') { + for (i, c) in s.iter().enumerate() { + // encountered `e` + if c == &b'e' { + // next character is a digit (so no negative exponent) + if s.len() > i && s[i + 1].is_ascii_digit() { + // copy everything from the start up to (including) the e + new_s.extend_from_slice(&s[0..=i]); + // add the missing '+' + new_s.push(b'+'); + // check for the remaining characters. + // If it's only one, we need to prepend a trailing zero + if s.len() == i + 2 { + new_s.push(b'0'); + } + new_s.extend_from_slice(&s[i + 1..]); + break; + } + } + } + + // if we modified the scientific notation, flip the reference + if !new_s.is_empty() { + s = &mut new_s + } + } else if s.contains(&b'.') { + // else, if this is not scientific notation, and there's a + // decimal point, make sure we really drop trailing zeroes. + // In some cases, lexical_core doesn't. + for (i, c) in s.iter().enumerate() { + // at `.`` + if c == &b'.' { + // trim zeroes from the right side. + let frac = String::from_utf8_lossy(&s[i + 1..]); + let frac_no_trailing_zeroes = frac.trim_end_matches('0'); + + if frac.len() != frac_no_trailing_zeroes.len() { + // we managed to strip something, construct new_s + if frac_no_trailing_zeroes.is_empty() { + // if frac_no_trailing_zeroes is empty, the fractional part was all zeroes, so we can drop the decimal point as well + new_s.extend_from_slice(&s[0..=i - 1]); + } else { + // else, assemble the rest of the string + new_s.extend_from_slice(&s[0..=i]); + new_s.extend_from_slice(frac_no_trailing_zeroes.as_bytes()); + } + + // flip the reference + s = &mut new_s; + break; + } + } + } + } + + write!(f, "{}", String::from_utf8_lossy(s)) +} + impl TotalDisplay for Value { fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { match self { @@ -449,24 +977,27 @@ impl TotalDisplay for Value { Value::Path(p) => p.display().fmt(f), Value::Attrs(attrs) => attrs.total_fmt(f, set), Value::List(list) => list.total_fmt(f, set), - Value::Closure(_) => f.write_str("lambda"), // TODO: print position + // TODO: fancy REPL display with position + Value::Closure(_) => f.write_str("<LAMBDA>"), Value::Builtin(builtin) => builtin.fmt(f), // Nix prints floats with a maximum precision of 5 digits - // only. - Value::Float(num) => { - write!(f, "{}", format!("{:.5}", num).trim_end_matches(['.', '0'])) - } + // only. Except when it decides to use scientific notation + // (with a + after the `e`, and zero-padded to 0 digits) + Value::Float(num) => total_fmt_float(*num, f), // internal types Value::AttrNotFound => f.write_str("internal[not found]"), Value::Blueprint(_) => f.write_str("internal[blueprint]"), Value::DeferredUpvalue(_) => f.write_str("internal[deferred_upvalue]"), Value::UnresolvedPath(_) => f.write_str("internal[unresolved_path]"), + Value::Json(..) => f.write_str("internal[json]"), + Value::FinaliseRequest(_) => f.write_str("internal[finaliser_sentinel]"), // Delegate thunk display to the type, as it must handle // the case of already evaluated or cyclic thunks. Value::Thunk(t) => t.total_fmt(f, set), + Value::Catchable(_) => panic!("total_fmt() called on a CatchableErrorKind"), } } } @@ -483,58 +1014,15 @@ impl From<i64> for Value { } } -impl From<PathBuf> for Value { - fn from(path: PathBuf) -> Self { - Self::Path(path) - } -} - -impl From<Vec<Value>> for Value { - fn from(val: Vec<Value>) -> Self { - Self::List(NixList::from(val)) +impl From<f64> for Value { + fn from(i: f64) -> Self { + Self::Float(i) } } -impl TryFrom<serde_json::Value> for Value { - type Error = ErrorKind; - - fn try_from(value: serde_json::Value) -> Result<Self, Self::Error> { - // TODO(grfn): Replace with a real serde::Deserialize impl (for perf) - match value { - serde_json::Value::Null => Ok(Self::Null), - serde_json::Value::Bool(b) => Ok(Self::Bool(b)), - serde_json::Value::Number(n) => { - if let Some(i) = n.as_i64() { - Ok(Self::Integer(i)) - } else if let Some(f) = n.as_f64() { - Ok(Self::Float(f)) - } else { - Err(ErrorKind::FromJsonError(format!( - "JSON number not representable as Nix value: {n}" - ))) - } - } - serde_json::Value::String(s) => Ok(s.into()), - serde_json::Value::Array(a) => Ok(a - .into_iter() - .map(Value::try_from) - .collect::<Result<Vec<_>, _>>()? - .into()), - serde_json::Value::Object(obj) => { - match (obj.len(), obj.get("name"), obj.get("value")) { - (2, Some(name), Some(value)) => Ok(Self::attrs(NixAttrs::from_kv( - name.clone().try_into()?, - value.clone().try_into()?, - ))), - _ => Ok(Self::attrs(NixAttrs::from_iter( - obj.into_iter() - .map(|(k, v)| Ok((k.into(), v.try_into()?))) - .collect::<Result<Vec<(NixString, Value)>, ErrorKind>>()? - .into_iter(), - ))), - } - } - } +impl From<PathBuf> for Value { + fn from(path: PathBuf) -> Self { + Self::Path(Box::new(path)) } } @@ -548,52 +1036,38 @@ fn type_error(expected: &'static str, actual: &Value) -> ErrorKind { #[cfg(test)] mod tests { use super::*; + use std::mem::size_of; - mod nix_eq { - use crate::observer::NoOpObserver; - - use super::*; - use proptest::prelude::ProptestConfig; - use test_strategy::proptest; - - #[proptest(ProptestConfig { cases: 5, ..Default::default() })] - fn reflexive(x: Value) { - let mut observer = NoOpObserver {}; - let mut vm = VM::new(Default::default(), &mut observer); - - assert!(x.nix_eq(&x, &mut vm).unwrap()) - } - - #[proptest(ProptestConfig { cases: 5, ..Default::default() })] - fn symmetric(x: Value, y: Value) { - let mut observer = NoOpObserver {}; - let mut vm = VM::new(Default::default(), &mut observer); - - assert_eq!( - x.nix_eq(&y, &mut vm).unwrap(), - y.nix_eq(&x, &mut vm).unwrap() - ) - } + #[test] + fn size() { + assert_eq!(size_of::<Value>(), 16); + } - #[proptest(ProptestConfig { cases: 5, ..Default::default() })] - fn transitive(x: Value, y: Value, z: Value) { - let mut observer = NoOpObserver {}; - let mut vm = VM::new(Default::default(), &mut observer); - - if x.nix_eq(&y, &mut vm).unwrap() && y.nix_eq(&z, &mut vm).unwrap() { - assert!(x.nix_eq(&z, &mut vm).unwrap()) - } - } + mod floats { + use crate::value::total_fmt_float; #[test] - fn list_int_float_fungibility() { - let mut observer = NoOpObserver {}; - let mut vm = VM::new(Default::default(), &mut observer); - - let v1 = Value::List(NixList::from(vec![Value::Integer(1)])); - let v2 = Value::List(NixList::from(vec![Value::Float(1.0)])); - - assert!(v1.nix_eq(&v2, &mut vm).unwrap()) + fn format_float() { + let ff = [ + (0f64, "0"), + (1.0f64, "1"), + (-0.01, "-0.01"), + (5e+22, "5e+22"), + (1e6, "1e+06"), + (-2E-2, "-0.02"), + (6.626e-34, "6.626e-34"), + (9_224_617.445_991_227, "9.22462e+06"), + ]; + for (n, expected) in ff.iter() { + let mut buf = String::new(); + let res = total_fmt_float(*n, &mut buf); + assert!(res.is_ok()); + assert_eq!( + expected, &buf, + "{} should be formatted as {}, but got {}", + n, expected, &buf + ); + } } } } diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index 66697a7f2f..4f869eb00d 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -1,58 +1,535 @@ -//! This module implements Nix language strings and their different -//! backing implementations. +//! This module implements Nix language strings. +//! +//! Nix language strings never need to be modified on the language +//! level, allowing us to shave off some memory overhead and only +//! paying the cost when creating new strings. +use bstr::{BStr, BString, ByteSlice, Chars}; use rnix::ast; -use smol_str::SmolStr; -use std::ffi::OsStr; +use std::alloc::{alloc, dealloc, handle_alloc_error, Layout}; +use std::borrow::{Borrow, Cow}; +use std::collections::HashSet; +use std::ffi::c_void; +use std::fmt::{self, Debug, Display}; use std::hash::Hash; use std::ops::Deref; -use std::path::Path; -use std::{borrow::Cow, fmt::Display, str::Chars}; - -#[derive(Clone, Debug)] -enum StringRepr { - Smol(SmolStr), - Heap(String), +use std::ptr::{self, NonNull}; +use std::slice; + +use serde::de::{Deserializer, Visitor}; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Debug, Serialize, Hash, PartialEq, Eq)] +pub enum NixContextElement { + /// A plain store path (e.g. source files copied to the store) + Plain(String), + + /// Single output of a derivation, represented by its name and its derivation path. + Single { name: String, derivation: String }, + + /// A reference to a complete derivation + /// including its source and its binary closure. + /// It is used for the `drvPath` attribute context. + /// The referred string is the store path to + /// the derivation path. + Derivation(String), } +/// Nix context strings representation in Tvix. This tracks a set of different kinds of string +/// dependencies that we can come across during manipulation of our language primitives, mostly +/// strings. There's some simple algebra of context strings and how they propagate w.r.t. primitive +/// operations, e.g. concatenation, interpolation and other string operations. #[repr(transparent)] -#[derive(Clone, Debug)] -pub struct NixString(StringRepr); +#[derive(Clone, Debug, Serialize, Default)] +pub struct NixContext(HashSet<NixContextElement>); + +impl From<NixContextElement> for NixContext { + fn from(value: NixContextElement) -> Self { + Self([value].into()) + } +} + +impl From<HashSet<NixContextElement>> for NixContext { + fn from(value: HashSet<NixContextElement>) -> Self { + Self(value) + } +} + +impl<const N: usize> From<[NixContextElement; N]> for NixContext { + fn from(value: [NixContextElement; N]) -> Self { + Self(HashSet::from(value)) + } +} + +impl NixContext { + /// Creates an empty context that can be populated + /// and passed to form a contextful [NixString], albeit + /// if the context is concretly empty, the resulting [NixString] + /// will be contextless. + pub fn new() -> Self { + Self::default() + } + + /// For internal consumers, we let people observe + /// if the [NixContext] is actually empty or not + /// to decide whether they want to skip the allocation + /// of a full blown [HashSet]. + pub(crate) fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Consumes a new [NixContextElement] and add it if not already + /// present in this context. + pub fn append(mut self, other: NixContextElement) -> Self { + self.0.insert(other); + self + } + + /// Extends the existing context with more context elements. + pub fn extend<T>(&mut self, iter: T) + where + T: IntoIterator<Item = NixContextElement>, + { + self.0.extend(iter) + } + + /// Copies from another [NixString] its context strings + /// in this context. + pub fn mimic(&mut self, other: &NixString) { + if let Some(context) = other.context() { + self.extend(context.iter().cloned()); + } + } + + /// Iterates over "plain" context elements, e.g. sources imported + /// in the store without more information, i.e. `toFile` or coerced imported paths. + /// It yields paths to the store. + pub fn iter_plain(&self) -> impl Iterator<Item = &str> { + self.iter().filter_map(|elt| { + if let NixContextElement::Plain(s) = elt { + Some(s.as_str()) + } else { + None + } + }) + } + + /// Iterates over "full derivations" context elements, e.g. something + /// referring to their `drvPath`, i.e. their full sources and binary closure. + /// It yields derivation paths. + pub fn iter_derivation(&self) -> impl Iterator<Item = &str> { + self.iter().filter_map(|elt| { + if let NixContextElement::Derivation(s) = elt { + Some(s.as_str()) + } else { + None + } + }) + } + + /// Iterates over "single" context elements, e.g. single derived paths, + /// or also known as the single output of a given derivation. + /// The first element of the tuple is the output name + /// and the second element is the derivation path. + pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { + self.iter().filter_map(|elt| { + if let NixContextElement::Single { name, derivation } = elt { + Some((name.as_str(), derivation.as_str())) + } else { + None + } + }) + } + + /// Iterates over any element of the context. + pub fn iter(&self) -> impl Iterator<Item = &NixContextElement> { + self.0.iter() + } + + /// Produces a list of owned references to this current context, + /// no matter its type. + pub fn to_owned_references(self) -> Vec<String> { + self.0 + .into_iter() + .map(|ctx| match ctx { + NixContextElement::Derivation(drv_path) => drv_path, + NixContextElement::Plain(store_path) => store_path, + NixContextElement::Single { derivation, .. } => derivation, + }) + .collect() + } +} + +impl IntoIterator for NixContext { + type Item = NixContextElement; + + type IntoIter = std::collections::hash_set::IntoIter<NixContextElement>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +/// This type is never instantiated, but serves to document the memory layout of the actual heap +/// allocation for Nix strings. +#[allow(dead_code)] +struct NixStringInner { + /// The string context, if any. Note that this is boxed to take advantage of the null pointer + /// niche, otherwise this field ends up being very large: + /// + /// ```notrust + /// >> std::mem::size_of::<Option<HashSet<String>>>() + /// 48 + /// + /// >> std::mem::size_of::<Option<Box<HashSet<String>>>>() + /// 8 + /// ``` + context: Option<Box<NixContext>>, + /// The length of the data, stored *inline in the allocation* + length: usize, + /// The actual data for the string itself. Will always be `length` bytes long + data: [u8], +} + +#[allow(clippy::zst_offset)] +impl NixStringInner { + /// Construct a [`Layout`] for a nix string allocation with the given length. + /// + /// Returns a tuple of: + /// 1. The layout itself. + /// 2. The offset of [`Self::length`] within the allocation, assuming the allocation starts at 0 + /// 3. The offset of the data array within the allocation, assuming the allocation starts at 0 + fn layout(len: usize) -> (Layout, usize, usize) { + let layout = Layout::new::<Option<Box<NixContext>>>(); + let (layout, len_offset) = layout.extend(Layout::new::<usize>()).unwrap(); + let (layout, data_offset) = layout.extend(Layout::array::<u8>(len).unwrap()).unwrap(); + (layout, len_offset, data_offset) + } + + /// Returns the [`Layout`] for an *already-allocated* nix string, loading the length from the + /// pointer. + /// + /// Returns a tuple of: + /// 1. The layout itself. + /// 2. The offset of [`Self::length`] within the allocation, assuming the allocation starts at 0 + /// 3. The offset of the data array within the allocation, assuming the allocation starts at 0 + /// + /// # Safety + /// + /// This function must only be called on a pointer that has been properly initialized with + /// [`Self::alloc`]. The data buffer may not necessarily be initialized + unsafe fn layout_of(this: NonNull<c_void>) -> (Layout, usize, usize) { + let layout = Layout::new::<Option<Box<NixContext>>>(); + let (_, len_offset) = layout.extend(Layout::new::<usize>()).unwrap(); + // SAFETY: Layouts are linear, so even though we haven't involved data at all yet, we know + // the len_offset is a valid offset into the second field of the allocation + let len = *(this.as_ptr().add(len_offset) as *const usize); + Self::layout(len) + } + + /// Allocate an *uninitialized* nix string with the given length. Writes the length to the + /// length value in the pointer, but leaves both context and data uninitialized + /// + /// This function is safe to call (as constructing pointers of any sort of validity is always + /// safe in Rust) but it is unsafe to use the resulting pointer to do anything other than + /// + /// 1. Read the length + /// 2. Write the context + /// 3. Write the data + /// + /// until the string is fully initialized + fn alloc(len: usize) -> NonNull<c_void> { + let (layout, len_offset, _data_offset) = Self::layout(len); + debug_assert_ne!(layout.size(), 0); + unsafe { + // SAFETY: Layout has non-zero size, since the layout of the context and the + // layout of the len both have non-zero size + let ptr = alloc(layout); + + if let Some(this) = NonNull::new(ptr as *mut _) { + // SAFETY: We've allocated with a layout that causes the len_offset to be in-bounds + // and writeable, and if the allocation succeeded it won't wrap + ((this.as_ptr() as *mut u8).add(len_offset) as *mut usize).write(len); + debug_assert_eq!(Self::len(this), len); + this + } else { + handle_alloc_error(layout); + } + } + } + + /// Deallocate the Nix string at the given pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn dealloc(this: NonNull<c_void>) { + let (layout, _, _) = Self::layout_of(this); + // SAFETY: okay because of the safety guarantees of this method + dealloc(this.as_ptr() as *mut u8, layout) + } + + /// Return the length of the Nix string at the given pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn len(this: NonNull<c_void>) -> usize { + let (_, len_offset, _) = Self::layout_of(this); + // SAFETY: As long as the safety guarantees of this method are upheld, we've allocated with + // a layout that causes the len_offset to be in-bounds and writeable, and if the allocation + // succeeded it won't wrap + *(this.as_ptr().add(len_offset) as *const usize) + } + + /// Return a pointer to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn context_ptr(this: NonNull<c_void>) -> *mut Option<Box<NixContext>> { + // SAFETY: The context is the first field in the layout of the allocation + this.as_ptr() as *mut Option<Box<NixContext>> + } + + /// Construct a shared reference to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`NonNull::as_ref`] for more. + unsafe fn context_ref<'a>(this: NonNull<c_void>) -> &'a Option<Box<NixContext>> { + Self::context_ptr(this).as_ref().unwrap() + } + + /// Construct a mutable reference to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`NonNull::as_mut`] for more. + unsafe fn context_mut<'a>(this: NonNull<c_void>) -> &'a mut Option<Box<NixContext>> { + Self::context_ptr(this).as_mut().unwrap() + } + + /// Return a pointer to the data array within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn data_ptr(this: NonNull<c_void>) -> *mut u8 { + let (_, _, data_offset) = Self::layout_of(this); + // SAFETY: data is the third field in the layout of the allocation + this.as_ptr().add(data_offset) as *mut u8 + } + + /// Construct a shared reference to the data slice within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the data array has been properly initialized (by writing to the + /// pointer returned from [`Self::data_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`slice::from_raw_parts`] for more. + unsafe fn data_slice<'a>(this: NonNull<c_void>) -> &'a [u8] { + let len = Self::len(this); + let data = Self::data_ptr(this); + slice::from_raw_parts(data, len) + } + + /// Construct a mutable reference to the data slice within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the data array has been properly initialized (by writing to the + /// pointer returned from [`Self::data_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`slice::from_raw_parts_mut`] for more. + #[allow(dead_code)] + unsafe fn data_slice_mut<'a>(this: NonNull<c_void>) -> &'a mut [u8] { + let len = Self::len(this); + let data = Self::data_ptr(this); + slice::from_raw_parts_mut(data, len) + } + + /// Clone the Nix string pointed to by this pointer, and return a pointer to a new Nix string + /// containing the same data and context. + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]), and the data array has been properly + /// initialized (by writing to the pointer returned from [`Self::data_ptr`]). + unsafe fn clone(this: NonNull<c_void>) -> NonNull<c_void> { + let (layout, _, _) = Self::layout_of(this); + let ptr = alloc(layout); + if let Some(new) = NonNull::new(ptr as *mut _) { + ptr::copy_nonoverlapping(this.as_ptr(), new.as_ptr(), layout.size()); + Self::context_ptr(new).write(Self::context_ref(this).clone()); + new + } else { + handle_alloc_error(layout); + } + } +} + +/// Nix string values +/// +/// # Internals +/// +/// For performance reasons (to keep allocations small, and to avoid indirections), [`NixString`] is +/// represented as a single *thin* pointer to a packed data structure containing the +/// [context][NixContext] and the string data itself (which is a raw byte array, to match the Nix +/// string semantics that allow any array of bytes to be represented by a string). + +/// This memory representation is documented in [`NixStringInner`], but since Rust prefers to deal +/// with slices via *fat pointers* (pointers that include the length in the *pointer*, not in the +/// heap allocation), we have to do mostly manual layout management and allocation for this +/// representation. See the documentation for the methods of [`NixStringInner`] for more information +pub struct NixString(NonNull<c_void>); + +unsafe impl Send for NixString {} +unsafe impl Sync for NixString {} + +impl Drop for NixString { + fn drop(&mut self) { + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of dealloc + unsafe { + NixStringInner::dealloc(self.0); + } + } +} + +impl Clone for NixString { + fn clone(&self) -> Self { + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of clone + unsafe { Self(NixStringInner::clone(self.0)) } + } +} + +impl Debug for NixString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(ctx) = self.context() { + f.debug_struct("NixString") + .field("context", ctx) + .field("data", &self.as_bstr()) + .finish() + } else { + write!(f, "{:?}", self.as_bstr()) + } + } +} impl PartialEq for NixString { fn eq(&self, other: &Self) -> bool { - self.as_str() == other.as_str() + self.as_bstr() == other.as_bstr() } } impl Eq for NixString {} +impl PartialEq<&[u8]> for NixString { + fn eq(&self, other: &&[u8]) -> bool { + **self == **other + } +} + +impl PartialEq<&str> for NixString { + fn eq(&self, other: &&str) -> bool { + **self == other.as_bytes() + } +} + impl PartialOrd for NixString { fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - self.as_str().partial_cmp(other.as_str()) + Some(self.cmp(other)) } } impl Ord for NixString { fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.as_str().cmp(other.as_str()) + self.as_bstr().cmp(other.as_bstr()) + } +} + +impl From<Box<BStr>> for NixString { + fn from(value: Box<BStr>) -> Self { + Self::new(&value, None) + } +} + +impl From<BString> for NixString { + fn from(value: BString) -> Self { + Self::new(&value, None) + } +} + +impl From<&BStr> for NixString { + fn from(value: &BStr) -> Self { + value.to_owned().into() + } +} + +impl From<&[u8]> for NixString { + fn from(value: &[u8]) -> Self { + Self::from(value.to_owned()) + } +} + +impl From<Vec<u8>> for NixString { + fn from(value: Vec<u8>) -> Self { + value.into_boxed_slice().into() + } +} + +impl From<Box<[u8]>> for NixString { + fn from(value: Box<[u8]>) -> Self { + Self::new(&value, None) } } impl From<&str> for NixString { fn from(s: &str) -> Self { - NixString(StringRepr::Smol(SmolStr::new(s))) + s.as_bytes().into() } } impl From<String> for NixString { fn from(s: String) -> Self { - NixString(StringRepr::Heap(s)) + s.into_bytes().into() } } -impl From<SmolStr> for NixString { - fn from(s: SmolStr) -> Self { - NixString(StringRepr::Smol(s)) +impl<T> From<(T, Option<Box<NixContext>>)> for NixString +where + NixString: From<T>, +{ + fn from((s, ctx): (T, Option<Box<NixContext>>)) -> Self { + Self::new(NixString::from(s).as_ref(), ctx) + } +} + +impl From<Box<str>> for NixString { + fn from(s: Box<str>) -> Self { + s.into_boxed_bytes().into() } } @@ -62,9 +539,76 @@ impl From<ast::Ident> for NixString { } } +impl<'a> From<&'a NixString> for &'a BStr { + fn from(s: &'a NixString) -> Self { + s.as_bstr() + } +} + +// No impl From<NixString> for String, that one quotes. + +impl From<NixString> for BString { + fn from(s: NixString) -> Self { + s.as_bstr().to_owned() + } +} + +impl AsRef<[u8]> for NixString { + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl Borrow<BStr> for NixString { + fn borrow(&self) -> &BStr { + self.as_bstr() + } +} + impl Hash for NixString { fn hash<H: std::hash::Hasher>(&self, state: &mut H) { - self.as_str().hash(state) + self.as_bstr().hash(state) + } +} + +impl<'de> Deserialize<'de> for NixString { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct StringVisitor; + + impl<'de> Visitor<'de> for StringVisitor { + type Value = NixString; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a valid Nix string") + } + + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(v.into()) + } + + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(v.into()) + } + } + + deserializer.deserialize_string(StringVisitor) + } +} + +impl Deref for NixString { + type Target = BStr; + + fn deref(&self) -> &Self::Target { + self.as_bstr() } } @@ -72,7 +616,6 @@ impl Hash for NixString { mod arbitrary { use super::*; use proptest::prelude::{any_with, Arbitrary}; - use proptest::prop_oneof; use proptest::strategy::{BoxedStrategy, Strategy}; impl Arbitrary for NixString { @@ -81,29 +624,72 @@ mod arbitrary { type Strategy = BoxedStrategy<Self>; fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { - prop_oneof![ - // Either generate `StringRepr::Heap`... - any_with::<String>(args).prop_map(Self::from), - // ...or generate `StringRepr::Smol` (which `impl From<&str> for NixString` returns) - any_with::<String>(args).prop_map(|s| Self::from(s.as_str())), - ] - .boxed() + any_with::<String>(args).prop_map(Self::from).boxed() } } } impl NixString { - pub const NAME: Self = NixString(StringRepr::Smol(SmolStr::new_inline("name"))); - pub const NAME_REF: &'static Self = &Self::NAME; + fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self { + debug_assert!( + !context.as_deref().is_some_and(NixContext::is_empty), + "BUG: initialized with empty context" + ); + + // SAFETY: We're always fully initializing a NixString here: + // + // 1. NixStringInner::alloc sets up the len for us + // 2. We set the context, using ptr::write to make sure that the uninitialized memory isn't + // read or dropped + // 3. We set the data, using copy_from_nonoverlapping to make sure that the uninitialized + // memory isn't read or dropped + // + // Only *then* can we construct a NixString + unsafe { + let inner = NixStringInner::alloc(contents.len()); + NixStringInner::context_ptr(inner).write(context); + NixStringInner::data_ptr(inner) + .copy_from_nonoverlapping(contents.as_ptr(), contents.len()); + Self(inner) + } + } - pub const VALUE: Self = NixString(StringRepr::Smol(SmolStr::new_inline("value"))); - pub const VALUE_REF: &'static Self = &Self::VALUE; + pub fn new_inherit_context_from<T>(other: &NixString, new_contents: T) -> Self + where + NixString: From<T>, + { + Self::new( + Self::from(new_contents).as_ref(), + other.context().map(|c| Box::new(c.clone())), + ) + } - pub fn as_str(&self) -> &str { - match &self.0 { - StringRepr::Smol(s) => s.as_str(), - StringRepr::Heap(s) => s, - } + pub fn new_context_from<T>(context: NixContext, contents: T) -> Self + where + NixString: From<T>, + { + Self::new( + Self::from(contents).as_ref(), + if context.is_empty() { + None + } else { + Some(Box::new(context)) + }, + ) + } + + pub fn as_bstr(&self) -> &BStr { + BStr::new(self.as_bytes()) + } + + pub fn as_bytes(&self) -> &[u8] { + // SAFETY: There's no way to construct an uninitialized NixString (see the SAFETY comment in + // `new`) + unsafe { NixStringInner::data_slice(self.0) } + } + + pub fn into_bstring(self) -> BString { + self.as_bstr().to_owned() } /// Return a displayable representation of the string as an @@ -113,13 +699,15 @@ impl NixString { /// set keys, as those are only escaped in the presence of special /// characters. pub fn ident_str(&self) -> Cow<str> { - let escaped = nix_escape_string(self.as_str()); - + let escaped = match self.to_str_lossy() { + Cow::Borrowed(s) => nix_escape_string(s), + Cow::Owned(s) => nix_escape_string(&s).into_owned().into(), + }; match escaped { // A borrowed string is unchanged and can be returned as // is. Cow::Borrowed(_) => { - if is_valid_nix_identifier(&escaped) { + if is_valid_nix_identifier(&escaped) && !is_keyword(&escaped) { escaped } else { Cow::Owned(format!("\"{}\"", escaped)) @@ -133,16 +721,91 @@ impl NixString { } pub fn concat(&self, other: &Self) -> Self { - let mut s = self.as_str().to_owned(); - s.push_str(other.as_str()); - NixString(StringRepr::Heap(s)) + let mut s = self.to_vec(); + s.extend(&(***other)); + + let context = [self.context(), other.context()] + .into_iter() + .flatten() + .fold(NixContext::new(), |mut acc_ctx, new_ctx| { + // TODO: consume new_ctx? + acc_ctx.extend(new_ctx.iter().cloned()); + acc_ctx + }); + Self::new_context_from(context, s) + } + + pub(crate) fn context(&self) -> Option<&NixContext> { + // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY + // comment in `new`). + // + // Also, we're using the same lifetime and mutability as self, to fit the + // pointer-to-reference conversion rules + let context = unsafe { NixStringInner::context_ref(self.0).as_deref() }; + + debug_assert!( + !context.is_some_and(NixContext::is_empty), + "BUG: empty context" + ); + + context + } + + pub(crate) fn context_mut(&mut self) -> &mut Option<Box<NixContext>> { + // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY + // comment in `new`). + // + // Also, we're using the same lifetime and mutability as self, to fit the + // pointer-to-reference conversion rules + let context = unsafe { NixStringInner::context_mut(self.0) }; + + debug_assert!( + !context.as_deref().is_some_and(NixContext::is_empty), + "BUG: empty context" + ); + + context + } + + pub fn iter_context(&self) -> impl Iterator<Item = &NixContext> { + self.context().into_iter() + } + + pub fn iter_plain(&self) -> impl Iterator<Item = &str> { + self.iter_context().flat_map(|context| context.iter_plain()) + } + + pub fn iter_derivation(&self) -> impl Iterator<Item = &str> { + return self + .iter_context() + .flat_map(|context| context.iter_derivation()); + } + + pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { + return self + .iter_context() + .flat_map(|context| context.iter_single_outputs()); + } + + /// Returns whether this Nix string possess a context or not. + pub fn has_context(&self) -> bool { + self.context().is_some() + } + + /// This clears the context of the string, returning + /// the removed dependency tracking information. + pub fn take_context(&mut self) -> Option<Box<NixContext>> { + self.context_mut().take() + } + + /// This clears the context of that string, losing + /// all dependency tracking information. + pub fn clear_context(&mut self) { + let _ = self.take_context(); } pub fn chars(&self) -> Chars<'_> { - match &self.0 { - StringRepr::Heap(h) => h.chars(), - StringRepr::Smol(s) => s.chars(), - } + self.as_bstr().chars() } } @@ -158,6 +821,17 @@ fn nix_escape_char(ch: char, next: Option<&char>) -> Option<&'static str> { } } +/// Return true if this string is a keyword -- character strings +/// which lexically match the "identifier" production but are not +/// parsed as identifiers. See also cppnix commit +/// b72bc4a972fe568744d98b89d63adcd504cb586c. +fn is_keyword(s: &str) -> bool { + matches!( + s, + "if" | "then" | "else" | "assert" | "with" | "let" | "in" | "rec" | "inherit" + ) +} + /// Return true if this string can be used as an identifier in Nix. fn is_valid_nix_identifier(s: &str) -> bool { // adapted from rnix-parser's tokenizer.rs @@ -168,11 +842,11 @@ fn is_valid_nix_identifier(s: &str) -> bool { } for c in chars { match c { - 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' => (), + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => (), _ => return false, } } - return true; + true } /// Escape a Nix string for display, as most user-visible representation @@ -180,7 +854,7 @@ fn is_valid_nix_identifier(s: &str) -> bool { /// /// Note that this does not add the outer pair of surrounding quotes. fn nix_escape_string(input: &str) -> Cow<str> { - let mut iter = input.chars().enumerate().peekable(); + let mut iter = input.char_indices().peekable(); while let Some((i, c)) = iter.next() { if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) { @@ -188,6 +862,10 @@ fn nix_escape_string(input: &str) -> Cow<str> { escaped.push_str(&input[..i]); escaped.push_str(esc); + // In theory we calculate how many bytes it takes to represent `esc` + // in UTF-8 and use that for the offset. It is, however, safe to + // assume that to be 1, as all characters that can be escaped in a + // Nix string are ASCII. let mut inner_iter = input[i + 1..].chars().peekable(); while let Some(c) = inner_iter.next() { match nix_escape_char(c, inner_iter.peek()) { @@ -206,42 +884,28 @@ fn nix_escape_string(input: &str) -> Cow<str> { impl Display for NixString { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_str("\"")?; - f.write_str(&nix_escape_string(self.as_str()))?; + f.write_str(&nix_escape_string(&self.to_str_lossy()))?; f.write_str("\"") } } -impl AsRef<str> for NixString { - fn as_ref(&self) -> &str { - self.as_str() - } -} - -impl AsRef<OsStr> for NixString { - fn as_ref(&self) -> &OsStr { - self.as_str().as_ref() - } -} +#[cfg(all(test, feature = "arbitrary"))] +mod tests { + use test_strategy::proptest; -impl AsRef<Path> for NixString { - fn as_ref(&self) -> &Path { - self.as_str().as_ref() - } -} + use super::*; -impl Deref for NixString { - type Target = str; + use crate::properties::{eq_laws, hash_laws, ord_laws}; - fn deref(&self) -> &Self::Target { - self.as_str() + #[test] + fn size() { + assert_eq!(std::mem::size_of::<NixString>(), 8); } -} -#[cfg(test)] -mod tests { - use super::*; - - use crate::properties::{eq_laws, hash_laws, ord_laws}; + #[proptest] + fn clone_strings(s: NixString) { + drop(s.clone()) + } eq_laws!(NixString); hash_laws!(NixString); diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs index 0d4c26bab4..a67537f945 100644 --- a/tvix/eval/src/value/thunk.rs +++ b/tvix/eval/src/value/thunk.rs @@ -21,44 +21,111 @@ use std::{ cell::{Ref, RefCell, RefMut}, collections::HashSet, + fmt::Debug, rc::Rc, }; -use codemap::Span; - use crate::{ - errors::{Error, ErrorKind}, + errors::ErrorKind, + opcode::OpCode, + spans::LightSpan, upvalues::Upvalues, value::Closure, - vm::VM, + vm::generators::{self, GenCo}, Value, }; use super::{Lambda, TotalDisplay}; +use codemap::Span; + +/// Internal representation of a suspended native thunk. +struct SuspendedNative(Box<dyn Fn() -> Result<Value, ErrorKind>>); + +impl Debug for SuspendedNative { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "SuspendedNative({:p})", self.0) + } +} /// Internal representation of the different states of a thunk. /// /// Upvalues must be finalised before leaving the initial state /// (Suspended or RecursiveClosure). The [`value()`] function may /// not be called until the thunk is in the final state (Evaluated). -#[derive(Clone, Debug)] +#[derive(Debug)] enum ThunkRepr { /// Thunk is closed over some values, suspended and awaiting /// execution. Suspended { lambda: Rc<Lambda>, - upvalues: Upvalues, - span: Span, + upvalues: Rc<Upvalues>, + light_span: LightSpan, }, + /// Thunk is a suspended native computation. + Native(SuspendedNative), + /// Thunk currently under-evaluation; encountering a blackhole /// value means that infinite recursion has occured. - Blackhole, + Blackhole { + /// Span at which the thunk was first forced. + forced_at: LightSpan, + + /// Span at which the thunk was originally suspended. + suspended_at: Option<LightSpan>, + /// Span of the first instruction of the actual code inside + /// the thunk. + content_span: Option<Span>, + }, + + // TODO(amjoseph): consider changing `Value` to `Rc<Value>` to avoid + // expensive clone()s in Thunk::force(). /// Fully evaluated thunk. Evaluated(Value), } +impl ThunkRepr { + fn debug_repr(&self) -> String { + match self { + ThunkRepr::Evaluated(v) => format!("thunk(val|{})", v), + ThunkRepr::Blackhole { .. } => "thunk(blackhole)".to_string(), + ThunkRepr::Native(_) => "thunk(native)".to_string(), + ThunkRepr::Suspended { lambda, .. } => format!("thunk({:p})", *lambda), + } + } + + /// Return the Value within a fully-evaluated ThunkRepr; panics + /// if the thunk is not fully-evaluated. + fn expect(self) -> Value { + match self { + ThunkRepr::Evaluated(value) => value, + ThunkRepr::Blackhole { .. } => panic!("Thunk::expect() called on a black-holed thunk"), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) => { + panic!("Thunk::expect() called on a suspended thunk") + } + } + } + + fn expect_ref(&self) -> &Value { + match self { + ThunkRepr::Evaluated(value) => value, + ThunkRepr::Blackhole { .. } => panic!("Thunk::expect() called on a black-holed thunk"), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) => { + panic!("Thunk::expect() called on a suspended thunk") + } + } + } + + pub fn is_forced(&self) -> bool { + match self { + ThunkRepr::Evaluated(Value::Thunk(_)) => false, + ThunkRepr::Evaluated(_) => true, + _ => false, + } + } +} + /// A thunk is created for any value which requires non-strict /// evaluation due to self-reference or lazy semantics (or both). /// Every reference cycle involving `Value`s will contain at least @@ -69,74 +136,200 @@ pub struct Thunk(Rc<RefCell<ThunkRepr>>); impl Thunk { pub fn new_closure(lambda: Rc<Lambda>) -> Self { Thunk(Rc::new(RefCell::new(ThunkRepr::Evaluated(Value::Closure( - Closure { + Rc::new(Closure { upvalues: Rc::new(Upvalues::with_capacity(lambda.upvalue_count)), lambda: lambda.clone(), - #[cfg(debug_assertions)] - is_finalised: false, - }, + }), ))))) } - pub fn new_suspended(lambda: Rc<Lambda>, span: Span) -> Self { + pub fn new_suspended(lambda: Rc<Lambda>, light_span: LightSpan) -> Self { Thunk(Rc::new(RefCell::new(ThunkRepr::Suspended { - upvalues: Upvalues::with_capacity(lambda.upvalue_count), + upvalues: Rc::new(Upvalues::with_capacity(lambda.upvalue_count)), lambda: lambda.clone(), - span, + light_span, + }))) + } + + pub fn new_suspended_native(native: Box<dyn Fn() -> Result<Value, ErrorKind>>) -> Self { + Thunk(Rc::new(RefCell::new(ThunkRepr::Native(SuspendedNative( + native, + ))))) + } + + /// Helper function to create a [`Thunk`] that calls a function given as the + /// [`Value`] `callee` with the argument `arg` when it is forced. This is + /// particularly useful in builtin implementations if the result of calling + /// a function does not need to be forced immediately, because e.g. it is + /// stored in an attribute set. + pub fn new_suspended_call(callee: Value, arg: Value, light_span: LightSpan) -> Self { + let mut lambda = Lambda::default(); + let span = light_span.span(); + + let arg_idx = lambda.chunk().push_constant(arg); + let f_idx = lambda.chunk().push_constant(callee); + + // This is basically a recreation of compile_apply(): + // We need to push the argument onto the stack and then the function. + // The function (not the argument) needs to be forced before calling. + lambda.chunk.push_op(OpCode::OpConstant(arg_idx), span); + lambda.chunk().push_op(OpCode::OpConstant(f_idx), span); + lambda.chunk.push_op(OpCode::OpForce, span); + lambda.chunk.push_op(OpCode::OpCall, span); + + // Inform the VM that the chunk has ended + lambda.chunk.push_op(OpCode::OpReturn, span); + + Thunk(Rc::new(RefCell::new(ThunkRepr::Suspended { + upvalues: Rc::new(Upvalues::with_capacity(0)), + lambda: Rc::new(lambda), + light_span, }))) } - /// Evaluate the content of a thunk, potentially repeatedly, until a - /// non-thunk value is returned. - /// - /// This will change the existing thunk (and thus all references to it, - /// providing memoization) through interior mutability. In case of nested - /// thunks, the intermediate thunk representations are replaced. - pub fn force(&self, vm: &mut VM) -> Result<(), ErrorKind> { + fn prepare_blackhole(&self, forced_at: LightSpan) -> ThunkRepr { + match &*self.0.borrow() { + ThunkRepr::Suspended { + light_span, lambda, .. + } => ThunkRepr::Blackhole { + forced_at, + suspended_at: Some(light_span.clone()), + content_span: Some(lambda.chunk.first_span()), + }, + + _ => ThunkRepr::Blackhole { + forced_at, + suspended_at: None, + content_span: None, + }, + } + } + + pub async fn force(myself: Thunk, co: GenCo, span: LightSpan) -> Result<Value, ErrorKind> { + Self::force_(myself, &co, span).await + } + pub async fn force_( + mut myself: Thunk, + co: &GenCo, + span: LightSpan, + ) -> Result<Value, ErrorKind> { + // This vector of "thunks which point to the thunk-being-forced", to + // be updated along with it, is necessary in order to write this + // function in iterative (and later, mutual-tail-call) form. + let mut also_update: Vec<Rc<RefCell<ThunkRepr>>> = vec![]; + loop { - let mut thunk_mut = self.0.borrow_mut(); + // If the current thunk is already fully evaluated, return its evaluated + // value. The VM will continue running the code that landed us here. + if myself.is_forced() { + let val = myself.unwrap_or_clone(); + for other_thunk in also_update.into_iter() { + other_thunk.replace(ThunkRepr::Evaluated(val.clone())); + } + return Ok(val); + } - match *thunk_mut { - ThunkRepr::Evaluated(Value::Thunk(ref inner_thunk)) => { - let inner_repr = inner_thunk.0.borrow().clone(); - *thunk_mut = inner_repr; + // Begin evaluation of this thunk by marking it as a blackhole, meaning + // that any other forcing frame encountering this thunk before its + // evaluation is completed detected an evaluation cycle. + let inner = myself.0.replace(myself.prepare_blackhole(span.clone())); + + match inner { + // If there was already a blackhole in the thunk, this is an + // evaluation cycle. + ThunkRepr::Blackhole { + forced_at, + suspended_at, + content_span, + } => { + return Err(ErrorKind::InfiniteRecursion { + first_force: forced_at.span(), + suspended_at: suspended_at.map(|s| s.span()), + content_span, + }) } - ThunkRepr::Evaluated(_) => return Ok(()), - ThunkRepr::Blackhole => return Err(ErrorKind::InfiniteRecursion), - - ThunkRepr::Suspended { .. } => { - if let ThunkRepr::Suspended { - lambda, - upvalues, - span, - } = std::mem::replace(&mut *thunk_mut, ThunkRepr::Blackhole) - { - drop(thunk_mut); - vm.enter_frame(lambda, upvalues, 0) - .map_err(|e| ErrorKind::ThunkForce(Box::new(Error { span, ..e })))?; - (*self.0.borrow_mut()) = ThunkRepr::Evaluated(vm.pop()) + // If there is a native function stored in the thunk, evaluate it + // and replace this thunk's representation with the result. + ThunkRepr::Native(native) => { + let value = native.0()?; + myself.0.replace(ThunkRepr::Evaluated(value)); + continue; + } + + // When encountering a suspended thunk, request that the VM enters + // it and produces the result. + ThunkRepr::Suspended { + lambda, + upvalues, + light_span, + } => { + // TODO(amjoseph): use #[tailcall::mutual] here. This can + // be turned into a tailcall to vm::execute_bytecode() by + // passing `also_update` to it. + let value = + generators::request_enter_lambda(co, lambda, upvalues, light_span).await; + myself.0.replace(ThunkRepr::Evaluated(value)); + continue; + } + + // nested thunks -- try to flatten before forcing + ThunkRepr::Evaluated(Value::Thunk(inner_thunk)) => { + match Rc::try_unwrap(inner_thunk.0) { + Ok(refcell) => { + // we are the only reference to the inner thunk, + // so steal it + myself.0.replace(refcell.into_inner()); + continue; + } + Err(rc) => { + let inner_thunk = Thunk(rc); + if inner_thunk.is_forced() { + // tail call to force the inner thunk; note that + // this means the outer thunk remains unforced + // even after calling force() on it; however the + // next time it is forced we will be one + // thunk-forcing closer to it being + // fully-evaluated. + myself + .0 + .replace(ThunkRepr::Evaluated(inner_thunk.value().clone())); + continue; + } + also_update.push(myself.0.clone()); + myself = inner_thunk; + continue; + } } } + + ThunkRepr::Evaluated(val) => { + return Ok(val); + } } } } pub fn finalise(&self, stack: &[Value]) { self.upvalues_mut().resolve_deferred_upvalues(stack); - #[cfg(debug_assertions)] - { - let inner: &mut ThunkRepr = &mut self.0.as_ref().borrow_mut(); - if let ThunkRepr::Evaluated(Value::Closure(c)) = inner { - c.is_finalised = true; - } - } } pub fn is_evaluated(&self) -> bool { matches!(*self.0.borrow(), ThunkRepr::Evaluated(_)) } + pub fn is_suspended(&self) -> bool { + matches!( + *self.0.borrow(), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) + ) + } + + /// Returns true if forcing this thunk will not change it. + pub fn is_forced(&self) -> bool { + self.0.borrow().is_forced() + } + /// Returns a reference to the inner evaluated value of a thunk. /// It is an error to call this on a thunk that has not been /// forced, or is not otherwise known to be fully evaluated. @@ -145,48 +338,42 @@ impl Thunk { // API too much. pub fn value(&self) -> Ref<Value> { Ref::map(self.0.borrow(), |thunk| match thunk { - ThunkRepr::Evaluated(value) => { - #[cfg(debug_assertions)] - if matches!( - value, - Value::Closure(Closure { - is_finalised: false, - .. - }) - ) { - panic!("Thunk::value called on an unfinalised closure"); - } - return value; + ThunkRepr::Evaluated(value) => value, + ThunkRepr::Blackhole { .. } => panic!("Thunk::value called on a black-holed thunk"), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) => { + panic!("Thunk::value called on a suspended thunk") } - ThunkRepr::Blackhole => panic!("Thunk::value called on a black-holed thunk"), - ThunkRepr::Suspended { .. } => panic!("Thunk::value called on a suspended thunk"), }) } + /// Returns the inner evaluated value of a thunk, cloning it if + /// the Rc has more than one strong reference. It is an error + /// to call this on a thunk that has not been forced, or is not + /// otherwise known to be fully evaluated. + fn unwrap_or_clone(self) -> Value { + match Rc::try_unwrap(self.0) { + Ok(refcell) => refcell.into_inner().expect(), + Err(rc) => Ref::map(rc.borrow(), |thunkrepr| thunkrepr.expect_ref()).clone(), + } + } + pub fn upvalues(&self) -> Ref<'_, Upvalues> { Ref::map(self.0.borrow(), |thunk| match thunk { - ThunkRepr::Suspended { upvalues, .. } => upvalues, - ThunkRepr::Evaluated(Value::Closure(Closure { upvalues, .. })) => upvalues, + ThunkRepr::Suspended { upvalues, .. } => upvalues.as_ref(), + ThunkRepr::Evaluated(Value::Closure(c)) => &c.upvalues, _ => panic!("upvalues() on non-suspended thunk"), }) } pub fn upvalues_mut(&self) -> RefMut<'_, Upvalues> { RefMut::map(self.0.borrow_mut(), |thunk| match thunk { - ThunkRepr::Suspended { upvalues, .. } => upvalues, - ThunkRepr::Evaluated(Value::Closure(Closure { - upvalues, - #[cfg(debug_assertions)] - is_finalised, - .. - })) => { - #[cfg(debug_assertions)] - if *is_finalised { - panic!("Thunk::upvalues_mut() called on a finalised closure"); - } - Rc::get_mut(upvalues) - .expect("upvalues_mut() was called on a thunk which already had multiple references to it") - } + ThunkRepr::Suspended { upvalues, .. } => Rc::get_mut(upvalues).unwrap(), + ThunkRepr::Evaluated(Value::Closure(c)) => Rc::get_mut( + &mut Rc::get_mut(c).unwrap().upvalues, + ) + .expect( + "upvalues_mut() was called on a thunk which already had multiple references to it", + ), thunk => panic!("upvalues() on non-suspended thunk: {thunk:?}"), }) } @@ -194,7 +381,21 @@ impl Thunk { /// Do not use this without first reading and understanding /// `tvix/docs/value-pointer-equality.md`. pub(crate) fn ptr_eq(&self, other: &Self) -> bool { - Rc::ptr_eq(&self.0, &other.0) + if Rc::ptr_eq(&self.0, &other.0) { + return true; + } + match &*self.0.borrow() { + ThunkRepr::Evaluated(Value::Closure(c1)) => match &*other.0.borrow() { + ThunkRepr::Evaluated(Value::Closure(c2)) => Rc::ptr_eq(c1, c2), + _ => false, + }, + _ => false, + } + } + + /// Helper function to format thunks in observer output. + pub(crate) fn debug_repr(&self) -> String { + self.0.borrow().debug_repr() } } @@ -204,30 +405,30 @@ impl TotalDisplay for Thunk { return f.write_str("<CYCLE>"); } - match self.0.try_borrow() { - Ok(repr) => match &*repr { - ThunkRepr::Evaluated(v) => v.total_fmt(f, set), - _ => f.write_str("internal[thunk]"), - }, - - _ => f.write_str("internal[thunk]"), + match &*self.0.borrow() { + ThunkRepr::Evaluated(v) => v.total_fmt(f, set), + ThunkRepr::Suspended { .. } | ThunkRepr::Native(_) => f.write_str("<CODE>"), + other => write!(f, "internal[{}]", other.debug_repr()), } } } -/// A wrapper type for tracking which thunks have already been seen in a -/// context. This is necessary for cycle detection. +/// A wrapper type for tracking which thunks have already been seen +/// in a context. This is necessary for printing and deeply forcing +/// cyclic non-diverging data structures like `rec { f = [ f ]; }`. +/// This is separate from the ThunkRepr::Blackhole mechanism, which +/// detects diverging data structures like `(rec { f = f; }).f`. /// /// The inner `HashSet` is not available on the outside, as it would be /// potentially unsafe to interact with the pointers in the set. #[derive(Default)] -pub struct ThunkSet(HashSet<*mut ThunkRepr>); +pub struct ThunkSet(HashSet<*const ThunkRepr>); impl ThunkSet { /// Check whether the given thunk has already been seen. Will mark the thunk /// as seen otherwise. pub fn insert(&mut self, thunk: &Thunk) -> bool { - let ptr: *mut ThunkRepr = thunk.0.as_ptr(); + let ptr: *const ThunkRepr = thunk.0.as_ptr(); self.0.insert(ptr) } } |