diff options
author | Aspen Smith <root@gws.fyi> | 2024-08-10T17·03-0400 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-08-18T19·49+0000 |
commit | 5382cbb93a94a8e68ddd2567f382661fd0f166b3 (patch) | |
tree | ebca797b26b56e466552efb0ee33e8ea778d8889 /tvix/eval/src/value/string | |
parent | e086c76ee941198c70756e1b7b383edcb3572b4b (diff) |
refactor(tvix/eval): Pull context out into its own module r/8515
I'm gonna be doing some poking around in the internals of Context, so in preparation this pulls it out into its own module. Change-Id: I72ea7df80b5f36f838934ee07bdba66874c334c9 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12189 Autosubmit: aspen <root@gws.fyi> Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/eval/src/value/string')
-rw-r--r-- | tvix/eval/src/value/string/context.rs | 161 | ||||
-rw-r--r-- | tvix/eval/src/value/string/mod.rs | 879 |
2 files changed, 1040 insertions, 0 deletions
diff --git a/tvix/eval/src/value/string/context.rs b/tvix/eval/src/value/string/context.rs new file mode 100644 index 000000000000..e1c04735ddde --- /dev/null +++ b/tvix/eval/src/value/string/context.rs @@ -0,0 +1,161 @@ +use rustc_hash::FxHashSet; +use serde::Serialize; + +use super::NixString; + +#[derive(Clone, Debug, Serialize, Hash, PartialEq, Eq)] +pub enum NixContextElement { + /// A plain store path (e.g. source files copied to the store) + Plain(String), + + /// Single output of a derivation, represented by its name and its derivation path. + Single { name: String, derivation: String }, + + /// A reference to a complete derivation + /// including its source and its binary closure. + /// It is used for the `drvPath` attribute context. + /// The referred string is the store path to + /// the derivation path. + Derivation(String), +} + +/// Nix context strings representation in Tvix. This tracks a set of different kinds of string +/// dependencies that we can come across during manipulation of our language primitives, mostly +/// strings. There's some simple algebra of context strings and how they propagate w.r.t. primitive +/// operations, e.g. concatenation, interpolation and other string operations. +#[repr(transparent)] +#[derive(Clone, Debug, Serialize, Default)] +pub struct NixContext(FxHashSet<NixContextElement>); + +impl From<NixContextElement> for NixContext { + fn from(value: NixContextElement) -> Self { + let mut set = FxHashSet::default(); + set.insert(value); + Self(set) + } +} + +impl From<FxHashSet<NixContextElement>> for NixContext { + fn from(value: FxHashSet<NixContextElement>) -> Self { + Self(value) + } +} + +impl<const N: usize> From<[NixContextElement; N]> for NixContext { + fn from(value: [NixContextElement; N]) -> Self { + let mut set = FxHashSet::default(); + for elt in value { + set.insert(elt); + } + Self(set) + } +} + +impl NixContext { + /// Creates an empty context that can be populated + /// and passed to form a contextful [NixString], albeit + /// if the context is concretly empty, the resulting [NixString] + /// will be contextless. + pub fn new() -> Self { + Self::default() + } + + /// For internal consumers, we let people observe + /// if the [NixContext] is actually empty or not + /// to decide whether they want to skip the allocation + /// of a full blown [HashSet]. + pub(crate) fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Consumes a new [NixContextElement] and add it if not already + /// present in this context. + pub fn append(mut self, other: NixContextElement) -> Self { + self.0.insert(other); + self + } + + /// Extends the existing context with more context elements. + pub fn extend<T>(&mut self, iter: T) + where + T: IntoIterator<Item = NixContextElement>, + { + self.0.extend(iter) + } + + /// Copies from another [NixString] its context strings + /// in this context. + pub fn mimic(&mut self, other: &NixString) { + if let Some(context) = other.context() { + self.extend(context.iter().cloned()); + } + } + + /// Iterates over "plain" context elements, e.g. sources imported + /// in the store without more information, i.e. `toFile` or coerced imported paths. + /// It yields paths to the store. + pub fn iter_plain(&self) -> impl Iterator<Item = &str> { + self.iter().filter_map(|elt| { + if let NixContextElement::Plain(s) = elt { + Some(s.as_str()) + } else { + None + } + }) + } + + /// Iterates over "full derivations" context elements, e.g. something + /// referring to their `drvPath`, i.e. their full sources and binary closure. + /// It yields derivation paths. + pub fn iter_derivation(&self) -> impl Iterator<Item = &str> { + self.iter().filter_map(|elt| { + if let NixContextElement::Derivation(s) = elt { + Some(s.as_str()) + } else { + None + } + }) + } + + /// Iterates over "single" context elements, e.g. single derived paths, + /// or also known as the single output of a given derivation. + /// The first element of the tuple is the output name + /// and the second element is the derivation path. + pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { + self.iter().filter_map(|elt| { + if let NixContextElement::Single { name, derivation } = elt { + Some((name.as_str(), derivation.as_str())) + } else { + None + } + }) + } + + /// Iterates over any element of the context. + pub fn iter(&self) -> impl Iterator<Item = &NixContextElement> { + self.0.iter() + } + + /// Produces a list of owned references to this current context, + /// no matter its type. + pub fn to_owned_references(self) -> Vec<String> { + self.0 + .into_iter() + .map(|ctx| match ctx { + NixContextElement::Derivation(drv_path) => drv_path, + NixContextElement::Plain(store_path) => store_path, + NixContextElement::Single { derivation, .. } => derivation, + }) + .collect() + } +} + +impl IntoIterator for NixContext { + type Item = NixContextElement; + + type IntoIter = std::collections::hash_set::IntoIter<NixContextElement>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} diff --git a/tvix/eval/src/value/string/mod.rs b/tvix/eval/src/value/string/mod.rs new file mode 100644 index 000000000000..5bcb4786b283 --- /dev/null +++ b/tvix/eval/src/value/string/mod.rs @@ -0,0 +1,879 @@ +//! This module implements Nix language strings. +//! +//! See [`NixString`] for more information about the internals of string values + +use bstr::{BStr, BString, ByteSlice, Chars}; +use nohash_hasher::BuildNoHashHasher; +use rnix::ast; +#[cfg(feature = "no_leak")] +use rustc_hash::FxHashSet; +use rustc_hash::FxHasher; +use std::alloc::dealloc; +use std::alloc::{alloc, handle_alloc_error, Layout}; +use std::borrow::{Borrow, Cow}; +use std::cell::RefCell; +use std::ffi::c_void; +use std::fmt::{self, Debug, Display}; +use std::hash::{Hash, Hasher}; +use std::ops::Deref; +use std::ptr::{self, NonNull}; +use std::slice; + +use serde::de::{Deserializer, Visitor}; +use serde::Deserialize; + +mod context; + +pub use context::{NixContext, NixContextElement}; + +/// This type is never instantiated, but serves to document the memory layout of the actual heap +/// allocation for Nix strings. +#[allow(dead_code)] +struct NixStringInner { + /// The string context, if any. Note that this is boxed to take advantage of the null pointer + /// niche, otherwise this field ends up being very large: + /// + /// ```notrust + /// >> std::mem::size_of::<Option<HashSet<String>>>() + /// 48 + /// + /// >> std::mem::size_of::<Option<Box<HashSet<String>>>>() + /// 8 + /// ``` + context: Option<Box<NixContext>>, + /// The length of the data, stored *inline in the allocation* + length: usize, + /// The actual data for the string itself. Will always be `length` bytes long + data: [u8], +} + +#[allow(clippy::zst_offset)] +impl NixStringInner { + /// Construct a [`Layout`] for a nix string allocation with the given length. + /// + /// Returns a tuple of: + /// 1. The layout itself. + /// 2. The offset of [`Self::length`] within the allocation, assuming the allocation starts at 0 + /// 3. The offset of the data array within the allocation, assuming the allocation starts at 0 + fn layout(len: usize) -> (Layout, usize, usize) { + let layout = Layout::new::<Option<Box<NixContext>>>(); + let (layout, len_offset) = layout.extend(Layout::new::<usize>()).unwrap(); + let (layout, data_offset) = layout.extend(Layout::array::<u8>(len).unwrap()).unwrap(); + (layout, len_offset, data_offset) + } + + /// Returns the [`Layout`] for an *already-allocated* nix string, loading the length from the + /// pointer. + /// + /// Returns a tuple of: + /// 1. The layout itself. + /// 2. The offset of [`Self::length`] within the allocation, assuming the allocation starts at 0 + /// 3. The offset of the data array within the allocation, assuming the allocation starts at 0 + /// + /// # Safety + /// + /// This function must only be called on a pointer that has been properly initialized with + /// [`Self::alloc`]. The data buffer may not necessarily be initialized + unsafe fn layout_of(this: NonNull<c_void>) -> (Layout, usize, usize) { + let layout = Layout::new::<Option<Box<NixContext>>>(); + let (_, len_offset) = layout.extend(Layout::new::<usize>()).unwrap(); + // SAFETY: Layouts are linear, so even though we haven't involved data at all yet, we know + // the len_offset is a valid offset into the second field of the allocation + let len = *(this.as_ptr().add(len_offset) as *const usize); + Self::layout(len) + } + + /// Allocate an *uninitialized* nix string with the given length. Writes the length to the + /// length value in the pointer, but leaves both context and data uninitialized + /// + /// This function is safe to call (as constructing pointers of any sort of validity is always + /// safe in Rust) but it is unsafe to use the resulting pointer to do anything other than + /// + /// 1. Read the length + /// 2. Write the context + /// 3. Write the data + /// + /// until the string is fully initialized + fn alloc(len: usize) -> NonNull<c_void> { + let (layout, len_offset, _data_offset) = Self::layout(len); + debug_assert_ne!(layout.size(), 0); + unsafe { + // SAFETY: Layout has non-zero size, since the layout of the context and the + // layout of the len both have non-zero size + let ptr = alloc(layout); + + if let Some(this) = NonNull::new(ptr as *mut _) { + // SAFETY: We've allocated with a layout that causes the len_offset to be in-bounds + // and writeable, and if the allocation succeeded it won't wrap + ((this.as_ptr() as *mut u8).add(len_offset) as *mut usize).write(len); + debug_assert_eq!(Self::len(this), len); + this + } else { + handle_alloc_error(layout); + } + } + } + + /// Deallocate the Nix string at the given pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn dealloc(this: NonNull<c_void>) { + let (layout, _, _) = Self::layout_of(this); + // SAFETY: okay because of the safety guarantees of this method + dealloc(this.as_ptr() as *mut u8, layout) + } + + /// Return the length of the Nix string at the given pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn len(this: NonNull<c_void>) -> usize { + let (_, len_offset, _) = Self::layout_of(this); + // SAFETY: As long as the safety guarantees of this method are upheld, we've allocated with + // a layout that causes the len_offset to be in-bounds and writeable, and if the allocation + // succeeded it won't wrap + *(this.as_ptr().add(len_offset) as *const usize) + } + + /// Return a pointer to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn context_ptr(this: NonNull<c_void>) -> *mut Option<Box<NixContext>> { + // SAFETY: The context is the first field in the layout of the allocation + this.as_ptr() as *mut Option<Box<NixContext>> + } + + /// Construct a shared reference to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`NonNull::as_ref`] for more. + unsafe fn context_ref<'a>(this: NonNull<c_void>) -> &'a Option<Box<NixContext>> { + Self::context_ptr(this).as_ref().unwrap() + } + + /// Construct a mutable reference to the context value within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`NonNull::as_mut`] for more. + unsafe fn context_mut<'a>(this: NonNull<c_void>) -> &'a mut Option<Box<NixContext>> { + Self::context_ptr(this).as_mut().unwrap() + } + + /// Return a pointer to the data array within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`] + unsafe fn data_ptr(this: NonNull<c_void>) -> *mut u8 { + let (_, _, data_offset) = Self::layout_of(this); + // SAFETY: data is the third field in the layout of the allocation + this.as_ptr().add(data_offset) as *mut u8 + } + + /// Construct a shared reference to the data slice within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the data array has been properly initialized (by writing to the + /// pointer returned from [`Self::data_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`slice::from_raw_parts`] for more. + unsafe fn data_slice<'a>(this: NonNull<c_void>) -> &'a [u8] { + let len = Self::len(this); + let data = Self::data_ptr(this); + slice::from_raw_parts(data, len) + } + + /// Construct a mutable reference to the data slice within the given Nix string pointer + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the data array has been properly initialized (by writing to the + /// pointer returned from [`Self::data_ptr`]). + /// + /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See + /// [`slice::from_raw_parts_mut`] for more. + #[allow(dead_code)] + unsafe fn data_slice_mut<'a>(this: NonNull<c_void>) -> &'a mut [u8] { + let len = Self::len(this); + let data = Self::data_ptr(this); + slice::from_raw_parts_mut(data, len) + } + + /// Clone the Nix string pointed to by this pointer, and return a pointer to a new Nix string + /// containing the same data and context. + /// + /// # Safety + /// + /// This function must only be called with a pointer that has been properly initialized with + /// [`Self::alloc`], and where the context has been properly initialized (by writing to the + /// pointer returned from [`Self::context_ptr`]), and the data array has been properly + /// initialized (by writing to the pointer returned from [`Self::data_ptr`]). + unsafe fn clone(this: NonNull<c_void>) -> NonNull<c_void> { + let (layout, _, _) = Self::layout_of(this); + let ptr = alloc(layout); + if let Some(new) = NonNull::new(ptr as *mut _) { + ptr::copy_nonoverlapping(this.as_ptr(), new.as_ptr(), layout.size()); + Self::context_ptr(new).write(Self::context_ref(this).clone()); + new + } else { + handle_alloc_error(layout); + } + } +} + +#[derive(Default)] +struct InternerInner { + #[allow(clippy::disallowed_types)] // Not using the default hasher + map: std::collections::HashMap<u64, NonNull<c_void>, BuildNoHashHasher<u64>>, + #[cfg(feature = "no_leak")] + #[allow(clippy::disallowed_types)] // Not using the default hasher + interned_strings: FxHashSet<NonNull<c_void>>, +} + +unsafe impl Send for InternerInner {} + +fn hash<T>(s: T) -> u64 +where + T: Hash, +{ + let mut hasher = FxHasher::default(); + s.hash(&mut hasher); + hasher.finish() +} + +impl InternerInner { + pub fn intern(&mut self, s: &[u8]) -> NixString { + let hash = hash(s); + if let Some(s) = self.map.get(&hash) { + return NixString(*s); + } + + let string = NixString::new_inner(s, None); + self.map.insert(hash, string.0); + #[cfg(feature = "no_leak")] + self.interned_strings.insert(string.0); + string + } +} + +#[derive(Default)] +struct Interner(RefCell<InternerInner>); + +impl Interner { + pub fn intern(&self, s: &[u8]) -> NixString { + self.0.borrow_mut().intern(s) + } + + #[cfg(feature = "no_leak")] + pub fn is_interned_string(&self, string: &NixString) -> bool { + self.0.borrow().interned_strings.contains(&string.0) + } +} + +thread_local! { + static INTERNER: Interner = Interner::default(); +} + +/// Nix string values +/// +/// # Internals +/// +/// For performance reasons (to keep allocations small, and to avoid indirections), [`NixString`] is +/// represented as a single *thin* pointer to a packed data structure containing the +/// [context][NixContext] and the string data itself (which is a raw byte array, to match the Nix +/// string semantics that allow any array of bytes to be represented by a string). + +/// This memory representation is documented in [`NixStringInner`], but since Rust prefers to deal +/// with slices via *fat pointers* (pointers that include the length in the *pointer*, not in the +/// heap allocation), we have to do mostly manual layout management and allocation for this +/// representation. See the documentation for the methods of [`NixStringInner`] for more information +pub struct NixString(NonNull<c_void>); + +unsafe impl Send for NixString {} +unsafe impl Sync for NixString {} + +impl Drop for NixString { + #[cfg(not(feature = "no_leak"))] + fn drop(&mut self) { + if self.context().is_some() { + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of dealloc + unsafe { + NixStringInner::dealloc(self.0); + } + } + } + + #[cfg(feature = "no_leak")] + fn drop(&mut self) { + if INTERNER.with(|i| i.is_interned_string(self)) { + return; + } + + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of dealloc + unsafe { + NixStringInner::dealloc(self.0); + } + } +} + +impl Clone for NixString { + fn clone(&self) -> Self { + if cfg!(feature = "no_leak") || self.context().is_some() { + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of clone + unsafe { Self(NixStringInner::clone(self.0)) } + } else { + // SAFETY: + // + // - NixStrings are never mutated + // - NixStrings are never freed + Self(self.0) + } + } +} + +impl Debug for NixString { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(ctx) = self.context() { + f.debug_struct("NixString") + .field("context", ctx) + .field("data", &self.as_bstr()) + .finish() + } else { + write!(f, "{:?}", self.as_bstr()) + } + } +} + +impl PartialEq for NixString { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 || self.as_bstr() == other.as_bstr() + } +} + +impl Eq for NixString {} + +impl PartialEq<&[u8]> for NixString { + fn eq(&self, other: &&[u8]) -> bool { + **self == **other + } +} + +impl PartialEq<&str> for NixString { + fn eq(&self, other: &&str) -> bool { + **self == other.as_bytes() + } +} + +impl PartialOrd for NixString { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + Some(self.cmp(other)) + } +} + +impl Ord for NixString { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + if self.0 == other.0 { + return std::cmp::Ordering::Equal; + } + self.as_bstr().cmp(other.as_bstr()) + } +} + +impl From<Box<BStr>> for NixString { + fn from(value: Box<BStr>) -> Self { + Self::new(&value, None) + } +} + +impl From<BString> for NixString { + fn from(value: BString) -> Self { + Self::new(&value, None) + } +} + +impl From<&BStr> for NixString { + fn from(value: &BStr) -> Self { + value.to_owned().into() + } +} + +impl From<&[u8]> for NixString { + fn from(value: &[u8]) -> Self { + Self::from(value.to_owned()) + } +} + +impl From<Vec<u8>> for NixString { + fn from(value: Vec<u8>) -> Self { + value.into_boxed_slice().into() + } +} + +impl From<Box<[u8]>> for NixString { + fn from(value: Box<[u8]>) -> Self { + Self::new(&value, None) + } +} + +impl From<&str> for NixString { + fn from(s: &str) -> Self { + s.as_bytes().into() + } +} + +impl From<String> for NixString { + fn from(s: String) -> Self { + s.into_bytes().into() + } +} + +impl<T> From<(T, Option<Box<NixContext>>)> for NixString +where + NixString: From<T>, +{ + fn from((s, ctx): (T, Option<Box<NixContext>>)) -> Self { + Self::new(NixString::from(s).as_ref(), ctx) + } +} + +impl From<Box<str>> for NixString { + fn from(s: Box<str>) -> Self { + s.into_boxed_bytes().into() + } +} + +impl From<ast::Ident> for NixString { + fn from(ident: ast::Ident) -> Self { + ident.ident_token().unwrap().text().into() + } +} + +impl<'a> From<&'a NixString> for &'a BStr { + fn from(s: &'a NixString) -> Self { + s.as_bstr() + } +} + +// No impl From<NixString> for String, that one quotes. + +impl From<NixString> for BString { + fn from(s: NixString) -> Self { + s.as_bstr().to_owned() + } +} + +impl AsRef<[u8]> for NixString { + fn as_ref(&self) -> &[u8] { + self.as_bytes() + } +} + +impl Borrow<BStr> for NixString { + fn borrow(&self) -> &BStr { + self.as_bstr() + } +} + +impl Hash for NixString { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.as_bstr().hash(state) + } +} + +impl<'de> Deserialize<'de> for NixString { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct StringVisitor; + + impl<'de> Visitor<'de> for StringVisitor { + type Value = NixString; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a valid Nix string") + } + + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(v.into()) + } + + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(v.into()) + } + } + + deserializer.deserialize_string(StringVisitor) + } +} + +impl Deref for NixString { + type Target = BStr; + + fn deref(&self) -> &Self::Target { + self.as_bstr() + } +} + +#[cfg(feature = "arbitrary")] +mod arbitrary { + use super::*; + use proptest::prelude::{any_with, Arbitrary}; + use proptest::strategy::{BoxedStrategy, Strategy}; + + impl Arbitrary for NixString { + type Parameters = <String as Arbitrary>::Parameters; + + type Strategy = BoxedStrategy<Self>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + any_with::<String>(args).prop_map(Self::from).boxed() + } + } +} + +/// Set non-scientifically. TODO(aspen): think more about what this should be +const INTERN_THRESHOLD: usize = 32; + +impl NixString { + fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self { + debug_assert!( + !context.as_deref().is_some_and(NixContext::is_empty), + "BUG: initialized with empty context" + ); + + if !cfg!(feature = "no_leak") /* It's only safe to intern if we leak strings, since there's + * nothing yet preventing interned strings from getting freed + * (and then used by other copies) otherwise + */ + && contents.len() <= INTERN_THRESHOLD + && context.is_none() + { + return INTERNER.with(|i| i.intern(contents)); + } + + Self::new_inner(contents, context) + } + + fn new_inner(contents: &[u8], context: Option<Box<NixContext>>) -> Self { + // SAFETY: We're always fully initializing a NixString here: + // + // 1. NixStringInner::alloc sets up the len for us + // 2. We set the context, using ptr::write to make sure that the uninitialized memory isn't + // read or dropped + // 3. We set the data, using copy_from_nonoverlapping to make sure that the uninitialized + // memory isn't read or dropped + // + // Only *then* can we construct a NixString + unsafe { + let inner = NixStringInner::alloc(contents.len()); + NixStringInner::context_ptr(inner).write(context); + NixStringInner::data_ptr(inner) + .copy_from_nonoverlapping(contents.as_ptr(), contents.len()); + Self(inner) + } + } + + pub fn new_inherit_context_from<T>(other: &NixString, new_contents: T) -> Self + where + NixString: From<T>, + { + Self::new( + Self::from(new_contents).as_ref(), + other.context().map(|c| Box::new(c.clone())), + ) + } + + pub fn new_context_from<T>(context: NixContext, contents: T) -> Self + where + NixString: From<T>, + { + Self::new( + Self::from(contents).as_ref(), + if context.is_empty() { + None + } else { + Some(Box::new(context)) + }, + ) + } + + pub fn as_bstr(&self) -> &BStr { + BStr::new(self.as_bytes()) + } + + pub fn as_bytes(&self) -> &[u8] { + // SAFETY: There's no way to construct an uninitialized NixString (see the SAFETY comment in + // `new`) + unsafe { NixStringInner::data_slice(self.0) } + } + + pub fn into_bstring(self) -> BString { + self.as_bstr().to_owned() + } + + /// Return a displayable representation of the string as an + /// identifier. + /// + /// This is used when printing out strings used as e.g. attribute + /// set keys, as those are only escaped in the presence of special + /// characters. + pub fn ident_str(&self) -> Cow<str> { + let escaped = match self.to_str_lossy() { + Cow::Borrowed(s) => nix_escape_string(s), + Cow::Owned(s) => nix_escape_string(&s).into_owned().into(), + }; + match escaped { + // A borrowed string is unchanged and can be returned as + // is. + Cow::Borrowed(_) => { + if is_valid_nix_identifier(&escaped) && !is_keyword(&escaped) { + escaped + } else { + Cow::Owned(format!("\"{}\"", escaped)) + } + } + + // An owned string has escapes, and needs the outer quotes + // for display. + Cow::Owned(s) => Cow::Owned(format!("\"{}\"", s)), + } + } + + pub fn concat(&self, other: &Self) -> Self { + let mut s = self.to_vec(); + s.extend(&(***other)); + + let context = [self.context(), other.context()] + .into_iter() + .flatten() + .fold(NixContext::new(), |mut acc_ctx, new_ctx| { + // TODO: consume new_ctx? + acc_ctx.extend(new_ctx.iter().cloned()); + acc_ctx + }); + Self::new_context_from(context, s) + } + + pub(crate) fn context(&self) -> Option<&NixContext> { + // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY + // comment in `new`). + // + // Also, we're using the same lifetime and mutability as self, to fit the + // pointer-to-reference conversion rules + let context = unsafe { NixStringInner::context_ref(self.0).as_deref() }; + + debug_assert!( + !context.is_some_and(NixContext::is_empty), + "BUG: empty context" + ); + + context + } + + pub(crate) fn context_mut(&mut self) -> &mut Option<Box<NixContext>> { + // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY + // comment in `new`). + // + // Also, we're using the same lifetime and mutability as self, to fit the + // pointer-to-reference conversion rules + let context = unsafe { NixStringInner::context_mut(self.0) }; + + debug_assert!( + !context.as_deref().is_some_and(NixContext::is_empty), + "BUG: empty context" + ); + + context + } + + /// Iterates over all context elements. + /// See [iter_plain], [iter_derivation], [iter_single_outputs]. + pub fn iter_context(&self) -> impl Iterator<Item = &NixContext> { + self.context().into_iter() + } + + /// Iterates over "plain" context elements, e.g. sources imported + /// in the store without more information, i.e. `toFile` or coerced imported paths. + /// It yields paths to the store. + pub fn iter_ctx_plain(&self) -> impl Iterator<Item = &str> { + self.iter_context().flat_map(|context| context.iter_plain()) + } + + /// Iterates over "full derivations" context elements, e.g. something + /// referring to their `drvPath`, i.e. their full sources and binary closure. + /// It yields derivation paths. + pub fn iter_ctx_derivation(&self) -> impl Iterator<Item = &str> { + return self + .iter_context() + .flat_map(|context| context.iter_derivation()); + } + + /// Iterates over "single" context elements, e.g. single derived paths, + /// or also known as the single output of a given derivation. + /// The first element of the tuple is the output name + /// and the second element is the derivation path. + pub fn iter_ctx_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { + return self + .iter_context() + .flat_map(|context| context.iter_single_outputs()); + } + + /// Returns whether this Nix string possess a context or not. + pub fn has_context(&self) -> bool { + self.context().is_some() + } + + /// This clears the context of the string, returning + /// the removed dependency tracking information. + pub fn take_context(&mut self) -> Option<Box<NixContext>> { + self.context_mut().take() + } + + /// This clears the context of that string, losing + /// all dependency tracking information. + pub fn clear_context(&mut self) { + let _ = self.take_context(); + } + + pub fn chars(&self) -> Chars<'_> { + self.as_bstr().chars() + } +} + +fn nix_escape_char(ch: char, next: Option<&char>) -> Option<&'static str> { + match (ch, next) { + ('\\', _) => Some("\\\\"), + ('"', _) => Some("\\\""), + ('\n', _) => Some("\\n"), + ('\t', _) => Some("\\t"), + ('\r', _) => Some("\\r"), + ('$', Some('{')) => Some("\\$"), + _ => None, + } +} + +/// Return true if this string is a keyword -- character strings +/// which lexically match the "identifier" production but are not +/// parsed as identifiers. See also cppnix commit +/// b72bc4a972fe568744d98b89d63adcd504cb586c. +fn is_keyword(s: &str) -> bool { + matches!( + s, + "if" | "then" | "else" | "assert" | "with" | "let" | "in" | "rec" | "inherit" + ) +} + +/// Return true if this string can be used as an identifier in Nix. +fn is_valid_nix_identifier(s: &str) -> bool { + // adapted from rnix-parser's tokenizer.rs + let mut chars = s.chars(); + match chars.next() { + Some('a'..='z' | 'A'..='Z' | '_') => (), + _ => return false, + } + for c in chars { + match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => (), + _ => return false, + } + } + true +} + +/// Escape a Nix string for display, as most user-visible representation +/// are escaped strings. +/// +/// Note that this does not add the outer pair of surrounding quotes. +fn nix_escape_string(input: &str) -> Cow<str> { + let mut iter = input.char_indices().peekable(); + + while let Some((i, c)) = iter.next() { + if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) { + let mut escaped = String::with_capacity(input.len()); + escaped.push_str(&input[..i]); + escaped.push_str(esc); + + // In theory we calculate how many bytes it takes to represent `esc` + // in UTF-8 and use that for the offset. It is, however, safe to + // assume that to be 1, as all characters that can be escaped in a + // Nix string are ASCII. + let mut inner_iter = input[i + 1..].chars().peekable(); + while let Some(c) = inner_iter.next() { + match nix_escape_char(c, inner_iter.peek()) { + Some(esc) => escaped.push_str(esc), + None => escaped.push(c), + } + } + + return Cow::Owned(escaped); + } + } + + Cow::Borrowed(input) +} + +impl Display for NixString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("\"")?; + f.write_str(&nix_escape_string(&self.to_str_lossy()))?; + f.write_str("\"") + } +} + +#[cfg(all(test, feature = "arbitrary"))] +mod tests { + use test_strategy::proptest; + + use super::*; + + use crate::properties::{eq_laws, hash_laws, ord_laws}; + + #[test] + fn size() { + assert_eq!(std::mem::size_of::<NixString>(), 8); + } + + #[proptest] + fn clone_strings(s: NixString) { + drop(s.clone()) + } + + eq_laws!(NixString); + hash_laws!(NixString); + ord_laws!(NixString); +} |