diff options
author | Aspen Smith <root@gws.fyi> | 2024-08-10T17·03-0400 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-08-18T19·49+0000 |
commit | 5382cbb93a94a8e68ddd2567f382661fd0f166b3 (patch) | |
tree | ebca797b26b56e466552efb0ee33e8ea778d8889 /tvix/eval/src/value/string.rs | |
parent | e086c76ee941198c70756e1b7b383edcb3572b4b (diff) |
refactor(tvix/eval): Pull context out into its own module r/8515
I'm gonna be doing some poking around in the internals of Context, so in preparation this pulls it out into its own module. Change-Id: I72ea7df80b5f36f838934ee07bdba66874c334c9 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12189 Autosubmit: aspen <root@gws.fyi> Reviewed-by: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/eval/src/value/string.rs')
-rw-r--r-- | tvix/eval/src/value/string.rs | 1031 |
1 files changed, 0 insertions, 1031 deletions
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs deleted file mode 100644 index 3680df3afb2d..000000000000 --- a/tvix/eval/src/value/string.rs +++ /dev/null @@ -1,1031 +0,0 @@ -//! This module implements Nix language strings. -//! -//! See [`NixString`] for more information about the internals of string values - -use bstr::{BStr, BString, ByteSlice, Chars}; -use nohash_hasher::BuildNoHashHasher; -use rnix::ast; -use rustc_hash::FxHashSet; -use rustc_hash::FxHasher; -use std::alloc::dealloc; -use std::alloc::{alloc, handle_alloc_error, Layout}; -use std::borrow::{Borrow, Cow}; -use std::cell::RefCell; -use std::ffi::c_void; -use std::fmt::{self, Debug, Display}; -use std::hash::{Hash, Hasher}; -use std::ops::Deref; -use std::ptr::{self, NonNull}; -use std::slice; - -use serde::de::{Deserializer, Visitor}; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, Serialize, Hash, PartialEq, Eq)] -pub enum NixContextElement { - /// A plain store path (e.g. source files copied to the store) - Plain(String), - - /// Single output of a derivation, represented by its name and its derivation path. - Single { name: String, derivation: String }, - - /// A reference to a complete derivation - /// including its source and its binary closure. - /// It is used for the `drvPath` attribute context. - /// The referred string is the store path to - /// the derivation path. - Derivation(String), -} - -/// Nix context strings representation in Tvix. This tracks a set of different kinds of string -/// dependencies that we can come across during manipulation of our language primitives, mostly -/// strings. There's some simple algebra of context strings and how they propagate w.r.t. primitive -/// operations, e.g. concatenation, interpolation and other string operations. -#[repr(transparent)] -#[derive(Clone, Debug, Serialize, Default)] -pub struct NixContext(FxHashSet<NixContextElement>); - -impl From<NixContextElement> for NixContext { - fn from(value: NixContextElement) -> Self { - let mut set = FxHashSet::default(); - set.insert(value); - Self(set) - } -} - -impl From<FxHashSet<NixContextElement>> for NixContext { - fn from(value: FxHashSet<NixContextElement>) -> Self { - Self(value) - } -} - -impl<const N: usize> From<[NixContextElement; N]> for NixContext { - fn from(value: [NixContextElement; N]) -> Self { - let mut set = FxHashSet::default(); - for elt in value { - set.insert(elt); - } - Self(set) - } -} - -impl NixContext { - /// Creates an empty context that can be populated - /// and passed to form a contextful [NixString], albeit - /// if the context is concretly empty, the resulting [NixString] - /// will be contextless. - pub fn new() -> Self { - Self::default() - } - - /// For internal consumers, we let people observe - /// if the [NixContext] is actually empty or not - /// to decide whether they want to skip the allocation - /// of a full blown [HashSet]. - pub(crate) fn is_empty(&self) -> bool { - self.0.is_empty() - } - - /// Consumes a new [NixContextElement] and add it if not already - /// present in this context. - pub fn append(mut self, other: NixContextElement) -> Self { - self.0.insert(other); - self - } - - /// Extends the existing context with more context elements. - pub fn extend<T>(&mut self, iter: T) - where - T: IntoIterator<Item = NixContextElement>, - { - self.0.extend(iter) - } - - /// Copies from another [NixString] its context strings - /// in this context. - pub fn mimic(&mut self, other: &NixString) { - if let Some(context) = other.context() { - self.extend(context.iter().cloned()); - } - } - - /// Iterates over "plain" context elements, e.g. sources imported - /// in the store without more information, i.e. `toFile` or coerced imported paths. - /// It yields paths to the store. - pub fn iter_plain(&self) -> impl Iterator<Item = &str> { - self.iter().filter_map(|elt| { - if let NixContextElement::Plain(s) = elt { - Some(s.as_str()) - } else { - None - } - }) - } - - /// Iterates over "full derivations" context elements, e.g. something - /// referring to their `drvPath`, i.e. their full sources and binary closure. - /// It yields derivation paths. - pub fn iter_derivation(&self) -> impl Iterator<Item = &str> { - self.iter().filter_map(|elt| { - if let NixContextElement::Derivation(s) = elt { - Some(s.as_str()) - } else { - None - } - }) - } - - /// Iterates over "single" context elements, e.g. single derived paths, - /// or also known as the single output of a given derivation. - /// The first element of the tuple is the output name - /// and the second element is the derivation path. - pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { - self.iter().filter_map(|elt| { - if let NixContextElement::Single { name, derivation } = elt { - Some((name.as_str(), derivation.as_str())) - } else { - None - } - }) - } - - /// Iterates over any element of the context. - pub fn iter(&self) -> impl Iterator<Item = &NixContextElement> { - self.0.iter() - } - - /// Produces a list of owned references to this current context, - /// no matter its type. - pub fn to_owned_references(self) -> Vec<String> { - self.0 - .into_iter() - .map(|ctx| match ctx { - NixContextElement::Derivation(drv_path) => drv_path, - NixContextElement::Plain(store_path) => store_path, - NixContextElement::Single { derivation, .. } => derivation, - }) - .collect() - } -} - -impl IntoIterator for NixContext { - type Item = NixContextElement; - - type IntoIter = std::collections::hash_set::IntoIter<NixContextElement>; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} - -/// This type is never instantiated, but serves to document the memory layout of the actual heap -/// allocation for Nix strings. -#[allow(dead_code)] -struct NixStringInner { - /// The string context, if any. Note that this is boxed to take advantage of the null pointer - /// niche, otherwise this field ends up being very large: - /// - /// ```notrust - /// >> std::mem::size_of::<Option<HashSet<String>>>() - /// 48 - /// - /// >> std::mem::size_of::<Option<Box<HashSet<String>>>>() - /// 8 - /// ``` - context: Option<Box<NixContext>>, - /// The length of the data, stored *inline in the allocation* - length: usize, - /// The actual data for the string itself. Will always be `length` bytes long - data: [u8], -} - -#[allow(clippy::zst_offset)] -impl NixStringInner { - /// Construct a [`Layout`] for a nix string allocation with the given length. - /// - /// Returns a tuple of: - /// 1. The layout itself. - /// 2. The offset of [`Self::length`] within the allocation, assuming the allocation starts at 0 - /// 3. The offset of the data array within the allocation, assuming the allocation starts at 0 - fn layout(len: usize) -> (Layout, usize, usize) { - let layout = Layout::new::<Option<Box<NixContext>>>(); - let (layout, len_offset) = layout.extend(Layout::new::<usize>()).unwrap(); - let (layout, data_offset) = layout.extend(Layout::array::<u8>(len).unwrap()).unwrap(); - (layout, len_offset, data_offset) - } - - /// Returns the [`Layout`] for an *already-allocated* nix string, loading the length from the - /// pointer. - /// - /// Returns a tuple of: - /// 1. The layout itself. - /// 2. The offset of [`Self::length`] within the allocation, assuming the allocation starts at 0 - /// 3. The offset of the data array within the allocation, assuming the allocation starts at 0 - /// - /// # Safety - /// - /// This function must only be called on a pointer that has been properly initialized with - /// [`Self::alloc`]. The data buffer may not necessarily be initialized - unsafe fn layout_of(this: NonNull<c_void>) -> (Layout, usize, usize) { - let layout = Layout::new::<Option<Box<NixContext>>>(); - let (_, len_offset) = layout.extend(Layout::new::<usize>()).unwrap(); - // SAFETY: Layouts are linear, so even though we haven't involved data at all yet, we know - // the len_offset is a valid offset into the second field of the allocation - let len = *(this.as_ptr().add(len_offset) as *const usize); - Self::layout(len) - } - - /// Allocate an *uninitialized* nix string with the given length. Writes the length to the - /// length value in the pointer, but leaves both context and data uninitialized - /// - /// This function is safe to call (as constructing pointers of any sort of validity is always - /// safe in Rust) but it is unsafe to use the resulting pointer to do anything other than - /// - /// 1. Read the length - /// 2. Write the context - /// 3. Write the data - /// - /// until the string is fully initialized - fn alloc(len: usize) -> NonNull<c_void> { - let (layout, len_offset, _data_offset) = Self::layout(len); - debug_assert_ne!(layout.size(), 0); - unsafe { - // SAFETY: Layout has non-zero size, since the layout of the context and the - // layout of the len both have non-zero size - let ptr = alloc(layout); - - if let Some(this) = NonNull::new(ptr as *mut _) { - // SAFETY: We've allocated with a layout that causes the len_offset to be in-bounds - // and writeable, and if the allocation succeeded it won't wrap - ((this.as_ptr() as *mut u8).add(len_offset) as *mut usize).write(len); - debug_assert_eq!(Self::len(this), len); - this - } else { - handle_alloc_error(layout); - } - } - } - - /// Deallocate the Nix string at the given pointer - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`] - unsafe fn dealloc(this: NonNull<c_void>) { - let (layout, _, _) = Self::layout_of(this); - // SAFETY: okay because of the safety guarantees of this method - dealloc(this.as_ptr() as *mut u8, layout) - } - - /// Return the length of the Nix string at the given pointer - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`] - unsafe fn len(this: NonNull<c_void>) -> usize { - let (_, len_offset, _) = Self::layout_of(this); - // SAFETY: As long as the safety guarantees of this method are upheld, we've allocated with - // a layout that causes the len_offset to be in-bounds and writeable, and if the allocation - // succeeded it won't wrap - *(this.as_ptr().add(len_offset) as *const usize) - } - - /// Return a pointer to the context value within the given Nix string pointer - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`] - unsafe fn context_ptr(this: NonNull<c_void>) -> *mut Option<Box<NixContext>> { - // SAFETY: The context is the first field in the layout of the allocation - this.as_ptr() as *mut Option<Box<NixContext>> - } - - /// Construct a shared reference to the context value within the given Nix string pointer - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`], and where the context has been properly initialized (by writing to the - /// pointer returned from [`Self::context_ptr`]). - /// - /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See - /// [`NonNull::as_ref`] for more. - unsafe fn context_ref<'a>(this: NonNull<c_void>) -> &'a Option<Box<NixContext>> { - Self::context_ptr(this).as_ref().unwrap() - } - - /// Construct a mutable reference to the context value within the given Nix string pointer - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`], and where the context has been properly initialized (by writing to the - /// pointer returned from [`Self::context_ptr`]). - /// - /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See - /// [`NonNull::as_mut`] for more. - unsafe fn context_mut<'a>(this: NonNull<c_void>) -> &'a mut Option<Box<NixContext>> { - Self::context_ptr(this).as_mut().unwrap() - } - - /// Return a pointer to the data array within the given Nix string pointer - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`] - unsafe fn data_ptr(this: NonNull<c_void>) -> *mut u8 { - let (_, _, data_offset) = Self::layout_of(this); - // SAFETY: data is the third field in the layout of the allocation - this.as_ptr().add(data_offset) as *mut u8 - } - - /// Construct a shared reference to the data slice within the given Nix string pointer - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`], and where the data array has been properly initialized (by writing to the - /// pointer returned from [`Self::data_ptr`]). - /// - /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See - /// [`slice::from_raw_parts`] for more. - unsafe fn data_slice<'a>(this: NonNull<c_void>) -> &'a [u8] { - let len = Self::len(this); - let data = Self::data_ptr(this); - slice::from_raw_parts(data, len) - } - - /// Construct a mutable reference to the data slice within the given Nix string pointer - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`], and where the data array has been properly initialized (by writing to the - /// pointer returned from [`Self::data_ptr`]). - /// - /// Also, all the normal Rust rules about pointer-to-reference conversion apply. See - /// [`slice::from_raw_parts_mut`] for more. - #[allow(dead_code)] - unsafe fn data_slice_mut<'a>(this: NonNull<c_void>) -> &'a mut [u8] { - let len = Self::len(this); - let data = Self::data_ptr(this); - slice::from_raw_parts_mut(data, len) - } - - /// Clone the Nix string pointed to by this pointer, and return a pointer to a new Nix string - /// containing the same data and context. - /// - /// # Safety - /// - /// This function must only be called with a pointer that has been properly initialized with - /// [`Self::alloc`], and where the context has been properly initialized (by writing to the - /// pointer returned from [`Self::context_ptr`]), and the data array has been properly - /// initialized (by writing to the pointer returned from [`Self::data_ptr`]). - unsafe fn clone(this: NonNull<c_void>) -> NonNull<c_void> { - let (layout, _, _) = Self::layout_of(this); - let ptr = alloc(layout); - if let Some(new) = NonNull::new(ptr as *mut _) { - ptr::copy_nonoverlapping(this.as_ptr(), new.as_ptr(), layout.size()); - Self::context_ptr(new).write(Self::context_ref(this).clone()); - new - } else { - handle_alloc_error(layout); - } - } -} - -#[derive(Default)] -struct InternerInner { - #[allow(clippy::disallowed_types)] // Not using the default hasher - map: std::collections::HashMap<u64, NonNull<c_void>, BuildNoHashHasher<u64>>, - #[cfg(feature = "no_leak")] - #[allow(clippy::disallowed_types)] // Not using the default hasher - interned_strings: FxHashSet<NonNull<c_void>>, -} - -unsafe impl Send for InternerInner {} - -fn hash<T>(s: T) -> u64 -where - T: Hash, -{ - let mut hasher = FxHasher::default(); - s.hash(&mut hasher); - hasher.finish() -} - -impl InternerInner { - pub fn intern(&mut self, s: &[u8]) -> NixString { - let hash = hash(s); - if let Some(s) = self.map.get(&hash) { - return NixString(*s); - } - - let string = NixString::new_inner(s, None); - self.map.insert(hash, string.0); - #[cfg(feature = "no_leak")] - self.interned_strings.insert(string.0); - string - } -} - -#[derive(Default)] -struct Interner(RefCell<InternerInner>); - -impl Interner { - pub fn intern(&self, s: &[u8]) -> NixString { - self.0.borrow_mut().intern(s) - } - - #[cfg(feature = "no_leak")] - pub fn is_interned_string(&self, string: &NixString) -> bool { - self.0.borrow().interned_strings.contains(&string.0) - } -} - -thread_local! { - static INTERNER: Interner = Interner::default(); -} - -/// Nix string values -/// -/// # Internals -/// -/// For performance reasons (to keep allocations small, and to avoid indirections), [`NixString`] is -/// represented as a single *thin* pointer to a packed data structure containing the -/// [context][NixContext] and the string data itself (which is a raw byte array, to match the Nix -/// string semantics that allow any array of bytes to be represented by a string). - -/// This memory representation is documented in [`NixStringInner`], but since Rust prefers to deal -/// with slices via *fat pointers* (pointers that include the length in the *pointer*, not in the -/// heap allocation), we have to do mostly manual layout management and allocation for this -/// representation. See the documentation for the methods of [`NixStringInner`] for more information -pub struct NixString(NonNull<c_void>); - -unsafe impl Send for NixString {} -unsafe impl Sync for NixString {} - -impl Drop for NixString { - #[cfg(not(feature = "no_leak"))] - fn drop(&mut self) { - if self.context().is_some() { - // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct - // according to the rules of dealloc - unsafe { - NixStringInner::dealloc(self.0); - } - } - } - - #[cfg(feature = "no_leak")] - fn drop(&mut self) { - if INTERNER.with(|i| i.is_interned_string(self)) { - return; - } - - // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct - // according to the rules of dealloc - unsafe { - NixStringInner::dealloc(self.0); - } - } -} - -impl Clone for NixString { - fn clone(&self) -> Self { - if cfg!(feature = "no_leak") || self.context().is_some() { - // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct - // according to the rules of clone - unsafe { Self(NixStringInner::clone(self.0)) } - } else { - // SAFETY: - // - // - NixStrings are never mutated - // - NixStrings are never freed - Self(self.0) - } - } -} - -impl Debug for NixString { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - if let Some(ctx) = self.context() { - f.debug_struct("NixString") - .field("context", ctx) - .field("data", &self.as_bstr()) - .finish() - } else { - write!(f, "{:?}", self.as_bstr()) - } - } -} - -impl PartialEq for NixString { - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 || self.as_bstr() == other.as_bstr() - } -} - -impl Eq for NixString {} - -impl PartialEq<&[u8]> for NixString { - fn eq(&self, other: &&[u8]) -> bool { - **self == **other - } -} - -impl PartialEq<&str> for NixString { - fn eq(&self, other: &&str) -> bool { - **self == other.as_bytes() - } -} - -impl PartialOrd for NixString { - fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { - Some(self.cmp(other)) - } -} - -impl Ord for NixString { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - if self.0 == other.0 { - return std::cmp::Ordering::Equal; - } - self.as_bstr().cmp(other.as_bstr()) - } -} - -impl From<Box<BStr>> for NixString { - fn from(value: Box<BStr>) -> Self { - Self::new(&value, None) - } -} - -impl From<BString> for NixString { - fn from(value: BString) -> Self { - Self::new(&value, None) - } -} - -impl From<&BStr> for NixString { - fn from(value: &BStr) -> Self { - value.to_owned().into() - } -} - -impl From<&[u8]> for NixString { - fn from(value: &[u8]) -> Self { - Self::from(value.to_owned()) - } -} - -impl From<Vec<u8>> for NixString { - fn from(value: Vec<u8>) -> Self { - value.into_boxed_slice().into() - } -} - -impl From<Box<[u8]>> for NixString { - fn from(value: Box<[u8]>) -> Self { - Self::new(&value, None) - } -} - -impl From<&str> for NixString { - fn from(s: &str) -> Self { - s.as_bytes().into() - } -} - -impl From<String> for NixString { - fn from(s: String) -> Self { - s.into_bytes().into() - } -} - -impl<T> From<(T, Option<Box<NixContext>>)> for NixString -where - NixString: From<T>, -{ - fn from((s, ctx): (T, Option<Box<NixContext>>)) -> Self { - Self::new(NixString::from(s).as_ref(), ctx) - } -} - -impl From<Box<str>> for NixString { - fn from(s: Box<str>) -> Self { - s.into_boxed_bytes().into() - } -} - -impl From<ast::Ident> for NixString { - fn from(ident: ast::Ident) -> Self { - ident.ident_token().unwrap().text().into() - } -} - -impl<'a> From<&'a NixString> for &'a BStr { - fn from(s: &'a NixString) -> Self { - s.as_bstr() - } -} - -// No impl From<NixString> for String, that one quotes. - -impl From<NixString> for BString { - fn from(s: NixString) -> Self { - s.as_bstr().to_owned() - } -} - -impl AsRef<[u8]> for NixString { - fn as_ref(&self) -> &[u8] { - self.as_bytes() - } -} - -impl Borrow<BStr> for NixString { - fn borrow(&self) -> &BStr { - self.as_bstr() - } -} - -impl Hash for NixString { - fn hash<H: std::hash::Hasher>(&self, state: &mut H) { - self.as_bstr().hash(state) - } -} - -impl<'de> Deserialize<'de> for NixString { - fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> - where - D: Deserializer<'de>, - { - struct StringVisitor; - - impl<'de> Visitor<'de> for StringVisitor { - type Value = NixString; - - fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { - formatter.write_str("a valid Nix string") - } - - fn visit_string<E>(self, v: String) -> Result<Self::Value, E> - where - E: serde::de::Error, - { - Ok(v.into()) - } - - fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> - where - E: serde::de::Error, - { - Ok(v.into()) - } - } - - deserializer.deserialize_string(StringVisitor) - } -} - -impl Deref for NixString { - type Target = BStr; - - fn deref(&self) -> &Self::Target { - self.as_bstr() - } -} - -#[cfg(feature = "arbitrary")] -mod arbitrary { - use super::*; - use proptest::prelude::{any_with, Arbitrary}; - use proptest::strategy::{BoxedStrategy, Strategy}; - - impl Arbitrary for NixString { - type Parameters = <String as Arbitrary>::Parameters; - - type Strategy = BoxedStrategy<Self>; - - fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { - any_with::<String>(args).prop_map(Self::from).boxed() - } - } -} - -/// Set non-scientifically. TODO(aspen): think more about what this should be -const INTERN_THRESHOLD: usize = 32; - -impl NixString { - fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self { - debug_assert!( - !context.as_deref().is_some_and(NixContext::is_empty), - "BUG: initialized with empty context" - ); - - if !cfg!(feature = "no_leak") /* It's only safe to intern if we leak strings, since there's - * nothing yet preventing interned strings from getting freed - * (and then used by other copies) otherwise - */ - && contents.len() <= INTERN_THRESHOLD - && context.is_none() - { - return INTERNER.with(|i| i.intern(contents)); - } - - Self::new_inner(contents, context) - } - - fn new_inner(contents: &[u8], context: Option<Box<NixContext>>) -> Self { - // SAFETY: We're always fully initializing a NixString here: - // - // 1. NixStringInner::alloc sets up the len for us - // 2. We set the context, using ptr::write to make sure that the uninitialized memory isn't - // read or dropped - // 3. We set the data, using copy_from_nonoverlapping to make sure that the uninitialized - // memory isn't read or dropped - // - // Only *then* can we construct a NixString - unsafe { - let inner = NixStringInner::alloc(contents.len()); - NixStringInner::context_ptr(inner).write(context); - NixStringInner::data_ptr(inner) - .copy_from_nonoverlapping(contents.as_ptr(), contents.len()); - Self(inner) - } - } - - pub fn new_inherit_context_from<T>(other: &NixString, new_contents: T) -> Self - where - NixString: From<T>, - { - Self::new( - Self::from(new_contents).as_ref(), - other.context().map(|c| Box::new(c.clone())), - ) - } - - pub fn new_context_from<T>(context: NixContext, contents: T) -> Self - where - NixString: From<T>, - { - Self::new( - Self::from(contents).as_ref(), - if context.is_empty() { - None - } else { - Some(Box::new(context)) - }, - ) - } - - pub fn as_bstr(&self) -> &BStr { - BStr::new(self.as_bytes()) - } - - pub fn as_bytes(&self) -> &[u8] { - // SAFETY: There's no way to construct an uninitialized NixString (see the SAFETY comment in - // `new`) - unsafe { NixStringInner::data_slice(self.0) } - } - - pub fn into_bstring(self) -> BString { - self.as_bstr().to_owned() - } - - /// Return a displayable representation of the string as an - /// identifier. - /// - /// This is used when printing out strings used as e.g. attribute - /// set keys, as those are only escaped in the presence of special - /// characters. - pub fn ident_str(&self) -> Cow<str> { - let escaped = match self.to_str_lossy() { - Cow::Borrowed(s) => nix_escape_string(s), - Cow::Owned(s) => nix_escape_string(&s).into_owned().into(), - }; - match escaped { - // A borrowed string is unchanged and can be returned as - // is. - Cow::Borrowed(_) => { - if is_valid_nix_identifier(&escaped) && !is_keyword(&escaped) { - escaped - } else { - Cow::Owned(format!("\"{}\"", escaped)) - } - } - - // An owned string has escapes, and needs the outer quotes - // for display. - Cow::Owned(s) => Cow::Owned(format!("\"{}\"", s)), - } - } - - pub fn concat(&self, other: &Self) -> Self { - let mut s = self.to_vec(); - s.extend(&(***other)); - - let context = [self.context(), other.context()] - .into_iter() - .flatten() - .fold(NixContext::new(), |mut acc_ctx, new_ctx| { - // TODO: consume new_ctx? - acc_ctx.extend(new_ctx.iter().cloned()); - acc_ctx - }); - Self::new_context_from(context, s) - } - - pub(crate) fn context(&self) -> Option<&NixContext> { - // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY - // comment in `new`). - // - // Also, we're using the same lifetime and mutability as self, to fit the - // pointer-to-reference conversion rules - let context = unsafe { NixStringInner::context_ref(self.0).as_deref() }; - - debug_assert!( - !context.is_some_and(NixContext::is_empty), - "BUG: empty context" - ); - - context - } - - pub(crate) fn context_mut(&mut self) -> &mut Option<Box<NixContext>> { - // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY - // comment in `new`). - // - // Also, we're using the same lifetime and mutability as self, to fit the - // pointer-to-reference conversion rules - let context = unsafe { NixStringInner::context_mut(self.0) }; - - debug_assert!( - !context.as_deref().is_some_and(NixContext::is_empty), - "BUG: empty context" - ); - - context - } - - /// Iterates over all context elements. - /// See [iter_plain], [iter_derivation], [iter_single_outputs]. - pub fn iter_context(&self) -> impl Iterator<Item = &NixContext> { - self.context().into_iter() - } - - /// Iterates over "plain" context elements, e.g. sources imported - /// in the store without more information, i.e. `toFile` or coerced imported paths. - /// It yields paths to the store. - pub fn iter_ctx_plain(&self) -> impl Iterator<Item = &str> { - self.iter_context().flat_map(|context| context.iter_plain()) - } - - /// Iterates over "full derivations" context elements, e.g. something - /// referring to their `drvPath`, i.e. their full sources and binary closure. - /// It yields derivation paths. - pub fn iter_ctx_derivation(&self) -> impl Iterator<Item = &str> { - return self - .iter_context() - .flat_map(|context| context.iter_derivation()); - } - - /// Iterates over "single" context elements, e.g. single derived paths, - /// or also known as the single output of a given derivation. - /// The first element of the tuple is the output name - /// and the second element is the derivation path. - pub fn iter_ctx_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { - return self - .iter_context() - .flat_map(|context| context.iter_single_outputs()); - } - - /// Returns whether this Nix string possess a context or not. - pub fn has_context(&self) -> bool { - self.context().is_some() - } - - /// This clears the context of the string, returning - /// the removed dependency tracking information. - pub fn take_context(&mut self) -> Option<Box<NixContext>> { - self.context_mut().take() - } - - /// This clears the context of that string, losing - /// all dependency tracking information. - pub fn clear_context(&mut self) { - let _ = self.take_context(); - } - - pub fn chars(&self) -> Chars<'_> { - self.as_bstr().chars() - } -} - -fn nix_escape_char(ch: char, next: Option<&char>) -> Option<&'static str> { - match (ch, next) { - ('\\', _) => Some("\\\\"), - ('"', _) => Some("\\\""), - ('\n', _) => Some("\\n"), - ('\t', _) => Some("\\t"), - ('\r', _) => Some("\\r"), - ('$', Some('{')) => Some("\\$"), - _ => None, - } -} - -/// Return true if this string is a keyword -- character strings -/// which lexically match the "identifier" production but are not -/// parsed as identifiers. See also cppnix commit -/// b72bc4a972fe568744d98b89d63adcd504cb586c. -fn is_keyword(s: &str) -> bool { - matches!( - s, - "if" | "then" | "else" | "assert" | "with" | "let" | "in" | "rec" | "inherit" - ) -} - -/// Return true if this string can be used as an identifier in Nix. -fn is_valid_nix_identifier(s: &str) -> bool { - // adapted from rnix-parser's tokenizer.rs - let mut chars = s.chars(); - match chars.next() { - Some('a'..='z' | 'A'..='Z' | '_') => (), - _ => return false, - } - for c in chars { - match c { - 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => (), - _ => return false, - } - } - true -} - -/// Escape a Nix string for display, as most user-visible representation -/// are escaped strings. -/// -/// Note that this does not add the outer pair of surrounding quotes. -fn nix_escape_string(input: &str) -> Cow<str> { - let mut iter = input.char_indices().peekable(); - - while let Some((i, c)) = iter.next() { - if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) { - let mut escaped = String::with_capacity(input.len()); - escaped.push_str(&input[..i]); - escaped.push_str(esc); - - // In theory we calculate how many bytes it takes to represent `esc` - // in UTF-8 and use that for the offset. It is, however, safe to - // assume that to be 1, as all characters that can be escaped in a - // Nix string are ASCII. - let mut inner_iter = input[i + 1..].chars().peekable(); - while let Some(c) = inner_iter.next() { - match nix_escape_char(c, inner_iter.peek()) { - Some(esc) => escaped.push_str(esc), - None => escaped.push(c), - } - } - - return Cow::Owned(escaped); - } - } - - Cow::Borrowed(input) -} - -impl Display for NixString { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.write_str("\"")?; - f.write_str(&nix_escape_string(&self.to_str_lossy()))?; - f.write_str("\"") - } -} - -#[cfg(all(test, feature = "arbitrary"))] -mod tests { - use test_strategy::proptest; - - use super::*; - - use crate::properties::{eq_laws, hash_laws, ord_laws}; - - #[test] - fn size() { - assert_eq!(std::mem::size_of::<NixString>(), 8); - } - - #[proptest] - fn clone_strings(s: NixString) { - drop(s.clone()) - } - - eq_laws!(NixString); - hash_laws!(NixString); - ord_laws!(NixString); -} |