about summary refs log tree commit diff
path: root/tvix/eval/src/value/string.rs
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/eval/src/value/string.rs')
-rw-r--r--tvix/eval/src/value/string.rs520
1 files changed, 520 insertions, 0 deletions
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs
new file mode 100644
index 000000000000..e2767dd22cdb
--- /dev/null
+++ b/tvix/eval/src/value/string.rs
@@ -0,0 +1,520 @@
+//! This module implements Nix language strings.
+//!
+//! Nix language strings never need to be modified on the language
+//! level, allowing us to shave off some memory overhead and only
+//! paying the cost when creating new strings.
+use bstr::{BStr, BString, ByteSlice, Chars};
+use rnix::ast;
+use std::borrow::{Borrow, Cow};
+use std::collections::HashSet;
+use std::fmt::Display;
+use std::hash::Hash;
+use std::ops::Deref;
+
+use serde::de::{Deserializer, Visitor};
+use serde::{Deserialize, Serialize};
+
+#[derive(Clone, Debug, Serialize, Hash, PartialEq, Eq)]
+pub enum NixContextElement {
+    /// A plain store path (e.g. source files copied to the store)
+    Plain(String),
+
+    /// Single output of a derivation, represented by its name and its derivation path.
+    Single { name: String, derivation: String },
+
+    /// A reference to a complete derivation
+    /// including its source and its binary closure.
+    /// It is used for the `drvPath` attribute context.
+    /// The referred string is the store path to
+    /// the derivation path.
+    Derivation(String),
+}
+
+/// Nix context strings representation in Tvix. This tracks a set of different kinds of string
+/// dependencies that we can come across during manipulation of our language primitives, mostly
+/// strings. There's some simple algebra of context strings and how they propagate w.r.t. primitive
+/// operations, e.g. concatenation, interpolation and other string operations.
+#[repr(transparent)]
+#[derive(Clone, Debug, Serialize, Default)]
+pub struct NixContext(HashSet<NixContextElement>);
+
+impl From<NixContextElement> for NixContext {
+    fn from(value: NixContextElement) -> Self {
+        Self([value].into())
+    }
+}
+
+impl NixContext {
+    /// Creates an empty context that can be populated
+    /// and passed to form a contextful [NixString], albeit
+    /// if the context is concretly empty, the resulting [NixString]
+    /// will be contextless.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// For internal consumers, we let people observe
+    /// if the [NixContext] is actually empty or not
+    /// to decide whether they want to skip the allocation
+    /// of a full blown [HashSet].
+    pub(crate) fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    /// Consumes a new [NixContextElement] and add it if not already
+    /// present in this context.
+    pub fn append(mut self, other: NixContextElement) -> Self {
+        self.0.insert(other);
+        self
+    }
+
+    /// Consumes both ends of the join into a new NixContent
+    /// containing the union of elements of both ends.
+    pub fn join(mut self, other: &mut NixContext) -> Self {
+        let other_set = std::mem::take(&mut other.0);
+        let mut set: HashSet<NixContextElement> = std::mem::take(&mut self.0);
+        set.extend(other_set);
+        Self(set)
+    }
+
+    /// Copies from another [NixString] its context strings
+    /// in this context.
+    pub fn mimic(&mut self, other: &NixString) {
+        if let Some(ref context) = other.1 {
+            self.0.extend(context.iter().cloned());
+        }
+    }
+
+    /// Iterates over "plain" context elements, e.g. sources imported
+    /// in the store without more information, i.e. `toFile` or coerced imported paths.
+    /// It yields paths to the store.
+    pub fn iter_plain(&self) -> impl Iterator<Item = &str> {
+        self.iter().filter_map(|elt| {
+            if let NixContextElement::Plain(s) = elt {
+                Some(s.as_str())
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Iterates over "full derivations" context elements, e.g. something
+    /// referring to their `drvPath`, i.e. their full sources and binary closure.
+    /// It yields derivation paths.
+    pub fn iter_derivation(&self) -> impl Iterator<Item = &str> {
+        self.iter().filter_map(|elt| {
+            if let NixContextElement::Derivation(s) = elt {
+                Some(s.as_str())
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Iterates over "single" context elements, e.g. single derived paths,
+    /// or also known as the single output of a given derivation.
+    /// The first element of the tuple is the output name
+    /// and the second element is the derivation path.
+    pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> {
+        self.iter().filter_map(|elt| {
+            if let NixContextElement::Single { name, derivation } = elt {
+                Some((name.as_str(), derivation.as_str()))
+            } else {
+                None
+            }
+        })
+    }
+
+    /// Iterates over any element of the context.
+    pub fn iter(&self) -> impl Iterator<Item = &NixContextElement> {
+        self.0.iter()
+    }
+
+    /// Produces a list of owned references to this current context,
+    /// no matter its type.
+    pub fn to_owned_references(self) -> Vec<String> {
+        self.0
+            .into_iter()
+            .map(|ctx| match ctx {
+                NixContextElement::Derivation(drv_path) => drv_path,
+                NixContextElement::Plain(store_path) => store_path,
+                NixContextElement::Single { derivation, .. } => derivation,
+            })
+            .collect()
+    }
+}
+
+// FIXME: when serializing, ignore the context?
+#[derive(Clone, Debug, Serialize)]
+pub struct NixString(BString, Option<NixContext>);
+
+impl PartialEq for NixString {
+    fn eq(&self, other: &Self) -> bool {
+        self.as_bstr() == other.as_bstr()
+    }
+}
+
+impl Eq for NixString {}
+
+impl PartialEq<&[u8]> for NixString {
+    fn eq(&self, other: &&[u8]) -> bool {
+        **self == **other
+    }
+}
+
+impl PartialEq<&str> for NixString {
+    fn eq(&self, other: &&str) -> bool {
+        **self == other.as_bytes()
+    }
+}
+
+impl PartialOrd for NixString {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for NixString {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.as_bstr().cmp(other.as_bstr())
+    }
+}
+
+impl From<&BStr> for NixString {
+    fn from(value: &BStr) -> Self {
+        Self(value.to_owned(), None)
+    }
+}
+
+impl From<&[u8]> for NixString {
+    fn from(value: &[u8]) -> Self {
+        Self(value.into(), None)
+    }
+}
+
+impl From<Vec<u8>> for NixString {
+    fn from(value: Vec<u8>) -> Self {
+        Self(value.into(), None)
+    }
+}
+
+impl From<&str> for NixString {
+    fn from(s: &str) -> Self {
+        Self(s.as_bytes().into(), None)
+    }
+}
+
+impl From<String> for NixString {
+    fn from(s: String) -> Self {
+        Self(s.into(), None)
+    }
+}
+
+impl From<(String, Option<NixContext>)> for NixString {
+    fn from((s, ctx): (String, Option<NixContext>)) -> Self {
+        NixString(s.into(), ctx)
+    }
+}
+
+impl From<Box<str>> for NixString {
+    fn from(s: Box<str>) -> Self {
+        Self(s.into_boxed_bytes().into_vec().into(), None)
+    }
+}
+
+impl From<ast::Ident> for NixString {
+    fn from(ident: ast::Ident) -> Self {
+        ident.ident_token().unwrap().text().into()
+    }
+}
+
+impl<'a> From<&'a NixString> for &'a BStr {
+    fn from(s: &'a NixString) -> Self {
+        BStr::new(&*s.0)
+    }
+}
+
+impl From<NixString> for BString {
+    fn from(s: NixString) -> Self {
+        s.0
+    }
+}
+
+impl AsRef<[u8]> for NixString {
+    fn as_ref(&self) -> &[u8] {
+        &self.0
+    }
+}
+
+impl Borrow<BStr> for NixString {
+    fn borrow(&self) -> &BStr {
+        self.as_bstr()
+    }
+}
+
+impl Hash for NixString {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.as_bstr().hash(state)
+    }
+}
+
+impl<'de> Deserialize<'de> for NixString {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        struct StringVisitor;
+
+        impl<'de> Visitor<'de> for StringVisitor {
+            type Value = NixString;
+
+            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+                formatter.write_str("a valid Nix string")
+            }
+
+            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
+            where
+                E: serde::de::Error,
+            {
+                Ok(v.into())
+            }
+
+            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+            where
+                E: serde::de::Error,
+            {
+                Ok(v.into())
+            }
+        }
+
+        deserializer.deserialize_string(StringVisitor)
+    }
+}
+
+impl Deref for NixString {
+    type Target = BString;
+
+    fn deref(&self) -> &Self::Target {
+        &self.0
+    }
+}
+
+#[cfg(feature = "arbitrary")]
+mod arbitrary {
+    use super::*;
+    use proptest::prelude::{any_with, Arbitrary};
+    use proptest::strategy::{BoxedStrategy, Strategy};
+
+    impl Arbitrary for NixString {
+        type Parameters = <String as Arbitrary>::Parameters;
+
+        type Strategy = BoxedStrategy<Self>;
+
+        fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
+            any_with::<String>(args).prop_map(Self::from).boxed()
+        }
+    }
+}
+
+impl NixString {
+    pub fn new_inherit_context_from(other: &NixString, new_contents: BString) -> Self {
+        Self(new_contents, other.1.clone())
+    }
+
+    pub fn new_context_from(context: NixContext, contents: BString) -> Self {
+        Self(
+            contents,
+            if context.is_empty() {
+                None
+            } else {
+                Some(context)
+            },
+        )
+    }
+
+    pub fn as_mut_bstring(&mut self) -> &mut BString {
+        &mut self.0
+    }
+
+    pub fn as_bstr(&self) -> &BStr {
+        BStr::new(self.as_bytes())
+    }
+
+    pub fn as_bytes(&self) -> &[u8] {
+        &self.0
+    }
+
+    pub fn into_bstring(self) -> BString {
+        self.0
+    }
+
+    /// Return a displayable representation of the string as an
+    /// identifier.
+    ///
+    /// This is used when printing out strings used as e.g. attribute
+    /// set keys, as those are only escaped in the presence of special
+    /// characters.
+    pub fn ident_str(&self) -> Cow<str> {
+        let escaped = match self.to_str_lossy() {
+            Cow::Borrowed(s) => nix_escape_string(s),
+            Cow::Owned(s) => nix_escape_string(&s).into_owned().into(),
+        };
+        match escaped {
+            // A borrowed string is unchanged and can be returned as
+            // is.
+            Cow::Borrowed(_) => {
+                if is_valid_nix_identifier(&escaped) && !is_keyword(&escaped) {
+                    escaped
+                } else {
+                    Cow::Owned(format!("\"{}\"", escaped))
+                }
+            }
+
+            // An owned string has escapes, and needs the outer quotes
+            // for display.
+            Cow::Owned(s) => Cow::Owned(format!("\"{}\"", s)),
+        }
+    }
+
+    pub fn concat(&self, other: &Self) -> Self {
+        let mut s = self.to_vec();
+        s.extend(other.0.as_slice());
+
+        let context = [&self.1, &other.1]
+            .into_iter()
+            .flatten()
+            .fold(NixContext::new(), |acc_ctx, new_ctx| {
+                acc_ctx.join(&mut new_ctx.clone())
+            });
+        Self::new_context_from(context, s.into())
+    }
+
+    pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> {
+        return self.1.as_mut();
+    }
+
+    pub fn iter_context(&self) -> impl Iterator<Item = &NixContext> {
+        return self.1.iter();
+    }
+
+    pub fn iter_plain(&self) -> impl Iterator<Item = &str> {
+        return self.1.iter().flat_map(|context| context.iter_plain());
+    }
+
+    pub fn iter_derivation(&self) -> impl Iterator<Item = &str> {
+        return self.1.iter().flat_map(|context| context.iter_derivation());
+    }
+
+    pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> {
+        return self
+            .1
+            .iter()
+            .flat_map(|context| context.iter_single_outputs());
+    }
+
+    /// Returns whether this Nix string possess a context or not.
+    pub fn has_context(&self) -> bool {
+        self.1.is_some()
+    }
+
+    /// This clears the context of that string, losing
+    /// all dependency tracking information.
+    pub fn clear_context(&mut self) {
+        self.1 = None;
+    }
+
+    pub fn chars(&self) -> Chars<'_> {
+        self.0.chars()
+    }
+}
+
+fn nix_escape_char(ch: char, next: Option<&char>) -> Option<&'static str> {
+    match (ch, next) {
+        ('\\', _) => Some("\\\\"),
+        ('"', _) => Some("\\\""),
+        ('\n', _) => Some("\\n"),
+        ('\t', _) => Some("\\t"),
+        ('\r', _) => Some("\\r"),
+        ('$', Some('{')) => Some("\\$"),
+        _ => None,
+    }
+}
+
+/// Return true if this string is a keyword -- character strings
+/// which lexically match the "identifier" production but are not
+/// parsed as identifiers.  See also cppnix commit
+/// b72bc4a972fe568744d98b89d63adcd504cb586c.
+fn is_keyword(s: &str) -> bool {
+    matches!(
+        s,
+        "if" | "then" | "else" | "assert" | "with" | "let" | "in" | "rec" | "inherit"
+    )
+}
+
+/// Return true if this string can be used as an identifier in Nix.
+fn is_valid_nix_identifier(s: &str) -> bool {
+    // adapted from rnix-parser's tokenizer.rs
+    let mut chars = s.chars();
+    match chars.next() {
+        Some('a'..='z' | 'A'..='Z' | '_') => (),
+        _ => return false,
+    }
+    for c in chars {
+        match c {
+            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => (),
+            _ => return false,
+        }
+    }
+    true
+}
+
+/// Escape a Nix string for display, as most user-visible representation
+/// are escaped strings.
+///
+/// Note that this does not add the outer pair of surrounding quotes.
+fn nix_escape_string(input: &str) -> Cow<str> {
+    let mut iter = input.char_indices().peekable();
+
+    while let Some((i, c)) = iter.next() {
+        if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) {
+            let mut escaped = String::with_capacity(input.len());
+            escaped.push_str(&input[..i]);
+            escaped.push_str(esc);
+
+            // In theory we calculate how many bytes it takes to represent `esc`
+            // in UTF-8 and use that for the offset. It is, however, safe to
+            // assume that to be 1, as all characters that can be escaped in a
+            // Nix string are ASCII.
+            let mut inner_iter = input[i + 1..].chars().peekable();
+            while let Some(c) = inner_iter.next() {
+                match nix_escape_char(c, inner_iter.peek()) {
+                    Some(esc) => escaped.push_str(esc),
+                    None => escaped.push(c),
+                }
+            }
+
+            return Cow::Owned(escaped);
+        }
+    }
+
+    Cow::Borrowed(input)
+}
+
+impl Display for NixString {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("\"")?;
+        f.write_str(&nix_escape_string(&self.to_str_lossy()))?;
+        f.write_str("\"")
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use crate::properties::{eq_laws, hash_laws, ord_laws};
+
+    eq_laws!(NixString);
+    hash_laws!(NixString);
+    ord_laws!(NixString);
+}