about summary refs log tree commit diff
path: root/tvix/eval/src/value/string.rs
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/eval/src/value/string.rs')
-rw-r--r--tvix/eval/src/value/string.rs277
1 files changed, 277 insertions, 0 deletions
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs
new file mode 100644
index 000000000000..8ffbc2a5325c
--- /dev/null
+++ b/tvix/eval/src/value/string.rs
@@ -0,0 +1,277 @@
+//! This module implements Nix language strings.
+//!
+//! Nix language strings never need to be modified on the language
+//! level, allowing us to shave off some memory overhead and only
+//! paying the cost when creating new strings.
+use rnix::ast;
+use std::ffi::OsStr;
+use std::hash::Hash;
+use std::ops::Deref;
+use std::path::Path;
+use std::{borrow::Cow, fmt::Display, str::Chars};
+
+use serde::de::{Deserializer, Visitor};
+use serde::{Deserialize, Serialize};
+
+#[repr(transparent)]
+#[derive(Clone, Debug, Serialize)]
+pub struct NixString(Box<str>);
+
+impl PartialEq for NixString {
+    fn eq(&self, other: &Self) -> bool {
+        self.as_str() == other.as_str()
+    }
+}
+
+impl Eq for NixString {}
+
+impl PartialOrd for NixString {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        self.as_str().partial_cmp(other.as_str())
+    }
+}
+
+impl Ord for NixString {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.as_str().cmp(other.as_str())
+    }
+}
+
+impl From<&str> for NixString {
+    fn from(s: &str) -> Self {
+        NixString(Box::from(s))
+    }
+}
+
+impl From<String> for NixString {
+    fn from(s: String) -> Self {
+        NixString(s.into_boxed_str())
+    }
+}
+
+impl From<Box<str>> for NixString {
+    fn from(s: Box<str>) -> Self {
+        Self(s)
+    }
+}
+
+impl From<ast::Ident> for NixString {
+    fn from(ident: ast::Ident) -> Self {
+        ident.ident_token().unwrap().text().into()
+    }
+}
+
+impl Hash for NixString {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.as_str().hash(state)
+    }
+}
+
+impl<'de> Deserialize<'de> for NixString {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        struct StringVisitor;
+
+        impl<'de> Visitor<'de> for StringVisitor {
+            type Value = NixString;
+
+            fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
+                formatter.write_str("a valid Nix string")
+            }
+
+            fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
+            where
+                E: serde::de::Error,
+            {
+                Ok(v.into())
+            }
+
+            fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+            where
+                E: serde::de::Error,
+            {
+                Ok(v.into())
+            }
+        }
+
+        deserializer.deserialize_string(StringVisitor)
+    }
+}
+
+#[cfg(feature = "arbitrary")]
+mod arbitrary {
+    use super::*;
+    use proptest::prelude::{any_with, Arbitrary};
+    use proptest::strategy::{BoxedStrategy, Strategy};
+
+    impl Arbitrary for NixString {
+        type Parameters = <String as Arbitrary>::Parameters;
+
+        type Strategy = BoxedStrategy<Self>;
+
+        fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
+            any_with::<String>(args).prop_map(Self::from).boxed()
+        }
+    }
+}
+
+impl NixString {
+    pub fn as_str(&self) -> &str {
+        &self.0
+    }
+
+    /// Return a displayable representation of the string as an
+    /// identifier.
+    ///
+    /// This is used when printing out strings used as e.g. attribute
+    /// set keys, as those are only escaped in the presence of special
+    /// characters.
+    pub fn ident_str(&self) -> Cow<str> {
+        let escaped = nix_escape_string(self.as_str());
+
+        match escaped {
+            // A borrowed string is unchanged and can be returned as
+            // is.
+            Cow::Borrowed(_) => {
+                if is_valid_nix_identifier(&escaped) && !is_keyword(&escaped) {
+                    escaped
+                } else {
+                    Cow::Owned(format!("\"{}\"", escaped))
+                }
+            }
+
+            // An owned string has escapes, and needs the outer quotes
+            // for display.
+            Cow::Owned(s) => Cow::Owned(format!("\"{}\"", s)),
+        }
+    }
+
+    pub fn concat(&self, other: &Self) -> Self {
+        let mut s = self.as_str().to_owned();
+        s.push_str(other.as_str());
+        NixString(s.into_boxed_str())
+    }
+
+    pub fn chars(&self) -> Chars<'_> {
+        self.0.chars()
+    }
+}
+
+fn nix_escape_char(ch: char, next: Option<&char>) -> Option<&'static str> {
+    match (ch, next) {
+        ('\\', _) => Some("\\\\"),
+        ('"', _) => Some("\\\""),
+        ('\n', _) => Some("\\n"),
+        ('\t', _) => Some("\\t"),
+        ('\r', _) => Some("\\r"),
+        ('$', Some('{')) => Some("\\$"),
+        _ => None,
+    }
+}
+
+/// Return true if this string is a keyword -- character strings
+/// which lexically match the "identifier" production but are not
+/// parsed as identifiers.  See also cppnix commit
+/// b72bc4a972fe568744d98b89d63adcd504cb586c.
+fn is_keyword(s: &str) -> bool {
+    match s {
+        "if" | "then" | "else" | "assert" | "with" | "let" | "in" | "rec" | "inherit" => true,
+        _ => false,
+    }
+}
+
+/// Return true if this string can be used as an identifier in Nix.
+fn is_valid_nix_identifier(s: &str) -> bool {
+    // adapted from rnix-parser's tokenizer.rs
+    let mut chars = s.chars();
+    match chars.next() {
+        Some('a'..='z' | 'A'..='Z' | '_') => (),
+        _ => return false,
+    }
+    for c in chars {
+        match c {
+            'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => (),
+            _ => return false,
+        }
+    }
+    true
+}
+
+/// Escape a Nix string for display, as most user-visible representation
+/// are escaped strings.
+///
+/// Note that this does not add the outer pair of surrounding quotes.
+fn nix_escape_string(input: &str) -> Cow<str> {
+    let mut iter = input.char_indices().peekable();
+
+    while let Some((i, c)) = iter.next() {
+        if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) {
+            let mut escaped = String::with_capacity(input.len());
+            escaped.push_str(&input[..i]);
+            escaped.push_str(esc);
+
+            // In theory we calculate how many bytes it takes to represent `esc`
+            // in UTF-8 and use that for the offset. It is, however, safe to
+            // assume that to be 1, as all characters that can be escaped in a
+            // Nix string are ASCII.
+            let mut inner_iter = input[i + 1..].chars().peekable();
+            while let Some(c) = inner_iter.next() {
+                match nix_escape_char(c, inner_iter.peek()) {
+                    Some(esc) => escaped.push_str(esc),
+                    None => escaped.push(c),
+                }
+            }
+
+            return Cow::Owned(escaped);
+        }
+    }
+
+    Cow::Borrowed(input)
+}
+
+impl Display for NixString {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str("\"")?;
+        f.write_str(&nix_escape_string(self.as_str()))?;
+        f.write_str("\"")
+    }
+}
+
+impl AsRef<str> for NixString {
+    fn as_ref(&self) -> &str {
+        self.as_str()
+    }
+}
+
+impl AsRef<OsStr> for NixString {
+    fn as_ref(&self) -> &OsStr {
+        self.as_str().as_ref()
+    }
+}
+
+impl AsRef<Path> for NixString {
+    fn as_ref(&self) -> &Path {
+        self.as_str().as_ref()
+    }
+}
+
+impl Deref for NixString {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        self.as_str()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use crate::properties::{eq_laws, hash_laws, ord_laws};
+
+    eq_laws!(NixString);
+    hash_laws!(NixString);
+    ord_laws!(NixString);
+}