diff options
Diffstat (limited to 'tvix/eval/src/value/string.rs')
-rw-r--r-- | tvix/eval/src/value/string.rs | 277 |
1 files changed, 277 insertions, 0 deletions
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs new file mode 100644 index 000000000000..8ffbc2a5325c --- /dev/null +++ b/tvix/eval/src/value/string.rs @@ -0,0 +1,277 @@ +//! This module implements Nix language strings. +//! +//! Nix language strings never need to be modified on the language +//! level, allowing us to shave off some memory overhead and only +//! paying the cost when creating new strings. +use rnix::ast; +use std::ffi::OsStr; +use std::hash::Hash; +use std::ops::Deref; +use std::path::Path; +use std::{borrow::Cow, fmt::Display, str::Chars}; + +use serde::de::{Deserializer, Visitor}; +use serde::{Deserialize, Serialize}; + +#[repr(transparent)] +#[derive(Clone, Debug, Serialize)] +pub struct NixString(Box<str>); + +impl PartialEq for NixString { + fn eq(&self, other: &Self) -> bool { + self.as_str() == other.as_str() + } +} + +impl Eq for NixString {} + +impl PartialOrd for NixString { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + self.as_str().partial_cmp(other.as_str()) + } +} + +impl Ord for NixString { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.as_str().cmp(other.as_str()) + } +} + +impl From<&str> for NixString { + fn from(s: &str) -> Self { + NixString(Box::from(s)) + } +} + +impl From<String> for NixString { + fn from(s: String) -> Self { + NixString(s.into_boxed_str()) + } +} + +impl From<Box<str>> for NixString { + fn from(s: Box<str>) -> Self { + Self(s) + } +} + +impl From<ast::Ident> for NixString { + fn from(ident: ast::Ident) -> Self { + ident.ident_token().unwrap().text().into() + } +} + +impl Hash for NixString { + fn hash<H: std::hash::Hasher>(&self, state: &mut H) { + self.as_str().hash(state) + } +} + +impl<'de> Deserialize<'de> for NixString { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + struct StringVisitor; + + impl<'de> Visitor<'de> for StringVisitor { + type Value = NixString; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + formatter.write_str("a valid Nix string") + } + + fn visit_string<E>(self, v: String) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(v.into()) + } + + fn visit_str<E>(self, v: &str) -> Result<Self::Value, E> + where + E: serde::de::Error, + { + Ok(v.into()) + } + } + + deserializer.deserialize_string(StringVisitor) + } +} + +#[cfg(feature = "arbitrary")] +mod arbitrary { + use super::*; + use proptest::prelude::{any_with, Arbitrary}; + use proptest::strategy::{BoxedStrategy, Strategy}; + + impl Arbitrary for NixString { + type Parameters = <String as Arbitrary>::Parameters; + + type Strategy = BoxedStrategy<Self>; + + fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { + any_with::<String>(args).prop_map(Self::from).boxed() + } + } +} + +impl NixString { + pub fn as_str(&self) -> &str { + &self.0 + } + + /// Return a displayable representation of the string as an + /// identifier. + /// + /// This is used when printing out strings used as e.g. attribute + /// set keys, as those are only escaped in the presence of special + /// characters. + pub fn ident_str(&self) -> Cow<str> { + let escaped = nix_escape_string(self.as_str()); + + match escaped { + // A borrowed string is unchanged and can be returned as + // is. + Cow::Borrowed(_) => { + if is_valid_nix_identifier(&escaped) && !is_keyword(&escaped) { + escaped + } else { + Cow::Owned(format!("\"{}\"", escaped)) + } + } + + // An owned string has escapes, and needs the outer quotes + // for display. + Cow::Owned(s) => Cow::Owned(format!("\"{}\"", s)), + } + } + + pub fn concat(&self, other: &Self) -> Self { + let mut s = self.as_str().to_owned(); + s.push_str(other.as_str()); + NixString(s.into_boxed_str()) + } + + pub fn chars(&self) -> Chars<'_> { + self.0.chars() + } +} + +fn nix_escape_char(ch: char, next: Option<&char>) -> Option<&'static str> { + match (ch, next) { + ('\\', _) => Some("\\\\"), + ('"', _) => Some("\\\""), + ('\n', _) => Some("\\n"), + ('\t', _) => Some("\\t"), + ('\r', _) => Some("\\r"), + ('$', Some('{')) => Some("\\$"), + _ => None, + } +} + +/// Return true if this string is a keyword -- character strings +/// which lexically match the "identifier" production but are not +/// parsed as identifiers. See also cppnix commit +/// b72bc4a972fe568744d98b89d63adcd504cb586c. +fn is_keyword(s: &str) -> bool { + match s { + "if" | "then" | "else" | "assert" | "with" | "let" | "in" | "rec" | "inherit" => true, + _ => false, + } +} + +/// Return true if this string can be used as an identifier in Nix. +fn is_valid_nix_identifier(s: &str) -> bool { + // adapted from rnix-parser's tokenizer.rs + let mut chars = s.chars(); + match chars.next() { + Some('a'..='z' | 'A'..='Z' | '_') => (), + _ => return false, + } + for c in chars { + match c { + 'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => (), + _ => return false, + } + } + true +} + +/// Escape a Nix string for display, as most user-visible representation +/// are escaped strings. +/// +/// Note that this does not add the outer pair of surrounding quotes. +fn nix_escape_string(input: &str) -> Cow<str> { + let mut iter = input.char_indices().peekable(); + + while let Some((i, c)) = iter.next() { + if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) { + let mut escaped = String::with_capacity(input.len()); + escaped.push_str(&input[..i]); + escaped.push_str(esc); + + // In theory we calculate how many bytes it takes to represent `esc` + // in UTF-8 and use that for the offset. It is, however, safe to + // assume that to be 1, as all characters that can be escaped in a + // Nix string are ASCII. + let mut inner_iter = input[i + 1..].chars().peekable(); + while let Some(c) = inner_iter.next() { + match nix_escape_char(c, inner_iter.peek()) { + Some(esc) => escaped.push_str(esc), + None => escaped.push(c), + } + } + + return Cow::Owned(escaped); + } + } + + Cow::Borrowed(input) +} + +impl Display for NixString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str("\"")?; + f.write_str(&nix_escape_string(self.as_str()))?; + f.write_str("\"") + } +} + +impl AsRef<str> for NixString { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl AsRef<OsStr> for NixString { + fn as_ref(&self) -> &OsStr { + self.as_str().as_ref() + } +} + +impl AsRef<Path> for NixString { + fn as_ref(&self) -> &Path { + self.as_str().as_ref() + } +} + +impl Deref for NixString { + type Target = str; + + fn deref(&self) -> &Self::Target { + self.as_str() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use crate::properties::{eq_laws, hash_laws, ord_laws}; + + eq_laws!(NixString); + hash_laws!(NixString); + ord_laws!(NixString); +} |