//! This module implements Nix language strings.
//!
//! Nix language strings never need to be modified on the language
//! level, allowing us to shave off some memory overhead and only
//! paying the cost when creating new strings.
use rnix::ast;
use std::ffi::OsStr;
use std::hash::Hash;
use std::ops::Deref;
use std::path::Path;
use std::{borrow::Cow, fmt::Display, str::Chars};
use serde::de::{Deserializer, Visitor};
use serde::{Deserialize, Serialize};
#[repr(transparent)]
#[derive(Clone, Debug, Serialize)]
pub struct NixString(Box<str>);
impl PartialEq for NixString {
fn eq(&self, other: &Self) -> bool {
self.as_str() == other.as_str()
}
}
impl Eq for NixString {}
impl PartialOrd for NixString {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
self.as_str().partial_cmp(other.as_str())
}
}
impl Ord for NixString {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.as_str().cmp(other.as_str())
}
}
impl From<&str> for NixString {
fn from(s: &str) -> Self {
NixString(Box::from(s))
}
}
impl From<String> for NixString {
fn from(s: String) -> Self {
NixString(s.into_boxed_str())
}
}
impl From<Box<str>> for NixString {
fn from(s: Box<str>) -> Self {
Self(s)
}
}
impl From<ast::Ident> for NixString {
fn from(ident: ast::Ident) -> Self {
ident.ident_token().unwrap().text().into()
}
}
impl Hash for NixString {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.as_str().hash(state)
}
}
impl<'de> Deserialize<'de> for NixString {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: Deserializer<'de>,
{
struct StringVisitor;
impl<'de> Visitor<'de> for StringVisitor {
type Value = NixString;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
formatter.write_str("a valid Nix string")
}
fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(v.into())
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
Ok(v.into())
}
}
deserializer.deserialize_string(StringVisitor)
}
}
#[cfg(feature = "arbitrary")]
mod arbitrary {
use super::*;
use proptest::prelude::{any_with, Arbitrary};
use proptest::strategy::{BoxedStrategy, Strategy};
impl Arbitrary for NixString {
type Parameters = <String as Arbitrary>::Parameters;
type Strategy = BoxedStrategy<Self>;
fn arbitrary_with(args: Self::Parameters) -> Self::Strategy {
any_with::<String>(args).prop_map(Self::from).boxed()
}
}
}
impl NixString {
pub fn as_str(&self) -> &str {
&self.0
}
/// Return a displayable representation of the string as an
/// identifier.
///
/// This is used when printing out strings used as e.g. attribute
/// set keys, as those are only escaped in the presence of special
/// characters.
pub fn ident_str(&self) -> Cow<str> {
let escaped = nix_escape_string(self.as_str());
match escaped {
// A borrowed string is unchanged and can be returned as
// is.
Cow::Borrowed(_) => {
if is_valid_nix_identifier(&escaped) && !is_keyword(&escaped) {
escaped
} else {
Cow::Owned(format!("\"{}\"", escaped))
}
}
// An owned string has escapes, and needs the outer quotes
// for display.
Cow::Owned(s) => Cow::Owned(format!("\"{}\"", s)),
}
}
pub fn concat(&self, other: &Self) -> Self {
let mut s = self.as_str().to_owned();
s.push_str(other.as_str());
NixString(s.into_boxed_str())
}
pub fn chars(&self) -> Chars<'_> {
self.0.chars()
}
}
fn nix_escape_char(ch: char, next: Option<&char>) -> Option<&'static str> {
match (ch, next) {
('\\', _) => Some("\\\\"),
('"', _) => Some("\\\""),
('\n', _) => Some("\\n"),
('\t', _) => Some("\\t"),
('\r', _) => Some("\\r"),
('$', Some('{')) => Some("\\$"),
_ => None,
}
}
/// Return true if this string is a keyword -- character strings
/// which lexically match the "identifier" production but are not
/// parsed as identifiers. See also cppnix commit
/// b72bc4a972fe568744d98b89d63adcd504cb586c.
fn is_keyword(s: &str) -> bool {
matches!(
s,
"if" | "then" | "else" | "assert" | "with" | "let" | "in" | "rec" | "inherit"
)
}
/// Return true if this string can be used as an identifier in Nix.
fn is_valid_nix_identifier(s: &str) -> bool {
// adapted from rnix-parser's tokenizer.rs
let mut chars = s.chars();
match chars.next() {
Some('a'..='z' | 'A'..='Z' | '_') => (),
_ => return false,
}
for c in chars {
match c {
'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => (),
_ => return false,
}
}
true
}
/// Escape a Nix string for display, as most user-visible representation
/// are escaped strings.
///
/// Note that this does not add the outer pair of surrounding quotes.
fn nix_escape_string(input: &str) -> Cow<str> {
let mut iter = input.char_indices().peekable();
while let Some((i, c)) = iter.next() {
if let Some(esc) = nix_escape_char(c, iter.peek().map(|(_, c)| c)) {
let mut escaped = String::with_capacity(input.len());
escaped.push_str(&input[..i]);
escaped.push_str(esc);
// In theory we calculate how many bytes it takes to represent `esc`
// in UTF-8 and use that for the offset. It is, however, safe to
// assume that to be 1, as all characters that can be escaped in a
// Nix string are ASCII.
let mut inner_iter = input[i + 1..].chars().peekable();
while let Some(c) = inner_iter.next() {
match nix_escape_char(c, inner_iter.peek()) {
Some(esc) => escaped.push_str(esc),
None => escaped.push(c),
}
}
return Cow::Owned(escaped);
}
}
Cow::Borrowed(input)
}
impl Display for NixString {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str("\"")?;
f.write_str(&nix_escape_string(self.as_str()))?;
f.write_str("\"")
}
}
impl AsRef<str> for NixString {
fn as_ref(&self) -> &str {
self.as_str()
}
}
impl AsRef<OsStr> for NixString {
fn as_ref(&self) -> &OsStr {
self.as_str().as_ref()
}
}
impl AsRef<Path> for NixString {
fn as_ref(&self) -> &Path {
self.as_str().as_ref()
}
}
impl Deref for NixString {
type Target = str;
fn deref(&self) -> &Self::Target {
self.as_str()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::properties::{eq_laws, hash_laws, ord_laws};
eq_laws!(NixString);
hash_laws!(NixString);
ord_laws!(NixString);
}