//! This module implements `builtins.toXML`, which is a serialisation //! of value information as well as internal tvix state that several //! things in nixpkgs rely on. use bstr::ByteSlice; use std::borrow::Cow; use std::{io::Write, rc::Rc}; use crate::{ErrorKind, NixContext, NixContextElement, Value}; /// Recursively serialise a value to XML. The value *must* have been /// deep-forced before being passed to this function. /// On success, returns the NixContext. pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<NixContext, ErrorKind> { // Write a literal document declaration, using C++-Nix-style // single quotes. writeln!(writer, "<?xml version='1.0' encoding='utf-8'?>")?; let mut emitter = XmlEmitter::new(writer); emitter.write_open_tag("expr", &[])?; value_variant_to_xml(&mut emitter, value)?; emitter.write_closing_tag("expr")?; Ok(emitter.into_context()) } fn write_typed_value<W: Write, V: ToString>( w: &mut XmlEmitter<W>, name_unescaped: &str, value: V, ) -> Result<(), ErrorKind> { w.write_self_closing_tag(name_unescaped, &[("value", &value.to_string())])?; Ok(()) } fn value_variant_to_xml<W: Write>(w: &mut XmlEmitter<W>, value: &Value) -> Result<(), ErrorKind> { match value { Value::Thunk(t) => return value_variant_to_xml(w, &t.value()), Value::Null => { w.write_open_tag("null", &[])?; w.write_closing_tag("null")?; } Value::Bool(b) => return write_typed_value(w, "bool", b), Value::Integer(i) => return write_typed_value(w, "int", i), Value::Float(f) => return write_typed_value(w, "float", f), Value::String(s) => { if let Some(context) = s.context() { w.extend_context(context.iter().cloned()); } return write_typed_value(w, "string", s.to_str()?); } Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()), Value::List(list) => { w.write_open_tag("list", &[])?; for elem in list.into_iter() { value_variant_to_xml(w, elem)?; } w.write_closing_tag("list")?; } Value::Attrs(attrs) => { w.write_open_tag("attrs", &[])?; for elem in attrs.iter() { w.write_open_tag("attr", &[("name", &elem.0.to_str_lossy())])?; value_variant_to_xml(w, elem.1)?; w.write_closing_tag("attr")?; } w.write_closing_tag("attrs")?; } Value::Closure(c) => { w.write_open_tag("function", &[])?; match &c.lambda.formals { Some(formals) => { let mut attrs: Vec<(&str, &str)> = Vec::with_capacity(2); if formals.ellipsis { attrs.push(("ellipsis", "1")); } if let Some(ref name) = &formals.name { attrs.push(("name", name.as_str())); } w.write_open_tag("attrspat", &attrs)?; for arg in formals.arguments.iter() { w.write_self_closing_tag("attr", &[("name", &arg.0.to_str_lossy())])?; } w.write_closing_tag("attrspat")?; } None => { // TODO(tazjin): tvix does not currently persist function // argument names anywhere (whereas we do for formals, as // that is required for other runtime behaviour). Because of // this the implementation here is fake, always returning // the same argument name. // // If we don't want to persist the data, we can re-parse the // AST from the spans of the lambda's bytecode and figure it // out that way, but it needs some investigating. w.write_self_closing_tag("varpat", &[("name", /* fake: */ "x")])?; } } w.write_closing_tag("function")?; } Value::Builtin(_) => { w.write_open_tag("unevaluated", &[])?; w.write_closing_tag("unevaluated")?; } Value::AttrNotFound | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) | Value::FinaliseRequest(_) => { return Err(ErrorKind::TvixBug { msg: "internal value variant encountered in builtins.toXML", metadata: Some(Rc::new(value.clone())), }) } Value::Catchable(_) => { panic!("tvix bug: value_to_xml() called on a value which had not been deep-forced") } }; Ok(()) } /// A simple-stupid XML emitter, which implements only the subset needed for byte-by-byte compat with C++ nix’ `builtins.toXML`. struct XmlEmitter<W> { /// The current indentation cur_indent: usize, writer: W, context: NixContext, } impl<W: Write> XmlEmitter<W> { pub fn new(writer: W) -> Self { XmlEmitter { cur_indent: 0, writer, context: Default::default(), } } /// Write an open tag with the given name (which is not escaped!) /// and attributes (Keys are not escaped! Only attribute values are.) pub fn write_open_tag( &mut self, name_unescaped: &str, attrs: &[(&str, &str)], ) -> std::io::Result<()> { self.add_indent()?; self.writer.write_all(b"<")?; self.writer.write_all(name_unescaped.as_bytes())?; self.write_attrs_escape_vals(attrs)?; self.writer.write_all(b">\n")?; self.cur_indent += 2; Ok(()) } /// Write a self-closing open tag with the given name (which is not escaped!) /// and attributes (Keys are not escaped! Only attribute values are.) pub fn write_self_closing_tag( &mut self, name_unescaped: &str, attrs: &[(&str, &str)], ) -> std::io::Result<()> { self.add_indent()?; self.writer.write_all(b"<")?; self.writer.write_all(name_unescaped.as_bytes())?; self.write_attrs_escape_vals(attrs)?; self.writer.write_all(b" />\n")?; Ok(()) } /// Write a closing tag with the given name (which is not escaped!) pub fn write_closing_tag(&mut self, name_unescaped: &str) -> std::io::Result<()> { self.cur_indent -= 2; self.add_indent()?; self.writer.write_all(b"</")?; self.writer.write_all(name_unescaped.as_bytes())?; self.writer.write_all(b">\n")?; Ok(()) } #[inline] fn add_indent(&mut self) -> std::io::Result<()> { self.writer.write_all(&b" ".repeat(self.cur_indent)) } /// Write an attribute list fn write_attrs_escape_vals(&mut self, attrs: &[(&str, &str)]) -> std::io::Result<()> { for (name, val) in attrs { self.writer.write_all(b" ")?; self.writer.write_all(name.as_bytes())?; self.writer.write_all(br#"=""#)?; self.writer .write_all(Self::escape_attr_value(val).as_bytes())?; self.writer.write_all(b"\"")?; } Ok(()) } /// Escape the given attribute value, making sure we only actually clone the string if we needed to replace something. fn escape_attr_value(s: &str) -> Cow<str> { let mut last_escape: usize = 0; let mut res: Cow<str> = Cow::Borrowed(""); // iterating via char_indices gives us the ability to index the original string slice at character boundaries for (idx, c) in s.char_indices() { match Self::should_escape_char(c) { None => {} Some(new) => { // add characters since the last escape we did res += &s[last_escape..idx]; // add the escaped value res += new; last_escape = idx + 1; } } } // we did not need to escape anything, so borrow original string if last_escape == 0 { Cow::Borrowed(s) } else { // add the remaining characters res += &s[last_escape..]; res } } fn should_escape_char(c: char) -> Option<&'static str> { match c { '<' => Some("<"), '>' => Some(">"), '"' => Some("""), '\'' => Some("'"), '&' => Some("&"), '\n' => Some("
"), '\r' => Some("
"), _ => None, } } /// Extends the existing context with more context elements. fn extend_context<T>(&mut self, iter: T) where T: IntoIterator<Item = NixContextElement>, { self.context.extend(iter) } /// Consumes [Self] and returns the [NixContext] collected. fn into_context(self) -> NixContext { self.context } } #[cfg(test)] mod tests { use bytes::buf::Writer; use pretty_assertions::assert_eq; use crate::builtins::to_xml::XmlEmitter; use std::borrow::Cow; #[test] fn xml_gen() { let mut buf = Vec::new(); let mut x = XmlEmitter::new(&mut buf); x.write_open_tag("hello", &[("hi", "it’s me"), ("no", "<escape>")]) .unwrap(); x.write_self_closing_tag("self-closing", &[("tag", "yay")]) .unwrap(); x.write_closing_tag("hello").unwrap(); assert_eq!( std::str::from_utf8(&buf).unwrap(), r##"<hello hi="it’s me" no="<escape>"> <self-closing tag="yay" /> </hello> "## ); } #[test] fn xml_escape() { match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("ab<>c&de") { Cow::Owned(s) => assert_eq!(s, "ab<>c&de".to_string(), "escape stuff"), Cow::Borrowed(s) => panic!("s should be owned {}", s), } match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("") { Cow::Borrowed(s) => assert_eq!(s, "", "empty escape is borrowed"), Cow::Owned(s) => panic!("s should be borrowed {}", s), } match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!ŷbla") { Cow::Borrowed(s) => assert_eq!(s, "hi!ŷbla", "no escape is borrowed"), Cow::Owned(s) => panic!("s should be borrowed {}", s), } match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!<ŷ>bla") { Cow::Owned(s) => assert_eq!( s, "hi!<ŷ>bla".to_string(), "multi-byte chars are correctly used" ), Cow::Borrowed(s) => panic!("s should be owned {}", s), } } }