From 79531c3dab1c24ff3171c0aa067004c8e6c92e3f Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 29 Jul 2023 21:14:44 +0200 Subject: refactor(tvix/nix-compat): support non-unicode Derivations Derivations can have non-unicode strings in their env values, so the ATerm representations are not necessarily String anymore, but Vec<u8>. Change-Id: Ic23839471eb7f68d9c3c30667c878830946b6607 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8990 Tested-by: BuildkiteCI Reviewed-by: raitobezarius Autosubmit: flokli --- tvix/nix-compat/src/derivation/write.rs | 145 +++++++++++++++++++------------- 1 file changed, 87 insertions(+), 58 deletions(-) (limited to 'tvix/nix-compat/src/derivation/write.rs') diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs index 52166294e0..cf62f85022 100644 --- a/tvix/nix-compat/src/derivation/write.rs +++ b/tvix/nix-compat/src/derivation/write.rs @@ -3,10 +3,12 @@ //! //! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html +use crate::derivation::escape::escape_bstr; use crate::derivation::output::Output; -use crate::derivation::string_escape::escape_string; +use bstr::BString; use std::collections::BTreeSet; -use std::{collections::BTreeMap, fmt, fmt::Write}; +use std::io::Cursor; +use std::{collections::BTreeMap, io, io::Error, io::Write}; pub const DERIVATION_PREFIX: &str = "Derive"; pub const PAREN_OPEN: char = '('; @@ -16,32 +18,46 @@ pub const BRACKET_CLOSE: char = ']'; pub const COMMA: char = ','; pub const QUOTE: char = '"'; +// Writes a character to the writer. 
+pub(crate) fn write_char(writer: &mut impl Write, c: char) -> io::Result<()> { + let mut buf = [0; 4]; + let b = c.encode_utf8(&mut buf).as_bytes(); + io::copy(&mut Cursor::new(b), writer)?; + Ok(()) +} + +// Writes a string to the writer (as unicode) +pub(crate) fn write_str(writer: &mut impl Write, s: &str) -> io::Result<()> { + io::copy(&mut Cursor::new(s.as_bytes()), writer)?; + Ok(()) +} + fn write_array_elements( writer: &mut impl Write, quote: bool, open: &str, closing: &str, - elements: Vec<&str>, -) -> Result<(), fmt::Error> { - writer.write_str(open)?; + elements: &[BString], +) -> Result<(), io::Error> { + write_str(writer, open)?; for (index, element) in elements.iter().enumerate() { if index > 0 { - writer.write_char(COMMA)?; + write_char(writer, COMMA)?; } if quote { - writer.write_char(QUOTE)?; + write_char(writer, QUOTE)?; } - writer.write_str(element)?; + io::copy(&mut Cursor::new(element), writer)?; if quote { - writer.write_char(QUOTE)?; + write_char(writer, QUOTE)?; } } - writer.write_str(closing)?; + write_str(writer, closing)?; Ok(()) } @@ -49,41 +65,44 @@ fn write_array_elements( pub fn write_outputs( writer: &mut impl Write, outputs: &BTreeMap, -) -> Result<(), fmt::Error> { - writer.write_char(BRACKET_OPEN)?; +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; for (ii, (output_name, output)) in outputs.iter().enumerate() { if ii > 0 { - writer.write_char(COMMA)?; + write_char(writer, COMMA)?; } - let mut elements: Vec<&str> = vec![output_name, &output.path]; + let mut elements: Vec = vec![ + output_name.as_bytes().to_vec().into(), + output.path.as_bytes().to_vec().into(), + ]; let (e2, e3) = match &output.hash_with_mode { Some(hash) => match hash { crate::nixhash::NixHashWithMode::Flat(h) => ( - h.algo.to_string(), - data_encoding::HEXLOWER.encode(&h.digest), + h.algo.to_string().as_bytes().to_vec(), + data_encoding::HEXLOWER.encode(&h.digest).as_bytes().into(), ), crate::nixhash::NixHashWithMode::Recursive(h) => ( - 
format!("r:{}", h.algo), - data_encoding::HEXLOWER.encode(&h.digest), + format!("r:{}", h.algo).as_bytes().to_vec(), + data_encoding::HEXLOWER.encode(&h.digest).as_bytes().into(), ), }, - None => ("".to_string(), "".to_string()), + None => (vec![], vec![]), }; - elements.push(&e2); - elements.push(&e3); + elements.push(e2.into()); + elements.push(e3.into()); write_array_elements( writer, true, &PAREN_OPEN.to_string(), &PAREN_CLOSE.to_string(), - elements, + &elements, )? } - writer.write_char(BRACKET_CLOSE)?; + write_char(writer, BRACKET_CLOSE)?; Ok(()) } @@ -91,33 +110,37 @@ pub fn write_outputs( pub fn write_input_derivations( writer: &mut impl Write, input_derivations: &BTreeMap>, -) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; - writer.write_char(BRACKET_OPEN)?; +) -> Result<(), io::Error> { + write_char(writer, COMMA)?; + write_char(writer, BRACKET_OPEN)?; - for (ii, (input_derivation_path, input_derivation)) in input_derivations.iter().enumerate() { + for (ii, (input_derivation_path, input_derivation)) in input_derivations.into_iter().enumerate() + { if ii > 0 { - writer.write_char(COMMA)?; + write_char(writer, COMMA)?; } - writer.write_char(PAREN_OPEN)?; - writer.write_char(QUOTE)?; - writer.write_str(input_derivation_path.as_str())?; - writer.write_char(QUOTE)?; - writer.write_char(COMMA)?; + write_char(writer, PAREN_OPEN)?; + write_char(writer, QUOTE)?; + write_str(writer, input_derivation_path.as_str())?; + write_char(writer, QUOTE)?; + write_char(writer, COMMA)?; write_array_elements( writer, true, &BRACKET_OPEN.to_string(), &BRACKET_CLOSE.to_string(), - input_derivation.iter().map(|s| &**s).collect(), + &input_derivation + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::>(), )?; - writer.write_char(PAREN_CLOSE)?; + write_char(writer, PAREN_CLOSE)?; } - writer.write_char(BRACKET_CLOSE)?; + write_char(writer, BRACKET_CLOSE)?; Ok(()) } @@ -125,39 +148,45 @@ pub fn write_input_derivations( pub fn write_input_sources( writer: &mut 
impl Write, input_sources: &BTreeSet, -) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; +) -> Result<(), io::Error> { + write_char(writer, COMMA)?; write_array_elements( writer, true, &BRACKET_OPEN.to_string(), &BRACKET_CLOSE.to_string(), - input_sources.iter().map(|s| &**s).collect(), + &input_sources + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::>(), )?; Ok(()) } -pub fn write_system(writer: &mut impl Write, platform: &str) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; - writer.write_str(escape_string(platform).as_str())?; +pub fn write_system(writer: &mut impl Write, platform: &str) -> Result<(), Error> { + write_char(writer, COMMA)?; + io::copy(&mut Cursor::new(escape_bstr(platform.as_bytes())), writer)?; Ok(()) } -pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; - writer.write_str(escape_string(builder).as_str())?; +pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), Error> { + write_char(writer, COMMA)?; + io::copy(&mut Cursor::new(escape_bstr(builder.as_bytes())), writer)?; Ok(()) } -pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; +pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), io::Error> { + write_char(writer, COMMA)?; write_array_elements( writer, true, &BRACKET_OPEN.to_string(), &BRACKET_CLOSE.to_string(), - arguments.iter().map(|s| &**s).collect(), + &arguments + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::>(), )?; Ok(()) @@ -165,14 +194,14 @@ pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result< pub fn write_enviroment( writer: &mut impl Write, - environment: &BTreeMap, -) -> Result<(), fmt::Error> { - writer.write_char(COMMA)?; - writer.write_char(BRACKET_OPEN)?; - - for (ii, (key, environment)) in environment.iter().enumerate() { - if ii > 0 { - 
writer.write_char(COMMA)?; + environment: &BTreeMap, +) -> Result<(), io::Error> { + write_char(writer, COMMA)?; + write_char(writer, BRACKET_OPEN)?; + + for (i, (k, v)) in environment.into_iter().enumerate() { + if i > 0 { + write_char(writer, COMMA)?; } write_array_elements( @@ -180,11 +209,11 @@ pub fn write_enviroment( false, &PAREN_OPEN.to_string(), &PAREN_CLOSE.to_string(), - vec![&escape_string(key), &escape_string(environment)], + &[escape_bstr(k.as_bytes()), escape_bstr(v)], )?; } - writer.write_char(BRACKET_CLOSE)?; + write_char(writer, BRACKET_CLOSE)?; Ok(()) } -- cgit 1.4.1