From a44a8985cc0ae126f86d26493d97d80a09b211f8 Mon Sep 17 00:00:00 2001 From: Peter Kolloch Date: Wed, 21 Feb 2024 18:16:36 +0700 Subject: feat(tvix/nix-compat): generalize aterm writing for derivation ...so that we can also use `StorePath`s in derivation.input_derivations. Towards https://b.tvl.fyi/issues/264 Change-Id: I71d296ca273979c70f277a7f4f88a5f76de3d8be Reviewed-on: https://cl.tvl.fyi/c/depot/+/10973 Reviewed-by: Peter Kolloch Autosubmit: Peter Kolloch Tested-by: BuildkiteCI Reviewed-by: flokli --- tvix/nix-compat/src/derivation/mod.rs | 25 +++++++++++---- tvix/nix-compat/src/derivation/write.rs | 54 +++++++++++++++++++++++++++++++-- tvix/nix-compat/src/nixhash/mod.rs | 30 ++++++++++++++++++ 3 files changed, 100 insertions(+), 9 deletions(-) (limited to 'tvix/nix-compat/src') diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index 4d27bbb26d66..3a09c9d56632 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -22,6 +22,8 @@ pub use crate::nixhash::{CAHash, NixHash}; pub use errors::{DerivationError, OutputError}; pub use output::Output; +use self::write::AtermWriteable; + #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] pub struct Derivation { #[serde(rename = "args")] @@ -58,7 +60,7 @@ impl Derivation { fn serialize_with_replacements( &self, writer: &mut impl std::io::Write, - input_derivations: &BTreeMap>, + input_derivations: &BTreeMap>, ) -> Result<(), io::Error> { use write::*; @@ -68,7 +70,19 @@ impl Derivation { write_outputs(writer, &self.outputs)?; write_char(writer, COMMA)?; - write_input_derivations(writer, input_derivations)?; + // To reproduce the exact hashes of original nix, we need to sort + // these by their aterm representation. + // StorePath has a different sort order, so we convert it here. + let input_derivations: BTreeMap> = input_derivations + .iter() + .map(|(k, v)| { + let mut aterm_k = Vec::new(); + k.aterm_write(&mut aterm_k).expect("no write error to Vec"); + (BString::new(aterm_k), v) + }) + .collect(); + + write_input_derivations(writer, &input_derivations)?; write_char(writer, COMMA)?; write_input_sources(writer, &self.input_sources)?; @@ -98,7 +112,7 @@ impl Derivation { /// Like `to_aterm_bytes` but allow input_derivation replacements for hashing. fn to_aterm_bytes_with_replacements( &self, - input_derivations: &BTreeMap>, + input_derivations: &BTreeMap>, ) -> Vec { let mut buffer: Vec = Vec::new(); @@ -202,10 +216,9 @@ impl Derivation { let drv_path = StorePathRef::from_absolute_path(drv_path_str.as_bytes()) .expect("invalid input derivation path"); - let encoded_hash = data_encoding::HEXLOWER - .encode(fn_get_derivation_or_fod_hash(&drv_path).digest_as_bytes()); + let hash = fn_get_derivation_or_fod_hash(&drv_path); - (encoded_hash, output_names.to_owned()) + (hash, output_names.to_owned()) }, )); diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs index e6df40424ea7..f3b16d9cf9a4 100644 --- a/tvix/nix-compat/src/derivation/write.rs +++ b/tvix/nix-compat/src/derivation/write.rs @@ -5,7 +5,10 @@ use crate::aterm::escape_bytes; use crate::derivation::{ca_kind_prefix, output::Output}; +use crate::nixbase32; +use crate::store_path::{StorePath, StorePathRef, STORE_DIR_WITH_SLASH}; use bstr::BString; +use std::fmt::Display; use std::{ collections::{BTreeMap, BTreeSet}, io, @@ -13,6 +16,8 @@ use std::{ io::Write, }; +use super::NixHash; + pub const DERIVATION_PREFIX: &str = "Derive"; pub const PAREN_OPEN: char = '('; pub const PAREN_CLOSE: char = ')'; @@ -21,6 +26,49 @@ pub const BRACKET_CLOSE: char = ']'; pub const COMMA: char = ','; pub const QUOTE: char = '"'; +/// Something that can be written as ATerm. +/// +/// Note that we mostly use explicit `write_*` calls +/// instead since the serialization of the items depends on +/// the context a lot. +pub(crate) trait AtermWriteable: Display { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()>; +} + +impl AtermWriteable for StorePathRef<'_> { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { + write_char(writer, QUOTE)?; + writer.write_all(STORE_DIR_WITH_SLASH.as_bytes())?; + writer.write_all(nixbase32::encode(self.digest()).as_bytes())?; + write_char(writer, '-')?; + writer.write_all(self.name().as_bytes())?; + write_char(writer, QUOTE)?; + Ok(()) + } +} + +impl AtermWriteable for StorePath { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { + let r: StorePathRef = self.into(); + r.aterm_write(writer) + } +} + +impl AtermWriteable for String { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { + write_field(writer, self, true) + } +} + +impl AtermWriteable for NixHash { + fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> { + // When we serialize the placeholder hashes, + // they need to be SHA256. + debug_assert!(matches!(self, NixHash::Sha256(_))); + write_field(writer, self.to_plain_hex_string(), false) + } +} + // Writes a character to the writer. pub(crate) fn write_char(writer: &mut impl Write, c: char) -> io::Result<()> { let mut buf = [0; 4]; @@ -101,17 +149,17 @@ pub(crate) fn write_outputs( pub(crate) fn write_input_derivations( writer: &mut impl Write, - input_derivations: &BTreeMap>, + input_derivations: &BTreeMap>, ) -> Result<(), io::Error> { write_char(writer, BRACKET_OPEN)?; - for (ii, (input_derivation, output_names)) in input_derivations.iter().enumerate() { + for (ii, (input_derivation_aterm, output_names)) in input_derivations.iter().enumerate() { if ii > 0 { write_char(writer, COMMA)?; } write_char(writer, PAREN_OPEN)?; - write_field(writer, input_derivation.as_str(), false)?; + writer.write_all(input_derivation_aterm)?; write_char(writer, COMMA)?; write_char(writer, BRACKET_OPEN)?; diff --git a/tvix/nix-compat/src/nixhash/mod.rs b/tvix/nix-compat/src/nixhash/mod.rs index 727bf77dc57d..a6f2b967996a 100644 --- a/tvix/nix-compat/src/nixhash/mod.rs +++ b/tvix/nix-compat/src/nixhash/mod.rs @@ -1,5 +1,7 @@ use crate::nixbase32; use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER}; +use std::cmp::Ordering; +use std::fmt::Display; use thiserror; mod algos; @@ -17,6 +19,34 @@ pub enum NixHash { Sha512(Box<[u8; 64]>), } +/// Same order as sorting the corresponding nixbase32 strings. +/// +/// This order is used in the ATerm serialization of a derivation +/// and thus affects the calculated output hash. +impl Ord for NixHash { + fn cmp(&self, other: &NixHash) -> Ordering { + self.digest_as_bytes().cmp(other.digest_as_bytes()) + } +} + +// See Ord for reason to implement this manually. +impl PartialOrd for NixHash { + fn partial_cmp(&self, other: &NixHash) -> Option { + Some(self.cmp(other)) + } +} + +impl Display for NixHash { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::result::Result<(), std::fmt::Error> { + write!( + f, + "{}-{}", + self.algo(), + nixbase32::encode(self.digest_as_bytes()) + ) + } +} + /// convenience Result type for all nixhash parsing Results. pub type Result = std::result::Result; -- cgit 1.4.1