From b55d1f97ce8762711ff44f9b5695452cc8083c44 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Tue, 14 Mar 2023 17:16:05 +0100 Subject: refactor(tvix/nix-compat): -derivation::Hash, +NixHash This stops using our own custom Hash structure, which was mostly only used because we had to parse the JSON representation somehow. Since cl/8217, there's a `NixHash` struct, which is better suited to hold this data. Converting the format requires a bit of serde labor though, but that only really matters when interacting with JSON representations (which we mostly don't). Change-Id: Idc5ee511e36e6726c71f66face8300a441b0bf4c Reviewed-on: https://cl.tvl.fyi/c/depot/+/8304 Autosubmit: flokli Tested-by: BuildkiteCI Reviewed-by: tazjin --- tvix/Cargo.nix | 4 + tvix/cli/src/derivation.rs | 21 +++--- tvix/nix-compat/Cargo.toml | 1 + tvix/nix-compat/src/derivation/mod.rs | 67 +++++++++-------- tvix/nix-compat/src/derivation/output.rs | 38 +++------- tvix/nix-compat/src/derivation/tests/mod.rs | 19 +++-- tvix/nix-compat/src/derivation/write.rs | 26 ++++--- tvix/nix-compat/src/nixhash.rs | 112 +++++++++++++++++++++++++++- 8 files changed, 201 insertions(+), 87 deletions(-) (limited to 'tvix') diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 7dcd3cef5347..25869404fe94 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -3610,6 +3610,10 @@ rec { packageId = "serde"; features = [ "derive" ]; } + { + name = "serde_json"; + packageId = "serde_json"; + } { name = "sha2"; packageId = "sha2 0.10.6"; diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs index 61c4489954f1..1508e3e6d935 100644 --- a/tvix/cli/src/derivation.rs +++ b/tvix/cli/src/derivation.rs @@ -1,5 +1,5 @@ //! Implements `builtins.derivation`, the core of what makes Nix build packages. -use nix_compat::derivation::{Derivation, Hash}; +use nix_compat::derivation::Derivation; use nix_compat::{hash_placeholder, nixhash}; use std::cell::RefCell; use std::collections::{btree_map, BTreeSet}; @@ -126,19 +126,18 @@ fn populate_output_configuration( let output_hash = nixhash::from_str(&hash, a).map_err(Error::InvalidOutputHash)?; - // construct the algo string. Depending on hashMode, we prepend a `r:`. - let algo = match hash_mode.as_deref() { - None | Some("flat") => format!("{}", &output_hash.algo), - Some("recursive") => format!("r:{}", &output_hash.algo), + // construct the NixHashWithMode. + out.hash_with_mode = match hash_mode.as_deref() { + None | Some("flat") => Some(nixhash::NixHashWithMode::Flat( + nixhash::NixHash::new(output_hash.algo, output_hash.digest), + )), + Some("recursive") => Some(nixhash::NixHashWithMode::Recursive( + nixhash::NixHash::new(output_hash.algo, output_hash.digest), + )), Some(other) => { return Err(Error::InvalidOutputHashMode(other.to_string()).into()) } - }; - - out.hash = Some(Hash { - algo, - digest: data_encoding::HEXLOWER.encode(&output_hash.digest), - }); + } } } } diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml index 7758ea995cc8..49ddbf4728e9 100644 --- a/tvix/nix-compat/Cargo.toml +++ b/tvix/nix-compat/Cargo.toml @@ -10,6 +10,7 @@ anyhow = "1.0.68" data-encoding = "2.3.3" glob = "0.3.0" serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" sha2 = "0.10.6" thiserror = "1.0.38" diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index 2b549536b247..06b7ba02f4d8 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -1,5 +1,5 @@ use crate::{ - nixhash::NixHash, + nixhash::HashAlgo, store_path::{self, StorePath}, }; use serde::{Deserialize, Serialize}; @@ -17,8 +17,9 @@ mod write; mod tests; // Public API of the crate. +pub use crate::nixhash::{NixHash, NixHashWithMode}; pub use errors::{DerivationError, OutputError}; -pub use output::{Hash, Output}; +pub use output::Output; pub use utils::path_with_references; #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] @@ -75,23 +76,22 @@ impl Derivation { buffer } - /// Returns the fixed output path and its hash - // (if the Derivation is fixed output), - /// or None if there is no fixed output. + /// Returns the fixed output path and its [NixHashWithMode] + /// (if the Derivation is fixed output), or None if there is no fixed output. /// This takes some shortcuts in case more than one output exists, as this /// can't be a valid fixed-output Derivation. - pub fn get_fixed_output(&self) -> Option<(&String, &Hash)> { + pub fn get_fixed_output(&self) -> Option<(&String, &NixHashWithMode)> { if self.outputs.len() != 1 { return None; } if let Some(out_output) = self.outputs.get("out") { - if let Some(out_output_hash) = &out_output.hash { + if let Some(out_output_hash) = &out_output.hash_with_mode { return Some((&out_output.path, out_output_hash)); } // There has to be a hash, otherwise it would not be FOD } - return None; + None } /// Returns the drv path of a Derivation struct. @@ -160,8 +160,6 @@ impl Derivation { /// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of /// the A-Term. /// - /// TODO: what's the representation of ${digest}? - /// /// If the derivation is not a fixed derivation, it's up to the caller of /// this function to provide a lookup function to lookup these calculation /// results of parent derivations at `fn_get_hash_derivation_modulo` (by @@ -175,8 +173,9 @@ impl Derivation { // Fixed-output derivations return a fixed hash Some((fixed_output_path, fixed_output_hash)) => { hasher.update(format!( - "fixed:out:{}:{}:{}", - &fixed_output_hash.algo, &fixed_output_hash.digest, fixed_output_path, + "fixed:out:{}:{}", + fixed_output_hash.to_nix_hash_string(), + fixed_output_path )); hasher.finalize() } @@ -196,7 +195,7 @@ impl Derivation { for (drv_path, output_names) in &self.input_derivations { replaced_input_derivations.insert( data_encoding::HEXLOWER - .encode(&fn_get_derivation_or_fod_hash(&drv_path).digest), + .encode(&fn_get_derivation_or_fod_hash(drv_path).digest), output_names.clone(), ); } @@ -255,7 +254,7 @@ impl Derivation { output_path_name.push_str(output_name); } - let s = &format!( + let fp = &format!( "output:{}:{}:{}:{}", output_name, derivation_or_fod_hash.to_nix_hash_string(), @@ -264,7 +263,7 @@ impl Derivation { ); let abs_store_path = - utils::build_store_path(false, s, &output_path_name)?.to_absolute_path(); + utils::build_store_path(false, fp, &output_path_name)?.to_absolute_path(); output.path = abs_store_path.clone(); self.environment @@ -277,33 +276,41 @@ impl Derivation { // footgun prevention mechanism. assert!(fixed_output_path.is_empty()); - let s = { - let mut s = String::new(); + let fp = { + let mut fp = String::new(); // Fixed-output derivation. - // There's two different hashing strategies in place, depending on the value of hash.algo. + // There's two different hashing strategies in place, + // depending on whether the hash is recursive AND sha256 or anything else. // This code is _weird_ but it is what Nix is doing. See: // https://github.com/NixOS/nix/blob/1385b2007804c8a0370f2a6555045a00e34b07c7/src/libstore/store-api.cc#L178-L196 - if fixed_output_hash.algo == "r:sha256" { - s.push_str(&format!( - "source:sha256:{}", - fixed_output_hash.digest, // lowerhex - )); + if let NixHashWithMode::Recursive(recursive_hash) = fixed_output_hash { + if recursive_hash.algo == HashAlgo::Sha256 { + fp.push_str(&format!("source:{}", recursive_hash.to_nix_hash_string())); + } else { + // This is similar to the FOD case, with an empty fixed_output_path. + fp.push_str(&format!( + "output:out:{}", + derivation_or_fod_hash.to_nix_hash_string(), + )); + } } else { - s.push_str("output:out:"); - // This is drv_replacement for FOD, with an empty fixed_output_path. - s.push_str(&derivation_or_fod_hash.to_nix_hash_string()); + // This is similar to the FOD case, with an empty fixed_output_path. + fp.push_str(&format!( + "output:out:{}", + derivation_or_fod_hash.to_nix_hash_string(), + )); } - s.push_str(&format!(":{}:{}", store_path::STORE_DIR, name)); - s + fp.push_str(&format!(":{}:{}", store_path::STORE_DIR, name)); + fp }; - let abs_store_path = utils::build_store_path(false, &s, name)?.to_absolute_path(); + let abs_store_path = utils::build_store_path(false, &fp, name)?.to_absolute_path(); self.outputs.insert( "out".to_string(), Output { path: abs_store_path.clone(), - hash: Some(fixed_output_hash.clone()), + hash_with_mode: Some(fixed_output_hash.clone()), }, ); self.environment.insert("out".to_string(), abs_store_path); diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs index 39c0dbde5a23..4bfc7bf8014d 100644 --- a/tvix/nix-compat/src/derivation/output.rs +++ b/tvix/nix-compat/src/derivation/output.rs @@ -1,5 +1,6 @@ use crate::derivation::OutputError; -use crate::{nixbase32, store_path::StorePath}; +use crate::nixhash::{HashAlgo, NixHashWithMode}; +use crate::store_path::StorePath; use serde::{Deserialize, Serialize}; #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] @@ -7,42 +8,23 @@ pub struct Output { pub path: String, #[serde(flatten)] - pub hash: Option, -} - -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] -pub struct Hash { - #[serde(rename = "hash")] - pub digest: String, - #[serde(rename = "hashAlgo")] - pub algo: String, + pub hash_with_mode: Option, } impl Output { pub fn is_fixed(&self) -> bool { - self.hash.is_some() + self.hash_with_mode.is_some() } pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> { - if let Some(hash) = &self.hash { - // try to decode digest - let result = nixbase32::decode(hash.digest.as_bytes()); - match result { - Err(e) => return Err(OutputError::InvalidHashEncoding(hash.digest.clone(), e)), - Ok(digest) => { - if hash.algo != "sha1" && hash.algo != "sha256" { - return Err(OutputError::InvalidHashAlgo(hash.algo.to_string())); - } - if (hash.algo == "sha1" && digest.len() != 20) - || (hash.algo == "sha256" && digest.len() != 32) - { - return Err(OutputError::InvalidDigestSizeForAlgo( - digest.len(), - hash.algo.to_string(), - )); + if let Some(hash) = &self.hash_with_mode { + match hash { + NixHashWithMode::Flat(h) | NixHashWithMode::Recursive(h) => { + if h.algo != HashAlgo::Sha1 || h.algo != HashAlgo::Sha256 { + return Err(OutputError::InvalidHashAlgo(h.algo.to_string())); } } - }; + } } if validate_output_paths { if let Err(e) = StorePath::from_absolute_path(&self.path) { diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 7f94f5e690ce..d7b63a45ad9c 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -1,5 +1,6 @@ -use crate::derivation::output::{Hash, Output}; +use crate::derivation::output::Output; use crate::derivation::Derivation; +use crate::nixhash::NixHash; use crate::store_path::StorePath; use std::collections::BTreeSet; use std::fs::File; @@ -218,11 +219,15 @@ fn output_path_construction() { "out".to_string(), Output { path: "".to_string(), // will be calculated - hash: Some(Hash { - digest: "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" - .to_string(), - algo: "r:sha256".to_string(), - }), + hash_with_mode: Some(crate::nixhash::NixHashWithMode::Recursive(NixHash { + digest: data_encoding::HEXLOWER + .decode( + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + .as_bytes(), + ) + .unwrap(), + algo: crate::nixhash::HashAlgo::Sha256, + })), }, ); @@ -271,7 +276,7 @@ fn output_path_construction() { "out".to_string(), Output { path: "".to_string(), // will be calculated - hash: None, + hash_with_mode: None, }, ); diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs index 9423ef2e6af4..52166294e078 100644 --- a/tvix/nix-compat/src/derivation/write.rs +++ b/tvix/nix-compat/src/derivation/write.rs @@ -58,16 +58,22 @@ pub fn write_outputs( let mut elements: Vec<&str> = vec![output_name, &output.path]; - match &output.hash { - Some(hash) => { - elements.push(&hash.algo); - elements.push(&hash.digest); - } - None => { - elements.push(""); - elements.push(""); - } - } + let (e2, e3) = match &output.hash_with_mode { + Some(hash) => match hash { + crate::nixhash::NixHashWithMode::Flat(h) => ( + h.algo.to_string(), + data_encoding::HEXLOWER.encode(&h.digest), + ), + crate::nixhash::NixHashWithMode::Recursive(h) => ( + format!("r:{}", h.algo), + data_encoding::HEXLOWER.encode(&h.digest), + ), + }, + None => ("".to_string(), "".to_string()), + }; + + elements.push(&e2); + elements.push(&e3); write_array_elements( writer, diff --git a/tvix/nix-compat/src/nixhash.rs b/tvix/nix-compat/src/nixhash.rs index 4cb076ed16b0..0da90fe4ccf5 100644 --- a/tvix/nix-compat/src/nixhash.rs +++ b/tvix/nix-compat/src/nixhash.rs @@ -1,14 +1,124 @@ use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER}; +use serde::ser::SerializeMap; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt::Display; use thiserror::Error; use crate::nixbase32; +/// A Nix Hash can either be flat or recursive. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum NixHashWithMode { + Flat(NixHash), + Recursive(NixHash), +} + +impl NixHashWithMode { + /// Formats a [NixHashWithMode] in the Nix default hash format, + /// which is the algo, followed by a colon, then the lower hex encoded digest. + /// In case the hash itself is recursive, a `r:` is added as prefix + pub fn to_nix_hash_string(&self) -> String { + match self { + NixHashWithMode::Flat(h) => h.to_nix_hash_string(), + NixHashWithMode::Recursive(h) => format!("r:{}", h.to_nix_hash_string()), + } + } +} + +impl Serialize for NixHashWithMode { + /// map a NixHashWithMode into the serde data model. + fn serialize(&self, serializer: S) -> Result + where + S: Serializer, + { + let mut map = serializer.serialize_map(Some(2))?; + match self { + NixHashWithMode::Flat(h) => { + map.serialize_entry("hash", &nixbase32::encode(&h.digest))?; + map.serialize_entry("hashAlgo", &h.algo.to_string())?; + } + NixHashWithMode::Recursive(h) => { + map.serialize_entry("hash", &nixbase32::encode(&h.digest))?; + map.serialize_entry("hashAlgo", &format!("r:{}", &h.algo.to_string()))?; + } + }; + map.end() + } +} + +impl<'de> Deserialize<'de> for NixHashWithMode { + /// map the serde data model into a NixHashWithMode. + /// + /// The serde data model has a `hash` field (containing a digest in nixbase32), + /// and a `hashAlgo` field, containing the stringified hash algo. + /// In case the hash is recursive, hashAlgo also has a `r:` prefix. + /// + /// This is to match how `nix show-derivation` command shows them in JSON + /// representation. + fn deserialize(deserializer: D) -> Result + where + D: Deserializer<'de>, + { + // TODO: don't use serde_json here? + // TODO: serde seems to simply set `hash_with_mode` to None if hash + // and hashAlgo fail, but that should be a proper deserialization error + // that should be propagated to the user! + + let json = serde_json::Value::deserialize(deserializer)?; + match json.as_object() { + None => Err(serde::de::Error::custom("couldn't parse as map"))?, + Some(map) => { + let digest: Vec = { + if let Some(v) = map.get("hash") { + if let Some(s) = v.as_str() { + data_encoding::HEXLOWER + .decode(s.as_bytes()) + .map_err(|e| serde::de::Error::custom(e.to_string()))? + } else { + return Err(serde::de::Error::custom( + "couldn't parse 'hash' as string", + )); + } + } else { + return Err(serde::de::Error::custom("couldn't extract 'hash' key")); + } + }; + + if let Some(v) = map.get("hashAlgo") { + if let Some(s) = v.as_str() { + match s.strip_prefix("r:") { + Some(rest) => Ok(NixHashWithMode::Recursive(NixHash::new( + HashAlgo::try_from(rest).map_err(|e| { + serde::de::Error::custom(format!("unable to parse algo: {}", e)) + })?, + digest, + ))), + None => Ok(NixHashWithMode::Flat(NixHash::new( + HashAlgo::try_from(s).map_err(|e| { + serde::de::Error::custom(format!("unable to parse algo: {}", e)) + })?, + digest, + ))), + } + } else { + Err(serde::de::Error::custom( + "couldn't parse 'hashAlgo' as string", + )) + } + } else { + Err(serde::de::Error::custom("couldn't extract 'hashAlgo' key")) + } + } + } + } +} + /// Nix allows specifying hashes in various encodings, and magically just /// derives the encoding. #[derive(Clone, Debug, Eq, PartialEq)] pub struct NixHash { pub digest: Vec, + pub algo: HashAlgo, } @@ -26,7 +136,7 @@ impl NixHash { } /// This are the hash algorithms supported by cppnix. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum HashAlgo { Md5, Sha1, -- cgit 1.4.1