From 34fc4637ebbb906d38647ca8a12fdb80cd2baf18 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Wed, 18 Oct 2023 11:39:36 +0100 Subject: refactor(tvix/nix-compat): rename NixHashWithMode -> CAHash This specific struct is only used to represent content-addressed paths (in case a Derivation has a fixed-output hash, for example). Rename `Output`'s `hash_with_mode` to `ca_hash`. We now also include `CAHash::Text`, and update the `validate` function of the `Output` struct to reject text hashes there. This allows cleaning up the various output path calculation functions inside nix-compat/src/store_path/utils.rs, as they can now match on the type. `make_type` is renamed to `make_references_string`, `build_regular_ca_path` is renamed to `build_ca_path`, and `build_text_path` has a disclaimer added, because you might not actually want to use it. Change-Id: I674d065f2ed5c804012ddfed56e161ac49d23931 Reviewed-on: https://cl.tvl.fyi/c/depot/+/9814 Tested-by: BuildkiteCI Reviewed-by: raitobezarius --- tvix/cli/src/derivation.rs | 10 +- tvix/nix-compat/src/derivation/errors.rs | 12 +- tvix/nix-compat/src/derivation/mod.rs | 36 +++-- tvix/nix-compat/src/derivation/output.rs | 20 +-- tvix/nix-compat/src/derivation/parser.rs | 74 ++++++--- tvix/nix-compat/src/derivation/tests/mod.rs | 4 +- tvix/nix-compat/src/derivation/write.rs | 14 +- tvix/nix-compat/src/nixhash/ca_hash.rs | 177 +++++++++++++++++++++ tvix/nix-compat/src/nixhash/mod.rs | 4 +- tvix/nix-compat/src/nixhash/with_mode.rs | 234 ---------------------------- tvix/nix-compat/src/store_path/utils.rs | 134 +++++++++------- 11 files changed, 366 insertions(+), 353 deletions(-) create mode 100644 tvix/nix-compat/src/nixhash/ca_hash.rs delete mode 100644 tvix/nix-compat/src/nixhash/with_mode.rs diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs index 76cc3f60f77e..15df6ed76102 100644 --- a/tvix/cli/src/derivation.rs +++ b/tvix/cli/src/derivation.rs @@ -115,7 +115,7 @@ fn populate_output_configuration( ) -> 
Result<(), ErrorKind> { // We only do something when `digest` and `algo` are `Some(_)``, and // there's an `out` output. - if let (Some(hash), Some(algo), hash_mode) = (hash, hash_algo, hash_mode) { + if let (Some(nixhash_str), Some(algo), hash_mode) = (hash, hash_algo, hash_mode) { match drv.outputs.get_mut("out") { None => return Err(Error::ConflictingOutputTypes.into()), Some(out) => { @@ -126,12 +126,12 @@ fn populate_output_configuration( Some(algo.as_ref()) }; - let output_hash = nixhash::from_str(&hash, a).map_err(Error::InvalidOutputHash)?; + let hash = nixhash::from_str(&nixhash_str, a).map_err(Error::InvalidOutputHash)?; // construct the NixHashWithMode. - out.hash_with_mode = match hash_mode.as_deref() { - None | Some("flat") => Some(nixhash::NixHashWithMode::Flat(output_hash)), - Some("recursive") => Some(nixhash::NixHashWithMode::Recursive(output_hash)), + out.ca_hash = match hash_mode.as_deref() { + None | Some("flat") => Some(nixhash::CAHash::Flat(hash)), + Some("recursive") => Some(nixhash::CAHash::Nar(hash)), Some(other) => { return Err(Error::InvalidOutputHashMode(other.to_string()).into()) } diff --git a/tvix/nix-compat/src/derivation/errors.rs b/tvix/nix-compat/src/derivation/errors.rs index 8e9e6a121096..305957b10f6d 100644 --- a/tvix/nix-compat/src/derivation/errors.rs +++ b/tvix/nix-compat/src/derivation/errors.rs @@ -1,6 +1,8 @@ -use crate::{nixbase32::Nixbase32DecodeError, store_path}; +use crate::store_path; use thiserror::Error; +use super::CAHash; + /// Errors that can occur during the validation of Derivation structs. 
#[derive(Debug, Error, PartialEq)] pub enum DerivationError { @@ -50,10 +52,6 @@ pub enum DerivationError { pub enum OutputError { #[error("Invalid output path {0}: {1}")] InvalidOutputPath(String, store_path::Error), - #[error("Invalid hash encoding: {0}")] - InvalidHashEncoding(String, Nixbase32DecodeError), - #[error("Invalid hash algo: {0}")] - InvalidHashAlgo(String), - #[error("Invalid Digest size {0} for algo {1}")] - InvalidDigestSizeForAlgo(usize, String), + #[error("Invalid CAHash: {:?}", .0)] + InvalidCAHash(CAHash), } diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index d7f894c89a8f..a765e343096e 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -1,6 +1,4 @@ -use crate::store_path::{ - self, build_output_path, build_regular_ca_path, build_text_path, StorePath, -}; +use crate::store_path::{self, build_ca_path, build_output_path, build_text_path, StorePath}; use bstr::BString; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; @@ -18,7 +16,7 @@ mod write; mod tests; // Public API of the crate. -pub use crate::nixhash::{NixHash, NixHashWithMode}; +pub use crate::nixhash::{CAHash, NixHash}; pub use errors::{DerivationError, OutputError}; pub use output::Output; @@ -122,16 +120,22 @@ impl Derivation { /// Returns the FOD digest, if the derivation is fixed-output, or None if /// it's not. 
+ /// TODO: this is kinda the string from [build_ca_path] with a + /// [CAHash::Flat], what's fed to `build_store_path_from_fingerprint_parts` + /// (except the out_output.path being an empty string) fn fod_digest(&self) -> Option<[u8; 32]> { if self.outputs.len() != 1 { return None; } let out_output = self.outputs.get("out")?; + let ca_hash = &out_output.ca_hash.as_ref()?; + Some( Sha256::new_with_prefix(format!( - "fixed:out:{}:{}", - out_output.hash_with_mode.clone()?.to_nix_hash_string(), + "fixed:out:{}{}:{}", + ca_kind_prefix(ca_hash), + ca_hash.digest().to_nix_hash_string(), out_output.path )) .finalize() @@ -229,10 +233,10 @@ impl Derivation { // For fixed output derivation we use the per-output info, otherwise we use the // derivation hash. - let abs_store_path = if let Some(ref hwm) = output.hash_with_mode { - build_regular_ca_path(&path_name, hwm, Vec::<String>::new(), false).map_err( - |e| DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e), - )? + let abs_store_path = if let Some(ref hwm) = output.ca_hash { + build_ca_path(&path_name, hwm, Vec::<String>::new(), false).map_err(|e| { + DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e) + })? } else { build_output_path(derivation_or_fod_hash, output_name, &path_name).map_err(|e| { DerivationError::InvalidOutputDerivationPath( @@ -265,3 +269,15 @@ fn output_path_name(derivation_name: &str, output_name: &str) -> String { } output_path_name } + +/// For a [CAHash], return the "prefix" used for NAR purposes. +/// For [CAHash::Flat], this is an empty string, for [CAHash::Nar], it's "r:". +/// Panics for other [CAHash] kinds, as they're not valid in a derivation +/// context.
+fn ca_kind_prefix(ca_hash: &CAHash) -> &'static str { + match ca_hash { + CAHash::Flat(_) => "", + CAHash::Nar(_) => "r:", + _ => panic!("invalid ca hash in derivation context: {:?}", ca_hash), + } +} diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs index 78a83b03be45..c13f94859dc1 100644 --- a/tvix/nix-compat/src/derivation/output.rs +++ b/tvix/nix-compat/src/derivation/output.rs @@ -1,5 +1,5 @@ use crate::derivation::OutputError; -use crate::nixhash::{HashAlgo, NixHashWithMode}; +use crate::nixhash::CAHash; use crate::store_path::StorePath; use serde::{Deserialize, Serialize}; use serde_json::Map; @@ -9,7 +9,7 @@ pub struct Output { pub path: String, #[serde(flatten)] - pub hash_with_mode: Option, + pub ca_hash: Option, // we can only represent a subset here. } impl<'de> Deserialize<'de> for Output { @@ -30,26 +30,26 @@ impl<'de> Deserialize<'de> for Output { &"a string", ))? .to_owned(), - hash_with_mode: NixHashWithMode::from_map::(&fields)?, + ca_hash: CAHash::from_map::(&fields)?, }) } } impl Output { pub fn is_fixed(&self) -> bool { - self.hash_with_mode.is_some() + self.ca_hash.is_some() } pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> { - if let Some(hash) = &self.hash_with_mode { - match hash { - NixHashWithMode::Flat(h) | NixHashWithMode::Recursive(h) => { - if h.algo() != HashAlgo::Sha1 || h.algo() != HashAlgo::Sha256 { - return Err(OutputError::InvalidHashAlgo(h.algo().to_string())); - } + if let Some(fixed_output_hash) = &self.ca_hash { + match fixed_output_hash { + CAHash::Flat(_) | CAHash::Nar(_) => { + // all hashes allowed for Flat, and Nar. 
} + _ => return Err(OutputError::InvalidCAHash(fixed_output_hash.clone())), } } + if validate_output_paths { if let Err(e) = StorePath::from_absolute_path(self.path.as_bytes()) { return Err(OutputError::InvalidOutputPath(self.path.to_string(), e)); diff --git a/tvix/nix-compat/src/derivation/parser.rs b/tvix/nix-compat/src/derivation/parser.rs index 48f2b92d9168..b04187c433dc 100644 --- a/tvix/nix-compat/src/derivation/parser.rs +++ b/tvix/nix-compat/src/derivation/parser.rs @@ -12,8 +12,8 @@ use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; use std::collections::{BTreeMap, BTreeSet}; use thiserror; -use super::parse_error::{into_nomerror, ErrorKind, NomError, NomResult}; -use super::{write, Derivation, NixHashWithMode, Output}; +use crate::derivation::parse_error::{into_nomerror, ErrorKind, NomError, NomResult}; +use crate::derivation::{write, CAHash, Derivation, Output}; use crate::{aterm, nixhash}; #[derive(Debug, thiserror::Error)] @@ -42,6 +42,24 @@ pub(crate) fn parse(i: &[u8]) -> Result> { } } +/// Consume a string containing the algo, and optionally a `r:` +/// prefix, and a digest (bytes), return a [CAHash::Nar] or [CAHash::Flat]. +fn from_algo_and_mode_and_digest>( + algo_and_mode: &str, + digest: B, +) -> crate::nixhash::Result { + Ok(match algo_and_mode.strip_prefix("r:") { + Some(algo) => nixhash::CAHash::Nar(nixhash::from_algo_and_digest( + algo.try_into()?, + digest.as_ref(), + )?), + None => nixhash::CAHash::Flat(nixhash::from_algo_and_digest( + algo_and_mode.try_into()?, + digest.as_ref(), + )?), + }) +} + /// Parse one output in ATerm. This is 4 string fields inside parans: /// output name, output path, algo (and mode), digest. /// Returns the output name and [Output] struct. @@ -60,27 +78,26 @@ fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> { }, |(output_name, output_path, algo_and_mode, encoded_digest)| { // convert these 4 fields into an [Output]. 
- let hash_with_mode_res = { + let ca_hash_res = { if algo_and_mode.is_empty() && encoded_digest.is_empty() { None } else { match data_encoding::HEXLOWER.decode(&encoded_digest) { - Ok(digest) => Some(NixHashWithMode::from_algo_mode_hash( - &algo_and_mode, - &digest, - )), + Ok(digest) => { + Some(from_algo_and_mode_and_digest(&algo_and_mode, digest)) + } Err(e) => Some(Err(nixhash::Error::InvalidBase64Encoding(e))), } } } .transpose(); - match hash_with_mode_res { + match ca_hash_res { Ok(hash_with_mode) => Ok(( output_name, Output { path: output_path, - hash_with_mode, + ca_hash: hash_with_mode, }, )), Err(e) => Err(nom::Err::Failure(NomError { @@ -279,12 +296,20 @@ where mod tests { use std::collections::{BTreeMap, BTreeSet}; - use crate::derivation::{parse_error::ErrorKind, Output}; + use crate::derivation::{ + parse_error::ErrorKind, parser::from_algo_and_mode_and_digest, CAHash, NixHash, Output, + }; use bstr::{BString, ByteSlice}; use lazy_static::lazy_static; use test_case::test_case; + const DIGEST_SHA256: [u8; 32] = [ + 0xa5, 0xce, 0x9c, 0x15, 0x5e, 0xd0, 0x93, 0x97, 0x61, 0x46, 0x46, 0xc9, 0x71, 0x7f, 0xc7, + 0xcd, 0x94, 0xb1, 0x02, 0x3d, 0x7b, 0x76, 0xb6, 0x18, 0xd4, 0x09, 0xe4, 0xfe, 0xfd, 0x6e, + 0x9d, 0x39, + ]; lazy_static! 
{ + pub static ref NIXHASH_SHA256: NixHash = NixHash::Sha256(DIGEST_SHA256); static ref EXP_MULTI_OUTPUTS: BTreeMap = { let mut b = BTreeMap::new(); b.insert( @@ -292,14 +317,14 @@ mod tests { Output { path: "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib" .to_string(), - hash_with_mode: None, + ca_hash: None, }, ); b.insert( "out".to_string(), Output { path: "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string(), - hash_with_mode: None, + ca_hash: None, }, ); b @@ -441,19 +466,15 @@ mod tests { br#"("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")"#, ("out".to_string(), Output { path: "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo".to_string(), - hash_with_mode: None + ca_hash: None }); "simple" )] #[test_case( br#"("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")"#, ("out".to_string(), Output { path: "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar".to_string(), - hash_with_mode: Some(crate::derivation::NixHashWithMode::Recursive( - crate::nixhash::from_algo_and_digest ( - crate::nixhash::HashAlgo::Sha256, - &data_encoding::HEXLOWER.decode(b"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba").unwrap() - ).unwrap() - )), + ca_hash: Some(from_algo_and_mode_and_digest("r:sha256", + &data_encoding::HEXLOWER.decode(b"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba").unwrap() ).unwrap()), }); "fod" )] fn parse_output(input: &[u8], expected: (String, Output)) { @@ -472,4 +493,19 @@ mod tests { assert!(rest.is_empty()); assert_eq!(*expected, parsed); } + + #[test_case("sha256", &DIGEST_SHA256, CAHash::Flat(NIXHASH_SHA256.clone()); "sha256 flat")] + #[test_case("r:sha256", &DIGEST_SHA256, CAHash::Nar(NIXHASH_SHA256.clone()); "sha256 recursive")] + fn test_from_algo_and_mode_and_digest(algo_and_mode: &str, digest: &[u8], expected: CAHash) { + assert_eq!( + expected, + 
from_algo_and_mode_and_digest(algo_and_mode, digest).unwrap() + ); + } + + #[test] + fn from_algo_and_mode_and_digest_failure() { + assert!(from_algo_and_mode_and_digest("r:sha256", &[]).is_err()); + assert!(from_algo_and_mode_and_digest("ha256", &DIGEST_SHA256).is_err()); + } } diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 36b44e047f9a..b969625d9760 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -315,7 +315,7 @@ fn output_path_construction() { "out".to_string(), Output { path: "".to_string(), // will be calculated - hash_with_mode: Some(crate::nixhash::NixHashWithMode::Recursive( + ca_hash: Some(crate::nixhash::CAHash::Nar( crate::nixhash::from_algo_and_digest( crate::nixhash::HashAlgo::Sha256, &data_encoding::HEXLOWER @@ -376,7 +376,7 @@ fn output_path_construction() { "out".to_string(), Output { path: "".to_string(), // will be calculated - hash_with_mode: None, + ca_hash: None, }, ); diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs index 7ebbbffa4b55..087227c99998 100644 --- a/tvix/nix-compat/src/derivation/write.rs +++ b/tvix/nix-compat/src/derivation/write.rs @@ -4,7 +4,7 @@ //! 
[ATerm]: http://program-transformation.org/Tools/ATermFormat.html use crate::aterm::escape_bytes; -use crate::derivation::output::Output; +use crate::derivation::{ca_kind_prefix, output::Output}; use bstr::BString; use std::{ collections::{BTreeMap, BTreeSet}, @@ -79,14 +79,10 @@ pub fn write_outputs( let mut elements: Vec<&str> = vec![output_name, &output.path]; - let (mode_and_algo, digest) = match &output.hash_with_mode { - Some(crate::nixhash::NixHashWithMode::Flat(h)) => ( - h.algo().to_string(), - data_encoding::HEXLOWER.encode(h.digest_as_bytes()), - ), - Some(crate::nixhash::NixHashWithMode::Recursive(h)) => ( - format!("r:{}", h.algo()), - data_encoding::HEXLOWER.encode(h.digest_as_bytes()), + let (mode_and_algo, digest) = match &output.ca_hash { + Some(ca_hash) => ( + format!("{}{}", ca_kind_prefix(ca_hash), ca_hash.digest().algo()), + data_encoding::HEXLOWER.encode(ca_hash.digest().digest_as_bytes()), ), None => ("".to_string(), "".to_string()), }; diff --git a/tvix/nix-compat/src/nixhash/ca_hash.rs b/tvix/nix-compat/src/nixhash/ca_hash.rs new file mode 100644 index 000000000000..5d9ae3f3a861 --- /dev/null +++ b/tvix/nix-compat/src/nixhash/ca_hash.rs @@ -0,0 +1,177 @@ +use crate::nixbase32; +use crate::nixhash::{self, HashAlgo, NixHash}; +use serde::de::Unexpected; +use serde::ser::SerializeMap; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde_json::{Map, Value}; +use std::borrow::Cow; + +use super::algos::SUPPORTED_ALGOS; +use super::from_algo_and_digest; + +/// A Nix CAHash describes a content-addressed hash of a path. +/// Semantically, it can be split into the following components: +/// +/// - "content address prefix". Currently, "fixed" and "text" are supported. +/// - "hash mode". Currently, "flat" and "recursive" are supported. +/// - "hash type". The underlying hash function used. +/// Currently, sha1, md5, sha256, sha512. +/// - "digest". The digest itself. 
+#[derive(Clone, Debug, Eq, PartialEq)] +pub enum CAHash { + Flat(NixHash), // "fixed flat" + Nar(NixHash), // "fixed recursive" + Text(Box<[u8; 32]>), // "text", only supports sha256 +} + +impl CAHash { + pub fn digest(&self) -> Cow<NixHash> { + match self { + CAHash::Nar(ref digest) => Cow::Borrowed(digest), + CAHash::Text(ref digest) => Cow::Owned(NixHash::Sha256(*digest.clone())), + CAHash::Flat(ref digest) => Cow::Borrowed(digest), + } + } + + /// This takes a serde_json::Map and turns it into this structure. This is necessary to do such + /// shenigans because we have external consumers, like the Derivation parser, who would like to + /// know whether we have a invalid or a missing NixHashWithMode structure in another structure, + /// e.g. Output. + /// This means we have this combinatorial situation: + /// - no hash, no hashAlgo: no [CAHash] so we return Ok(None). + /// - present hash, missing hashAlgo: invalid, we will return missing_field + /// - missing hash, present hashAlgo: same + /// - present hash, present hashAlgo: either we return ourselves or a type/value validation + /// error. + /// This function is for internal consumption regarding those needs until we have a better + /// solution. Now this is said, let's explain how this works. + /// + /// We want to map the serde data model into a [CAHash]. + /// + /// The serde data model has a `hash` field (containing a digest in nixbase32), + /// and a `hashAlgo` field, containing the stringified hash algo. + /// In case the hash is recursive, hashAlgo also has a `r:` prefix. + /// + /// This is to match how `nix show-derivation` command shows them in JSON + /// representation. + pub(crate) fn from_map<'de, D>(map: &Map<String, Value>) -> Result<Option<Self>, D::Error> + where + D: Deserializer<'de>, + { + // If we don't have hash neither hashAlgo, let's just return None.
+ if !map.contains_key("hash") && !map.contains_key("hashAlgo") { + return Ok(None); + } + + let digest: Vec<u8> = { + if let Some(v) = map.get("hash") { + if let Some(s) = v.as_str() { + data_encoding::HEXLOWER + .decode(s.as_bytes()) + .map_err(|e| serde::de::Error::custom(e.to_string()))? + } else { + return Err(serde::de::Error::invalid_type( + Unexpected::Other(&v.to_string()), + &"a string", + )); + } + } else { + return Err(serde::de::Error::missing_field( + "couldn't extract `hash` key but `hashAlgo` key present", + )); + } + }; + + if let Some(v) = map.get("hashAlgo") { + if let Some(s) = v.as_str() { + match s.strip_prefix("r:") { + Some(rest) => Ok(Some(Self::Nar( + from_algo_and_digest( + HashAlgo::try_from(rest).map_err(|e| { + serde::de::Error::invalid_value( + Unexpected::Other(&e.to_string()), + &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(), + ) + })?, + &digest, + ) + .map_err(|e: nixhash::Error| { + serde::de::Error::invalid_value( + Unexpected::Other(&e.to_string()), + &"a digest with right length", + ) + })?, + ))), + None => Ok(Some(Self::Flat( + from_algo_and_digest( + HashAlgo::try_from(s).map_err(|e| { + serde::de::Error::invalid_value( + Unexpected::Other(&e.to_string()), + &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(), + ) + })?, + &digest, + ) + .map_err(|e: nixhash::Error| { + serde::de::Error::invalid_value( + Unexpected::Other(&e.to_string()), + &"a digest with right length", + ) + })?, + ))), + } + } else { + Err(serde::de::Error::invalid_type( + Unexpected::Other(&v.to_string()), + &"a string", + )) + } + } else { + Err(serde::de::Error::missing_field( + "couldn't extract `hashAlgo` key, but `hash` key present", + )) + } + } +} + +impl Serialize for CAHash { + /// map a CAHash into the serde data model.
+ fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut map = serializer.serialize_map(Some(2))?; + match self { + CAHash::Flat(h) => { + map.serialize_entry("hash", &nixbase32::encode(h.digest_as_bytes()))?; + map.serialize_entry("hashAlgo", &h.algo())?; + } + CAHash::Nar(h) => { + map.serialize_entry("hash", &nixbase32::encode(h.digest_as_bytes()))?; + map.serialize_entry("hashAlgo", &format!("r:{}", &h.algo()))?; + } + // It is not legal for derivations to use this (which is where + // we're currently exercising [Serialize] mostly, + // but it's still good to be able to serialize other CA hashes too. + CAHash::Text(h) => { + map.serialize_entry("hash", &nixbase32::encode(h.as_ref()))?; + map.serialize_entry("hashAlgo", "text")?; + } + }; + map.end() + } +} + +impl<'de> Deserialize<'de> for CAHash { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let value = Self::from_map::<D>(&Map::deserialize(deserializer)?)?; + + match value { + None => Err(serde::de::Error::custom("couldn't parse as map")), + Some(v) => Ok(v), + } + } +} diff --git a/tvix/nix-compat/src/nixhash/mod.rs b/tvix/nix-compat/src/nixhash/mod.rs index 5b12f466e3fb..1983d16feee9 100644 --- a/tvix/nix-compat/src/nixhash/mod.rs +++ b/tvix/nix-compat/src/nixhash/mod.rs @@ -3,10 +3,10 @@ use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER}; use thiserror; mod algos; -mod with_mode; +mod ca_hash; pub use algos::HashAlgo; -pub use with_mode::NixHashWithMode; +pub use ca_hash::CAHash; /// NixHash represents hashes known by Nix.
#[derive(Clone, Debug, Eq, PartialEq)] diff --git a/tvix/nix-compat/src/nixhash/with_mode.rs b/tvix/nix-compat/src/nixhash/with_mode.rs deleted file mode 100644 index 6e6d2e76b4a4..000000000000 --- a/tvix/nix-compat/src/nixhash/with_mode.rs +++ /dev/null @@ -1,234 +0,0 @@ -use crate::nixbase32; -use crate::nixhash::{self, HashAlgo, NixHash}; -use serde::de::Unexpected; -use serde::ser::SerializeMap; -use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use serde_json::{Map, Value}; - -use super::algos::SUPPORTED_ALGOS; -use super::from_algo_and_digest; - -pub enum NixHashMode { - Flat, - Recursive, -} - -impl NixHashMode { - pub fn prefix(self) -> &'static str { - match self { - Self::Flat => "", - Self::Recursive => "r:", - } - } -} - -/// A Nix Hash can either be flat or recursive. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum NixHashWithMode { - Flat(NixHash), - Recursive(NixHash), -} - -impl NixHashWithMode { - /// Construct a [NixHashWithMode] from a string containing the algo, and - /// optionally a `r:` prefix, and a digest (bytes). - pub fn from_algo_mode_hash(algo_and_mode: &str, digest: &[u8]) -> super::Result { - Ok(match algo_and_mode.strip_prefix("r:") { - Some(algo) => nixhash::NixHashWithMode::Recursive(nixhash::from_algo_and_digest( - algo.try_into()?, - digest, - )?), - None => nixhash::NixHashWithMode::Flat(nixhash::from_algo_and_digest( - algo_and_mode.try_into()?, - digest, - )?), - }) - } - - pub fn mode(&self) -> NixHashMode { - match self { - Self::Flat(_) => NixHashMode::Flat, - Self::Recursive(_) => NixHashMode::Recursive, - } - } - - pub fn digest(&self) -> &NixHash { - match self { - Self::Flat(ref h) => h, - Self::Recursive(ref h) => h, - } - } - - /// Formats a [NixHashWithMode] in the Nix default hash format, - /// which is the algo, followed by a colon, then the lower hex encoded digest. 
- /// In case the hash itself is recursive, a `r:` is added as prefix - pub fn to_nix_hash_string(&self) -> String { - String::from(self.mode().prefix()) + &self.digest().to_nix_hash_string() - } - - /// This takes a serde_json::Map and turns it into this structure. This is necessary to do such - /// shenigans because we have external consumers, like the Derivation parser, who would like to - /// know whether we have a invalid or a missing NixHashWithMode structure in another structure, - /// e.g. Output. - /// This means we have this combinatorial situation: - /// - no hash, no hashAlgo: no NixHashWithMode so we return Ok(None). - /// - present hash, missing hashAlgo: invalid, we will return missing_field - /// - missing hash, present hashAlgo: same - /// - present hash, present hashAlgo: either we return ourselves or a type/value validation - /// error. - /// This function is for internal consumption regarding those needs until we have a better - /// solution. Now this is said, let's explain how this works. - /// - /// We want to map the serde data model into a NixHashWithMode. - /// - /// The serde data model has a `hash` field (containing a digest in nixbase32), - /// and a `hashAlgo` field, containing the stringified hash algo. - /// In case the hash is recursive, hashAlgo also has a `r:` prefix. - /// - /// This is to match how `nix show-derivation` command shows them in JSON - /// representation. - pub(crate) fn from_map<'de, D>(map: &Map) -> Result, D::Error> - where - D: Deserializer<'de>, - { - // If we don't have hash neither hashAlgo, let's just return None. - if !map.contains_key("hash") && !map.contains_key("hashAlgo") { - return Ok(None); - } - - let digest: Vec = { - if let Some(v) = map.get("hash") { - if let Some(s) = v.as_str() { - data_encoding::HEXLOWER - .decode(s.as_bytes()) - .map_err(|e| serde::de::Error::custom(e.to_string()))? 
- } else { - return Err(serde::de::Error::invalid_type( - Unexpected::Other(&v.to_string()), - &"a string", - )); - } - } else { - return Err(serde::de::Error::missing_field( - "couldn't extract `hash` key but `hashAlgo` key present", - )); - } - }; - - if let Some(v) = map.get("hashAlgo") { - if let Some(s) = v.as_str() { - match s.strip_prefix("r:") { - Some(rest) => Ok(Some(Self::Recursive( - from_algo_and_digest( - HashAlgo::try_from(rest).map_err(|e| { - serde::de::Error::invalid_value( - Unexpected::Other(&e.to_string()), - &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(), - ) - })?, - &digest, - ) - .map_err(|e: nixhash::Error| { - serde::de::Error::invalid_value( - Unexpected::Other(&e.to_string()), - &"a digest with right length", - ) - })?, - ))), - None => Ok(Some(Self::Flat( - from_algo_and_digest( - HashAlgo::try_from(s).map_err(|e| { - serde::de::Error::invalid_value( - Unexpected::Other(&e.to_string()), - &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(), - ) - })?, - &digest, - ) - .map_err(|e: nixhash::Error| { - serde::de::Error::invalid_value( - Unexpected::Other(&e.to_string()), - &"a digest with right length", - ) - })?, - ))), - } - } else { - Err(serde::de::Error::invalid_type( - Unexpected::Other(&v.to_string()), - &"a string", - )) - } - } else { - Err(serde::de::Error::missing_field( - "couldn't extract `hashAlgo` key, but `hash` key present", - )) - } - } -} - -impl Serialize for NixHashWithMode { - /// map a NixHashWithMode into the serde data model. 
- fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> - where - S: Serializer, - { - let mut map = serializer.serialize_map(Some(2))?; - match self { - NixHashWithMode::Flat(h) => { - map.serialize_entry("hash", &nixbase32::encode(h.digest_as_bytes()))?; - map.serialize_entry("hashAlgo", &h.algo())?; - } - NixHashWithMode::Recursive(h) => { - map.serialize_entry("hash", &nixbase32::encode(h.digest_as_bytes()))?; - map.serialize_entry("hashAlgo", &format!("r:{}", &h.algo()))?; - } - }; - map.end() - } -} - -impl<'de> Deserialize<'de> for NixHashWithMode { - fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> - where - D: Deserializer<'de>, - { - let value = Self::from_map::<D>(&Map::deserialize(deserializer)?)?; - - match value { - None => Err(serde::de::Error::custom("couldn't parse as map")), - Some(v) => Ok(v), - } - } -} - -#[cfg(test)] -mod tests { - use crate::nixhash::{NixHash, NixHashWithMode}; - use lazy_static::lazy_static; - use test_case::test_case; - - const DIGEST_SHA256: [u8; 32] = [ - 0xa5, 0xce, 0x9c, 0x15, 0x5e, 0xd0, 0x93, 0x97, 0x61, 0x46, 0x46, 0xc9, 0x71, 0x7f, 0xc7, - 0xcd, 0x94, 0xb1, 0x02, 0x3d, 0x7b, 0x76, 0xb6, 0x18, 0xd4, 0x09, 0xe4, 0xfe, 0xfd, 0x6e, - 0x9d, 0x39, - ]; - lazy_static!
{ - pub static ref NIXHASH_SHA256: NixHash = NixHash::Sha256(DIGEST_SHA256); - } - - #[test_case("sha256", &DIGEST_SHA256, NixHashWithMode::Flat(NIXHASH_SHA256.clone()); "sha256 flat")] - #[test_case("r:sha256", &DIGEST_SHA256, NixHashWithMode::Recursive(NIXHASH_SHA256.clone()); "sha256 recursive")] - fn from_from_algo_mode_hash(algo_and_mode: &str, digest: &[u8], expected: NixHashWithMode) { - assert_eq!( - expected, - NixHashWithMode::from_algo_mode_hash(algo_and_mode, digest).unwrap() - ); - } - - #[test] - fn from_algo_mode_failure() { - assert!(NixHashWithMode::from_algo_mode_hash("r:sha256", &[]).is_err()); - assert!(NixHashWithMode::from_algo_mode_hash("ha256", &DIGEST_SHA256).is_err()); - } -} diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs index b774fbdb662d..b625abdfd0b9 100644 --- a/tvix/nix-compat/src/store_path/utils.rs +++ b/tvix/nix-compat/src/store_path/utils.rs @@ -1,7 +1,6 @@ -use super::{Error, STORE_DIR}; use crate::nixbase32; -use crate::nixhash::{NixHash, NixHashWithMode}; -use crate::store_path::StorePath; +use crate::nixhash::{CAHash, NixHash}; +use crate::store_path::{Error, StorePath, STORE_DIR}; use sha2::{Digest, Sha256}; use thiserror; @@ -41,43 +40,51 @@ pub fn compress_hash(input: &[u8]) -> [u8; OUTPUT_SIZE /// This builds a store path, by calculating the text_hash_string of either a /// derivation or a literal text file that may contain references. +/// If you don't want to have to pass the entire contents, you might want to use +/// [build_ca_path] instead. 
pub fn build_text_path, I: IntoIterator, C: AsRef<[u8]>>( name: &str, content: C, references: I, -) -> Result { - build_store_path_from_fingerprint_parts( - &make_type("text", references, false), - // the nix_hash_string representation of the sha256 digest of some contents - &{ - let content_digest = { - let hasher = Sha256::new_with_prefix(content); - hasher.finalize() - }; +) -> Result { + // produce the sha256 digest of the contents + let content_digest = Sha256::new_with_prefix(content).finalize().into(); - NixHash::Sha256(content_digest.into()) - }, + build_ca_path( name, + &CAHash::Text(Box::new(content_digest)), + references, + false, ) } -/// This builds a more "regular" content-addressed store path -pub fn build_regular_ca_path, I: IntoIterator>( - name: &str, - hash_with_mode: &NixHashWithMode, +/// This builds a store path from a [CAHash] and a list of references. +pub fn build_ca_path, S: AsRef, I: IntoIterator>( + name: B, + ca_hash: &CAHash, references: I, self_reference: bool, ) -> Result { - match &hash_with_mode { - NixHashWithMode::Recursive(ref hash @ NixHash::Sha256(_)) => { + match &ca_hash { + CAHash::Text(ref digest) => { + if self_reference { + return Err(BuildStorePathError::InvalidReference()); + } build_store_path_from_fingerprint_parts( - &make_type("source", references, self_reference), - hash, + &make_references_string("text", references, false), + &NixHash::Sha256(*digest.to_owned()), name, ) .map_err(BuildStorePathError::InvalidStorePath) } - _ => { + CAHash::Nar(ref hash @ NixHash::Sha256(_)) => build_store_path_from_fingerprint_parts( + &make_references_string("source", references, self_reference), + hash, + name, + ) + .map_err(BuildStorePathError::InvalidStorePath), + // for all other CAHash::Nar, another custom scheme is used. 
+ CAHash::Nar(ref hash) => { if references.into_iter().next().is_some() { return Err(BuildStorePathError::InvalidReference()); } @@ -87,14 +94,38 @@ pub fn build_regular_ca_path, I: IntoIterator>( build_store_path_from_fingerprint_parts( "output:out", &{ - let content_digest = { - let mut hasher = Sha256::new_with_prefix("fixed:out:"); - hasher.update(hash_with_mode.to_nix_hash_string()); - hasher.update(":"); - hasher.finalize() - }; - - NixHash::Sha256(content_digest.into()) + NixHash::Sha256( + Sha256::new_with_prefix(format!( + "fixed:out:r:{}:", + hash.to_nix_hash_string() + )) + .finalize() + .into(), + ) + }, + name, + ) + .map_err(BuildStorePathError::InvalidStorePath) + } + // CaHash::Flat is using something very similar, except the `r:` prefix. + CAHash::Flat(ref hash) => { + if references.into_iter().next().is_some() { + return Err(BuildStorePathError::InvalidReference()); + } + if self_reference { + return Err(BuildStorePathError::InvalidReference()); + } + build_store_path_from_fingerprint_parts( + "output:out", + &{ + NixHash::Sha256( + Sha256::new_with_prefix(format!( + "fixed:out:{}:", + hash.to_nix_hash_string() + )) + .finalize() + .into(), + ) }, name, ) @@ -105,13 +136,12 @@ pub fn build_regular_ca_path, I: IntoIterator>( /// For given NAR sha256 digest and name, return the new [StorePath] this would have. pub fn build_nar_based_store_path(nar_sha256_digest: &[u8; 32], name: &str) -> StorePath { - let nar_hash_with_mode = - NixHashWithMode::Recursive(NixHash::Sha256(nar_sha256_digest.to_owned())); + let nar_hash_with_mode = CAHash::Nar(NixHash::Sha256(nar_sha256_digest.to_owned())); - build_regular_ca_path(name, &nar_hash_with_mode, Vec::::new(), false).unwrap() + build_ca_path(name, &nar_hash_with_mode, Vec::::new(), false).unwrap() } -/// This builds an input-addressed store path +/// This builds an input-addressed store path. 
/// /// Input-addresed store paths are always derivation outputs, the "input" in question is the /// derivation and its closure. @@ -135,22 +165,20 @@ pub fn build_output_path( /// /// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes, /// and nixbase32-encoded (32 characters). -fn build_store_path_from_fingerprint_parts( +fn build_store_path_from_fingerprint_parts>( ty: &str, hash: &NixHash, - name: &str, + name: B, ) -> Result { + let name = super::validate_name(name.as_ref())?; let fingerprint = - String::from(ty) + ":" + &hash.to_nix_hash_string() + ":" + STORE_DIR + ":" + name; - let digest = { - let hasher = Sha256::new_with_prefix(fingerprint); - hasher.finalize() - }; + String::from(ty) + ":" + &hash.to_nix_hash_string() + ":" + STORE_DIR + ":" + &name; + let digest = Sha256::new_with_prefix(fingerprint).finalize(); let compressed = compress_hash::<20>(&digest); - super::validate_name(name.as_bytes())?; + Ok(StorePath { digest: compressed, - name: name.to_string(), + name, }) } @@ -164,7 +192,7 @@ fn build_store_path_from_fingerprint_parts( /// - the nix_hash_string representation of the sha256 digest of some contents /// - the value of `storeDir` /// - the name -fn make_type, I: IntoIterator>( +fn make_references_string, I: IntoIterator>( ty: &str, references: I, self_ref: bool, @@ -190,11 +218,7 @@ fn make_type, I: IntoIterator>( /// The actual placeholder is basically just a SHA256 hash encoded in /// cppnix format. 
pub fn hash_placeholder(name: &str) -> String { - let digest = { - let mut hasher = Sha256::new(); - hasher.update(format!("nix-output:{}", name)); - hasher.finalize() - }; + let digest = Sha256::new_with_prefix(format!("nix-output:{}", name)).finalize(); format!("/{}", nixbase32::encode(&digest)) } @@ -202,7 +226,7 @@ pub fn hash_placeholder(name: &str) -> String { #[cfg(test)] mod test { use super::*; - use crate::nixhash::{NixHash, NixHashWithMode}; + use crate::nixhash::{CAHash, NixHash}; #[test] fn build_text_path_with_zero_references() { @@ -242,9 +266,9 @@ mod test { #[test] fn build_sha1_path() { - let outer = build_regular_ca_path( + let outer = build_ca_path( "bar", - &NixHashWithMode::Recursive(NixHash::Sha1( + &CAHash::Nar(NixHash::Sha1( data_encoding::HEXLOWER .decode(b"0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33") .expect("hex should decode") @@ -271,9 +295,9 @@ mod test { // // $ nix store make-content-addressed /nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz // rewrote '/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz' to '/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz' - let outer = build_regular_ca_path( + let outer = build_ca_path( "baz", - &NixHashWithMode::Recursive(NixHash::Sha256( + &CAHash::Nar(NixHash::Sha256( nixbase32::decode(b"1xqkzcb3909fp07qngljr4wcdnrh1gdam1m2n29i6hhrxlmkgkv1") .expect("hex should decode") .try_into() -- cgit 1.4.1