From 26c68f8e892633bde4aeebbfc0e4ae7ee571687d Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 31 Mar 2023 10:20:04 -0400 Subject: refactor(nix-compat): Properly encapsulate store path construction Before there was code scattered about (e.g. text hashing module and derivation output computation) constructing store paths from low level building blocks --- there was some duplication and it was easy to make nonsense store paths. Now, we have roughly the same "safe-ish" ways of constructing them as C++ Nix, and only those are exposed: - Make text hashed content-addressed store paths - Make other content-addressed store paths - Make input-addressed fixed output hashes Change-Id: I122a3ee0802b4f45ae386306b95b698991be89c8 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8411 Reviewed-by: flokli Tested-by: BuildkiteCI --- tvix/cli/src/derivation.rs | 18 ++- tvix/nix-compat/src/derivation/errors.rs | 2 + tvix/nix-compat/src/derivation/mod.rs | 92 +++++------- tvix/nix-compat/src/derivation/tests/mod.rs | 2 +- tvix/nix-compat/src/nixhash/algos.rs | 2 +- tvix/nix-compat/src/nixhash/with_mode.rs | 33 ++++- tvix/nix-compat/src/store_path/mod.rs | 30 ++-- tvix/nix-compat/src/store_path/utils.rs | 208 ++++++++++++++++++++++------ tvix/store/src/proto/tests/pathinfo.rs | 6 +- 9 files changed, 268 insertions(+), 125 deletions(-) (limited to 'tvix') diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs index 23c49d3d0974..cf15ebbb0dc3 100644 --- a/tvix/cli/src/derivation.rs +++ b/tvix/cli/src/derivation.rs @@ -412,16 +412,14 @@ mod derivation_builtins { // TODO: fail on derivation references (only "plain" is allowed here) - let path = nix_compat::store_path::build_store_path_from_references( - name.as_str(), - content.as_str(), - refs, - ) - .map_err(|_e| { - nix_compat::derivation::DerivationError::InvalidOutputName(name.as_str().to_string()) - }) - .map_err(Error::InvalidDerivation)? - .to_absolute_path(); + let path = nix_compat::store_path::build_text_path(name.as_str(), content.as_str(), refs) + .map_err(|_e| { + nix_compat::derivation::DerivationError::InvalidOutputName( + name.as_str().to_string(), + ) + }) + .map_err(Error::InvalidDerivation)? + .to_absolute_path(); state.borrow_mut().plain(&path); diff --git a/tvix/nix-compat/src/derivation/errors.rs b/tvix/nix-compat/src/derivation/errors.rs index 2ffd56cf64da..8e9e6a121096 100644 --- a/tvix/nix-compat/src/derivation/errors.rs +++ b/tvix/nix-compat/src/derivation/errors.rs @@ -15,6 +15,8 @@ pub enum DerivationError { InvalidOutputNameForFixed(String), #[error("unable to validate output {0}: {1}")] InvalidOutput(String, OutputError), + #[error("unable to validate output {0}: {1}")] + InvalidOutputDerivationPath(String, store_path::BuildStorePathError), // input derivation #[error("unable to parse input derivation path {0}: {1}")] InvalidInputDerivationPath(String, store_path::Error), diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index 2fa77cd26f49..f3a4cd6b04e4 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -1,8 +1,5 @@ -use crate::{ - nixhash::HashAlgo, - store_path::{ - self, build_store_path_from_fingerprint, build_store_path_from_references, StorePath, - }, +use crate::store_path::{ + self, build_output_path, build_regular_ca_path, build_text_path, StorePath, }; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; @@ -78,7 +75,7 @@ impl Derivation { /// Returns the drv path of a [Derivation] struct. /// - /// The drv path is calculated by invoking [build_store_path_from_references], using + /// The drv path is calculated by invoking [build_text_path], using /// the `name` with a `.drv` suffix as name, all [Derivation::input_sources] and /// keys of [Derivation::input_derivations] as references, and the ATerm string of /// the [Derivation] as content. @@ -96,7 +93,7 @@ impl Derivation { inputs }; - build_store_path_from_references(name, self.to_aterm_string(), references) + build_text_path(name, self.to_aterm_string(), references) .map_err(|_e| DerivationError::InvalidOutputName(name.to_string())) } @@ -196,7 +193,6 @@ impl Derivation { name: &str, derivation_or_fod_hash: &NixHash, ) -> Result<(), DerivationError> { - let num_outputs = self.outputs.len(); // The fingerprint and hash differs per output for (output_name, output) in self.outputs.iter_mut() { // Assert that outputs are not yet populated, to avoid using this function wrongly. @@ -204,59 +200,43 @@ impl Derivation { // footgun prevention mechanism. assert!(output.path.is_empty()); - // calculate the output_name_path, which is the part of the NixPath after the digest. - // It's the name, and (if it's the non-out output), the output name after a `-`. - let output_path_name = { - let mut output_path_name = name.to_string(); - if output_name != "out" { - output_path_name.push('-'); - output_path_name.push_str(output_name); - } - output_path_name + let path_name = output_path_name(name, output_name); + + // For fixed output derivation we use the per-output info, otherwise we use the + // derivation hash. + let abs_store_path = if let Some(ref hwm) = output.hash_with_mode { + build_regular_ca_path(&path_name, hwm, Vec::::new(), false).map_err( + |e| DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e), + )? + } else { + build_output_path(derivation_or_fod_hash, &output_name, &path_name).map_err( + |e| { + DerivationError::InvalidOutputDerivationPath( + output_name.to_string(), + store_path::BuildStorePathError::InvalidName(e), + ) + }, + )? }; - // In the case this is a fixed-output derivation AND the - // hash mode is recursive AND the hash algo is sha256, a - // custom fingerprint is used to calculate the output path. - // - // In all other cases, the fingerprint is derived from the - // derivation_or_fod_hash. - let custom_fp: Option = - if num_outputs == 1 && output_name == "out" && output.hash_with_mode.is_some() { - match &output.hash_with_mode { - Some(NixHashWithMode::Recursive(NixHash { - digest, - algo: HashAlgo::Sha256, - })) => Some(format!( - "source:sha256:{}:{}:{}", - data_encoding::HEXLOWER.encode(digest), - store_path::STORE_DIR, - output_path_name - )), - _ => None, - } - } else { - None - }; - - // If no custom_fp has been determined, use the default one. - let fp = custom_fp.unwrap_or(format!( - "output:{}:{}:{}:{}", - output_name, - derivation_or_fod_hash.to_nix_hash_string(), - store_path::STORE_DIR, - output_path_name, - )); - - let abs_store_path = build_store_path_from_fingerprint(&output_path_name, &fp) - .map_err(|_e| DerivationError::InvalidOutputName(output_path_name.to_string()))? - .to_absolute_path(); - - output.path = abs_store_path.clone(); + output.path = abs_store_path.to_absolute_path(); self.environment - .insert(output_name.to_string(), abs_store_path); + .insert(output_name.to_string(), abs_store_path.to_absolute_path()); } Ok(()) } } + +/// Calculate the name part of the store path of a derivation [Output]. +/// +/// It's the name, and (if it's the non-out output), the output name +/// after a `-`. +fn output_path_name(derivation_name: &str, output_name: &str) -> String { + let mut output_path_name = derivation_name.to_string(); + if output_name != "out" { + output_path_name.push('-'); + output_path_name.push_str(output_name); + } + output_path_name +} diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 18aa23534c77..5daa16da03cb 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -1,7 +1,7 @@ use crate::derivation::output::Output; use crate::derivation::Derivation; use crate::nixhash::NixHash; -use crate::store_path::{build_store_path_from_references, StorePath}; +use crate::store_path::StorePath; use std::collections::BTreeSet; use std::fs::File; use std::io::Read; diff --git a/tvix/nix-compat/src/nixhash/algos.rs b/tvix/nix-compat/src/nixhash/algos.rs index 1864b4ef975f..d6b0bf47bdb7 100644 --- a/tvix/nix-compat/src/nixhash/algos.rs +++ b/tvix/nix-compat/src/nixhash/algos.rs @@ -5,7 +5,7 @@ use serde::{Deserialize, Serialize}; use crate::nixhash::Error; /// This are the hash algorithms supported by cppnix. -#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] pub enum HashAlgo { Md5, Sha1, diff --git a/tvix/nix-compat/src/nixhash/with_mode.rs b/tvix/nix-compat/src/nixhash/with_mode.rs index e895f0c62704..1908f27b4759 100644 --- a/tvix/nix-compat/src/nixhash/with_mode.rs +++ b/tvix/nix-compat/src/nixhash/with_mode.rs @@ -3,6 +3,20 @@ use crate::nixhash::{HashAlgo, NixHash}; use serde::ser::SerializeMap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; +pub enum NixHashMode { + Flat, + Recursive, +} + +impl NixHashMode { + pub fn prefix(self) -> &'static str { + match self { + Self::Flat => "", + Self::Recursive => "r:", + } + } +} + /// A Nix Hash can either be flat or recursive. #[derive(Clone, Debug, Eq, PartialEq)] pub enum NixHashWithMode { @@ -11,14 +25,25 @@ pub enum NixHashWithMode { } impl NixHashWithMode { + pub fn mode(&self) -> NixHashMode { + match self { + Self::Flat(_) => NixHashMode::Flat, + Self::Recursive(_) => NixHashMode::Recursive, + } + } + + pub fn digest(&self) -> &NixHash { + match self { + Self::Flat(ref h) => h, + Self::Recursive(ref h) => h, + } + } + /// Formats a [NixHashWithMode] in the Nix default hash format, /// which is the algo, followed by a colon, then the lower hex encoded digest. /// In case the hash itself is recursive, a `r:` is added as prefix pub fn to_nix_hash_string(&self) -> String { - match self { - NixHashWithMode::Flat(h) => h.to_nix_hash_string(), - NixHashWithMode::Recursive(h) => format!("r:{}", h.to_nix_hash_string()), - } + String::from(self.mode().prefix()) + &self.digest().to_nix_hash_string() } } diff --git a/tvix/nix-compat/src/store_path/mod.rs b/tvix/nix-compat/src/store_path/mod.rs index 0e004ccd7a10..7fe425d8edb2 100644 --- a/tvix/nix-compat/src/store_path/mod.rs +++ b/tvix/nix-compat/src/store_path/mod.rs @@ -4,10 +4,7 @@ use thiserror::Error; mod utils; -pub use utils::{ - build_store_path_from_fingerprint, build_store_path_from_references, compress_hash, - hash_placeholder, -}; +pub use utils::*; pub const DIGEST_SIZE: usize = 20; // lazy_static doesn't allow us to call NIXBASE32.encode_len(), so we ran it @@ -19,19 +16,32 @@ pub const ENCODED_DIGEST_SIZE: usize = 32; pub const STORE_DIR: &str = "/nix/store"; pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/"; -/// Errors that can occur during the validation of name characters. +/// Errors that can occur when parsing a literal store path #[derive(Debug, PartialEq, Eq, Error)] pub enum Error { #[error("Dash is missing between hash and name")] MissingDash(), #[error("Hash encoding is invalid: {0}")] InvalidHashEncoding(Nixbase32DecodeError), - #[error("Invalid name: {0}")] - InvalidName(String), + #[error("{0}")] + InvalidName(NameError), #[error("Tried to parse an absolute path which was missing the store dir prefix.")] MissingStoreDir(), } +/// Errors that can occur during the validation of name characters. +#[derive(Debug, PartialEq, Eq, Error)] +pub enum NameError { + #[error("Invalid name: {0}")] + InvalidName(String), +} + +impl From for Error { + fn from(e: NameError) -> Self { + Self::InvalidName(e) + } +} + /// Represents a path in the Nix store (a direct child of [STORE_DIR]). /// /// It starts with a digest (20 bytes), [crate::nixbase32]-encoded, @@ -56,7 +66,7 @@ impl StorePath { // - 1 dash // - 1 character for the name if s.len() < ENCODED_DIGEST_SIZE + 2 { - return Err(Error::InvalidName("".to_string())); + Err(NameError::InvalidName("".to_string()))?; } let digest = match nixbase32::decode(s[..ENCODED_DIGEST_SIZE].as_bytes()) { @@ -92,7 +102,7 @@ impl StorePath { } /// Checks a given &str to match the restrictions for store path names. - pub fn validate_name(s: &str) -> Result<(), Error> { + pub fn validate_name(s: &str) -> Result<(), NameError> { for c in s.chars() { if c.is_ascii_alphanumeric() || c == '-' @@ -105,7 +115,7 @@ impl StorePath { continue; } - return Err(Error::InvalidName(s.to_string())); + return Err(NameError::InvalidName(s.to_string())); } Ok(()) diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs index fa1908ee3552..c5fc48a94ffe 100644 --- a/tvix/nix-compat/src/store_path/utils.rs +++ b/tvix/nix-compat/src/store_path/utils.rs @@ -1,9 +1,24 @@ use crate::nixbase32; -use crate::nixhash::NixHash; +use crate::nixhash::{HashAlgo, NixHash, NixHashWithMode}; use crate::store_path::StorePath; use sha2::{Digest, Sha256}; +use thiserror::Error; -use super::Error; +use super::{NameError, STORE_DIR}; + +/// Errors that can occur when creating a content-addressed store path. +/// +/// This wraps the main [Error] which is just about invalid store path names. +#[derive(Debug, PartialEq, Eq, Error)] +pub enum BuildStorePathError { + #[error("{0}")] + InvalidName(NameError), + /// This error occurs when we have references outside the SHA-256 + + /// Recursive case. The restriction comes from upstream Nix. It may be + /// lifted at some point but there isn't a pressing need to anticipate that. + #[error("References were not supported as much as requested")] + InvalidReference(), +} /// compress_hash takes an arbitrarily long sequence of bytes (usually /// a hash digest), and returns a sequence of bytes of length @@ -27,33 +42,107 @@ pub fn compress_hash(input: &[u8]) -> [u8; OUTPUT_SIZE /// This builds a store path, by calculating the text_hash_string of either a /// derivation or a literal text file that may contain references. -pub fn build_store_path_from_references< - S: AsRef, - I: IntoIterator, - C: AsRef<[u8]>, ->( +pub fn build_text_path, I: IntoIterator, C: AsRef<[u8]>>( name: &str, content: C, references: I, -) -> Result { - let text_hash_str = text_hash_string(name, content, references); - build_store_path_from_fingerprint(name, &text_hash_str) +) -> Result { + build_store_path_from_fingerprint_parts( + &make_type("text", references, false), + // the nix_hash_string representation of the sha256 digest of some contents + &{ + let content_digest = { + let hasher = Sha256::new_with_prefix(content); + hasher.finalize() + }; + NixHash::new(crate::nixhash::HashAlgo::Sha256, content_digest.to_vec()) + }, + name, + ) +} + +/// This builds a more "regular" content-addressed store path +pub fn build_regular_ca_path, I: IntoIterator>( + name: &str, + hash_with_mode: &NixHashWithMode, + references: I, + self_reference: bool, +) -> Result { + match &hash_with_mode { + NixHashWithMode::Recursive( + ref hash @ NixHash { + algo: HashAlgo::Sha256, + .. + }, + ) => build_store_path_from_fingerprint_parts( + &make_type("source", references, self_reference), + hash, + name, + ) + .map_err(BuildStorePathError::InvalidName), + _ => { + if references.into_iter().next().is_some() { + return Err(BuildStorePathError::InvalidReference()); + } + if self_reference { + return Err(BuildStorePathError::InvalidReference()); + } + build_store_path_from_fingerprint_parts( + "output:out", + &{ + let content_digest = { + let mut hasher = Sha256::new_with_prefix("fixed:out:"); + hasher.update(hash_with_mode.mode().prefix()); + hasher.update(hash_with_mode.digest().algo.to_string()); + hasher.update(":"); + hasher.update( + &data_encoding::HEXLOWER.encode(&hash_with_mode.digest().digest), + ); + hasher.update(":"); + hasher.finalize() + }; + NixHash::new(crate::nixhash::HashAlgo::Sha256, content_digest.to_vec()) + }, + name, + ) + .map_err(BuildStorePathError::InvalidName) + } + } } -/// This builds a store path from a fingerprint. -/// Usually, that function is used from [build_store_path_from_references] and +/// This builds an input-addressed store path +/// +/// Input-addresed store paths are always derivation outputs, the "input" in question is the +/// derivation and its closure. +pub fn build_output_path( + drv_hash: &NixHash, + output_name: &str, + output_path_name: &str, +) -> Result { + build_store_path_from_fingerprint_parts( + &(String::from("output:") + output_name), + drv_hash, + &output_path_name, + ) +} + +/// This builds a store path from fingerprint parts. +/// Usually, that function is used from [build_text_path] and /// passed a "text hash string" (starting with "text:" as fingerprint), /// but other fingerprints starting with "output:" are also used in Derivation /// output path calculation. /// /// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes, /// and nixbase32-encoded (32 characters). -pub fn build_store_path_from_fingerprint( +fn build_store_path_from_fingerprint_parts( + ty: &str, + hash: &NixHash, name: &str, - fingerprint: &str, -) -> Result { +) -> Result { + let fingerprint = + String::from(ty) + ":" + &hash.to_nix_hash_string() + ":" + STORE_DIR + ":" + name; let digest = { - let hasher = Sha256::new_with_prefix(fingerprint); + let hasher = Sha256::new_with_prefix(&fingerprint); hasher.finalize() }; let compressed = compress_hash::<20>(&digest); @@ -74,31 +163,21 @@ pub fn build_store_path_from_fingerprint( /// - the nix_hash_string representation of the sha256 digest of some contents /// - the value of `storeDir` /// - the name -pub fn text_hash_string, I: IntoIterator, C: AsRef<[u8]>>( - name: &str, - content: C, +fn make_type, I: IntoIterator>( + ty: &str, references: I, + self_ref: bool, ) -> String { - let mut s = String::from("text:"); + let mut s = String::from(ty); for reference in references { - s.push_str(reference.as_ref()); s.push(':'); + s.push_str(reference.as_ref()); } - // the nix_hash_string representation of the sha256 digest of some contents - s.push_str( - &{ - let content_digest = { - let hasher = Sha256::new_with_prefix(content); - hasher.finalize() - }; - NixHash::new(crate::nixhash::HashAlgo::Sha256, content_digest.to_vec()) - } - .to_nix_hash_string(), - ); - - s.push_str(&format!(":{}:{}", crate::store_path::STORE_DIR, name)); + if self_ref { + s.push_str(":self"); + } s } @@ -121,16 +200,17 @@ pub fn hash_placeholder(name: &str) -> String { #[cfg(test)] mod test { - use crate::store_path::build_store_path_from_references; + use super::*; + use crate::nixhash::{NixHash, NixHashWithMode}; #[test] - fn build_store_path_with_zero_references() { + fn build_text_path_with_zero_references() { // This hash should match `builtins.toFile`, e.g.: // // nix-repl> builtins.toFile "foo" "bar" // "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo" - let store_path = build_store_path_from_references("foo", "bar", Vec::::new()) + let store_path = build_text_path("foo", "bar", Vec::::new()) .expect("build_store_path() should succeed"); assert_eq!( @@ -140,17 +220,17 @@ mod test { } #[test] - fn build_store_path_with_non_zero_references() { + fn build_text_path_with_non_zero_references() { // This hash should match: // // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}" // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" - let inner = build_store_path_from_references("foo", "bar", Vec::::new()) + let inner = build_text_path("foo", "bar", Vec::::new()) .expect("path_with_references() should succeed"); let inner_path = inner.to_absolute_path(); - let outer = build_store_path_from_references("baz", &inner_path, vec![inner_path.as_str()]) + let outer = build_text_path("baz", &inner_path, vec![inner_path.as_str()]) .expect("path_with_references() should succeed"); assert_eq!( @@ -158,4 +238,52 @@ mod test { "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" ); } + + #[test] + fn build_sha1_path() { + let outer = build_regular_ca_path( + "bar", + &NixHashWithMode::Recursive(NixHash { + algo: HashAlgo::Sha1, + digest: data_encoding::HEXLOWER + .decode(b"0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33") + .expect("hex should decode"), + }), + Vec::::new(), + false, + ) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar" + ); + } + + #[test] + fn build_store_path_with_non_zero_references() { + // This hash should match: + // + // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}" + // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + // + // $ nix store make-content-addressed /nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz + // rewrote '/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz' to '/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz' + let outer = build_regular_ca_path( + "baz", + &NixHashWithMode::Recursive(NixHash { + algo: HashAlgo::Sha256, + digest: nixbase32::decode(b"1xqkzcb3909fp07qngljr4wcdnrh1gdam1m2n29i6hhrxlmkgkv1") + .expect("hex should decode"), + }), + vec!["/nix/store/dxwkwjzdaq7ka55pkk252gh32bgpmql4-foo"], + false, + ) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz" + ); + } } diff --git a/tvix/store/src/proto/tests/pathinfo.rs b/tvix/store/src/proto/tests/pathinfo.rs index 0cc8f87587f1..54a76fc6c554 100644 --- a/tvix/store/src/proto/tests/pathinfo.rs +++ b/tvix/store/src/proto/tests/pathinfo.rs @@ -66,7 +66,7 @@ fn validate_no_node( }, Err(ValidatePathInfoError::InvalidNodeName( "invalid".to_string(), - store_path::Error::InvalidName("".to_string()) + store_path::Error::InvalidName(store_path::NameError::InvalidName("".to_string())) )); "invalid node name" )] @@ -111,7 +111,7 @@ fn validate_directory( }, Err(ValidatePathInfoError::InvalidNodeName( "invalid".to_string(), - store_path::Error::InvalidName("".to_string()) + store_path::Error::InvalidName(store_path::NameError::InvalidName("".to_string())) )); "invalid node name" )] @@ -141,7 +141,7 @@ fn validate_file(t_file_node: proto::FileNode, t_result: Result