From a94a1434cc2a57b330a2ad6f310573fb70e15e8a Mon Sep 17 00:00:00 2001
From: Florian Klink
Date: Thu, 26 Jan 2023 14:18:12 +0100
Subject: fix(tvix/cli): handle SRI hashes in outputHash

Instead of being called with `md5`, `sha1`, `sha256` or `sha512`,
`fetchurl.nix` (from corepkgs / `
Autosubmit: flokli
Tested-by: BuildkiteCI
---
 tvix/cli/Cargo.toml        |   6 +++
 tvix/cli/src/derivation.rs | 126 +++++++++++++++++++++++++++++++++++++++------
 tvix/cli/src/errors.rs     |  32 +++++++++++-
 3 files changed, 146 insertions(+), 18 deletions(-)

(limited to 'tvix/cli')

diff --git a/tvix/cli/Cargo.toml b/tvix/cli/Cargo.toml
index 45ed05e089..83796855b6 100644
--- a/tvix/cli/Cargo.toml
+++ b/tvix/cli/Cargo.toml
@@ -12,6 +12,12 @@ tvix-eval = { path = "../eval" }
 tvix-derivation = { path = "../derivation" }
 rustyline = "10.0.0"
 clap = { version = "4.0", features = ["derive", "env"] }
+tvix-store-bin = { path = "../store" }
 dirs = "4.0.0"
 smol_str = "0.1"
 aho-corasick = "0.7"
+ssri = "7.0.0"
+data-encoding = "2.3.3"
+
+[dev-dependencies]
+test-case = "2.2.2"
diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs
index d57503a696..122330b963 100644
--- a/tvix/cli/src/derivation.rs
+++ b/tvix/cli/src/derivation.rs
@@ -1,5 +1,6 @@
 //! Implements `builtins.derivation`, the core of what makes Nix build packages.
 
+use data_encoding::BASE64;
 use std::cell::RefCell;
 use std::collections::{btree_map, BTreeSet};
 use std::rc::Rc;
@@ -79,6 +80,82 @@ fn populate_inputs>(
     }
 }
 
+/// Due to the support for SRI hashes, and how these are passed along to
+/// builtins.derivation, outputHash and outputHashAlgo can have values which
+/// need to be further modified before constructing the Derivation struct.
+///
+/// If outputHash is an SRI hash, outputHashAlgo must either be an empty
+/// string, or the hash algorithm as specified in the (single) SRI (entry).
+/// SRI strings with multiple hash algorithms are not supported.
+///
+/// In case an SRI string was used, the (single) fixed output is populated
+/// with the hash algo name, and the hash digest is populated with the
+/// (lowercase) hex encoding of the digest.
+///
+/// These values are only rewritten for the outputs, not what's passed to env.
+fn construct_output_hash(digest: &str, algo: &str, hash_mode: Option<&str>) -> Result<Hash, Error> {
+    let sri_parsed = digest.parse::<ssri::Integrity>();
+    // SRI strings can embed multiple hashes with different algos, but that's probably not supported
+
+    let (digest, algo): (String, String) = match sri_parsed {
+        Err(e) => {
+            // unable to parse as SRI, but algo not set
+            if algo.is_empty() {
+                // the ssri parse error is kept as a string, so that our Error can derive PartialEq
+                return Err(Error::InvalidSRIString(e.to_string()));
+            }
+
+            // algo is set. Assume the digest is set to some nixbase32.
+            // TODO: more validation here
+
+            (digest.to_string(), algo.to_string())
+        }
+        Ok(sri_parsed) => {
+            // We don't support more than one SRI hash
+            if sri_parsed.hashes.len() != 1 {
+                return Err(Error::UnsupportedSRIMultiple(sri_parsed.hashes.len()).into());
+            }
+
+            // grab the first (and only hash)
+            let sri_parsed_hash = &sri_parsed.hashes[0];
+
+            // ensure the algorithm in the SRI is supported
+            if !(sri_parsed_hash.algorithm == ssri::Algorithm::Sha1
+                || sri_parsed_hash.algorithm == ssri::Algorithm::Sha256
+                || sri_parsed_hash.algorithm == ssri::Algorithm::Sha512)
+            {
+                return Err(Error::UnsupportedSRIAlgo(sri_parsed_hash.algorithm.to_string()));
+            }
+
+            // if algo is set, it needs to match what the SRI says
+            if !algo.is_empty() && algo != sri_parsed_hash.algorithm.to_string() {
+                return Err(Error::ConflictingSRIHashAlgo(
+                    algo.to_string(),
+                    sri_parsed_hash.algorithm.to_string(),
+                ));
+            }
+
+            // the digest comes base64-encoded. We need to decode, and re-encode as hexlower.
+            match BASE64.decode(sri_parsed_hash.digest.as_bytes()) {
+                Err(e) => return Err(Error::InvalidSRIDigest(e).into()),
+                Ok(sri_digest) => (
+                    data_encoding::HEXLOWER.encode(&sri_digest),
+                    sri_parsed_hash.algorithm.to_string(),
+                ),
+            }
+        }
+    };
+
+    // mutate the algo string a bit more, depending on hashMode
+    let algo = match hash_mode {
+        None | Some("flat") => algo,
+        Some("recursive") => format!("r:{}", algo),
+        Some(other) => return Err(Error::InvalidOutputHashMode(other.to_string()).into()),
+    };
+
+    Ok(Hash { algo, digest })
+}
+
 /// Populate the output configuration of a derivation based on the
 /// parameters passed to the call, flipping the required
 /// parameters for a fixed-output derivation if necessary.
@@ -102,6 +179,12 @@ fn populate_output_configuration(
                     .as_str()
                     .to_string();
 
+                let digest_str = hash
+                    .force(vm)?
+                    .coerce_to_string(CoercionKind::Strong, vm)?
+                    .as_str()
+                    .to_string();
+
                 let hash_mode = match hash_mode {
                     None => None,
                     Some(mode) => Some(
@@ -112,23 +195,12 @@ fn populate_output_configuration(
                     ),
                 };
 
-                let algo = match hash_mode.as_deref() {
-                    None | Some("flat") => algo,
-                    Some("recursive") => format!("r:{}", algo),
-                    Some(other) => {
-                        return Err(Error::InvalidOutputHashMode(other.to_string()).into())
-                    }
-                };
-
-                out.hash = Some(Hash {
-                    algo,
-
-                    digest: hash
-                        .force(vm)?
-                        .coerce_to_string(CoercionKind::Strong, vm)?
-                        .as_str()
-                        .to_string(),
-                });
+                // construct out.hash
+                out.hash = Some(construct_output_hash(
+                    &digest_str,
+                    &algo,
+                    hash_mode.as_deref(),
+                )?);
             }
         },
 
@@ -371,6 +443,7 @@ pub use derivation_builtins::builtins as derivation_builtins;
 #[cfg(test)]
 mod tests {
     use super::*;
+    use test_case::test_case;
     use tvix_eval::observer::NoOpObserver;
 
     static mut OBSERVER: NoOpObserver = NoOpObserver {};
@@ -576,4 +649,23 @@ mod tests {
             vec!["--foo".to_string(), "42".to_string(), "--bar".to_string()]
         );
     }
+
+    #[test_case(
+        "sha256-swapHA/ZO8QoDPwumMt6s5gf91oYe+oyk4EfRSyJqMg=", "sha256", Some("flat"),
+        Ok(Hash { algo: "sha256".to_string(), digest: "b306a91c0fd93bc4280cfc2e98cb7ab3981ff75a187bea3293811f452c89a8c8".to_string() });
+        "sha256 and SRI"
+    )]
+    #[test_case(
+        "sha256-s6JN6XqP28g1uYMxaVAQMLiXcDG8tUs7OsE3QPhGqzA=", "", Some("flat"),
+        Ok(Hash { algo: "sha256".to_string(), digest: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30".to_string() });
+        "SRI only"
+    )]
+    fn test_construct_output_hash(
+        digest: &str,
+        algo: &str,
+        hash_mode: Option<&str>,
+        result: Result<Hash, Error>,
+    ) {
+        assert_eq!(construct_output_hash(digest, algo, hash_mode), result);
+    }
 }
diff --git a/tvix/cli/src/errors.rs b/tvix/cli/src/errors.rs
index cbf8ed9457..5791c5332b 100644
--- a/tvix/cli/src/errors.rs
+++ b/tvix/cli/src/errors.rs
@@ -1,7 +1,7 @@
 use std::{error, fmt::Display, rc::Rc};
 use tvix_derivation::DerivationError;
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum Error {
     // Errors related to derivation construction
     DuplicateOutput(String),
@@ -10,6 +10,11 @@ pub enum Error {
     ShadowedOutput(String),
     InvalidDerivation(DerivationError),
     InvalidOutputHashMode(String),
+    UnsupportedSRIAlgo(String),
+    UnsupportedSRIMultiple(usize),
+    InvalidSRIDigest(data_encoding::DecodeError),
+    InvalidSRIString(String),
+    ConflictingSRIHashAlgo(String, String),
 }
 
 impl Display for Error {
@@ -38,6 +43,31 @@ impl Display for Error {
                 f,
                 "invalid output hash mode: '{mode}', only 'recursive' and 'flat` are supported"
             ),
+            Error::UnsupportedSRIAlgo(algo) => {
+                write!(
+                    f,
+                    "unsupported sri algorithm: {algo}, only sha1, sha256 or sha512 is supported"
+                )
+            }
+            Error::UnsupportedSRIMultiple(n) => {
+                write!(
+                    f,
+                    "invalid number of sri hashes in string ({n}), only one hash is supported"
+                )
+            }
+            Error::InvalidSRIDigest(err) => {
+                write!(f, "invalid sri digest: {}", err)
+            }
+            Error::InvalidSRIString(err) => {
+                write!(f, "failed to parse SRI string: {}", err)
+            }
+            Error::ConflictingSRIHashAlgo(algo, sri_algo) => {
+                write!(
+                    f,
+                    "outputHashAlgo is set to {}, but outputHash contains SRI with algo {}",
+                    algo, sri_algo
+                )
+            }
         }
     }
 }
-- 
cgit 1.4.1