From b932cf2d85af64a383f8020394301889f5df04c5 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 31 Mar 2023 08:27:16 -0400 Subject: refactor(nix-compat) Make `nixhash_*` mods hierarchical They can go under `nixhash` Change-Id: Ia15835c57130b66d58f5df80ae9595dceee00941 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8408 Reviewed-by: flokli Tested-by: BuildkiteCI --- tvix/nix-compat/src/nixhash/mod.rs | 454 +++++++++++++++++++++++++++++++++++++ 1 file changed, 454 insertions(+) create mode 100644 tvix/nix-compat/src/nixhash/mod.rs (limited to 'tvix/nix-compat/src/nixhash/mod.rs') diff --git a/tvix/nix-compat/src/nixhash/mod.rs b/tvix/nix-compat/src/nixhash/mod.rs new file mode 100644 index 000000000000..dccba7a5ba27 --- /dev/null +++ b/tvix/nix-compat/src/nixhash/mod.rs @@ -0,0 +1,454 @@ +use crate::nixbase32; +use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER}; +use thiserror::Error; + +mod algos; +mod with_mode; + +pub use algos::HashAlgo; +pub use with_mode::NixHashWithMode; + +/// Nix allows specifying hashes in various encodings, and magically just +/// derives the encoding. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct NixHash { + pub digest: Vec, + + pub algo: HashAlgo, +} + +impl NixHash { + /// Constructs a new [NixHash] by specifying [HashAlgo] and digest. + pub fn new(algo: HashAlgo, digest: Vec) -> Self { + Self { algo, digest } + } + + /// Formats a [NixHash] in the Nix default hash format, + /// which is the algo, followed by a colon, then the lower hex encoded digest. + pub fn to_nix_hash_string(&self) -> String { + format!("{}:{}", self.algo, HEXLOWER.encode(&self.digest)) + } +} + +/// Errors related to NixHash construction. +#[derive(Debug, Error)] +pub enum Error { + #[error("invalid hash algo: {0}")] + InvalidAlgo(String), + #[error("invalid SRI string: {0}")] + InvalidSRI(String), + #[error("invalid encoded digest length '{0}' for algo {1}")] + InvalidEncodedDigestLength(usize, HashAlgo), + #[error("invalid base16 encoding: {0}")] + InvalidBase16Encoding(data_encoding::DecodeError), + #[error("invalid base32 encoding: {0}")] + InvalidBase32Encoding(nixbase32::Nixbase32DecodeError), + #[error("invalid base64 encoding: {0}")] + InvalidBase64Encoding(data_encoding::DecodeError), + #[error("conflicting hash algo: {0} (hash_algo) vs {1} (inline)")] + ConflictingHashAlgos(String, String), + #[error("missing inline hash algo, but no externally-specified algo: {0}")] + MissingInlineHashAlgo(String), +} + +/// parses a string to a nix hash. +/// +/// Hashes can be: +/// - Nix hash strings +/// - SRI hashes +/// - bare digests +/// +/// Encoding for Nix hash strings or bare digests can be: +/// - base16 (lowerhex), +/// - nixbase32, +/// - base64 (StdEncoding) +/// - sri string +/// +/// The encoding is derived from the length of the string and the hash type. +/// The hash is communicated out-of-band, but might also be in-band (in the +/// case of a nix hash string or SRI), in which it needs to be consistent with the +/// one communicated out-of-band. +pub fn from_str(s: &str, algo_str: Option<&str>) -> Result { + // validate algo_str, construct hash_algo + let algo: Option = match &algo_str { + Some("sha1") => Some(HashAlgo::Sha1), + Some("sha256") => Some(HashAlgo::Sha256), + Some("sha512") => Some(HashAlgo::Sha512), + Some("md5") => Some(HashAlgo::Md5), + Some(e) => return Err(Error::InvalidAlgo(e.to_string())), + None => None, + }; + + // peek at the beginning of the string. Let's detect the SRI path first. + if s.starts_with("sha1-") + || s.starts_with("sha256-") + || s.starts_with("sha512-") + || s.starts_with("md5-") + { + let parsed_nixhash = from_sri_str(s)?; + // ensure the algo matches with what has been passed externally, if so. + if let Some(algo) = algo { + if algo != parsed_nixhash.algo { + return Err(Error::ConflictingHashAlgos( + algo.to_string(), + parsed_nixhash.algo.to_string(), + )); + } + } + return Ok(parsed_nixhash); + } + + // Now, peek at the beginning again to see if it's a Nix Hash + if s.starts_with("sha1:") + || s.starts_with("sha256:") + || s.starts_with("sha512:") + || s.starts_with("md5:") + { + let parsed_nixhash = from_nix_str(s)?; + // ensure the algo matches with what has been passed externally, if so. + if let Some(algo) = algo { + if algo != parsed_nixhash.algo { + return Err(Error::ConflictingHashAlgos( + algo.to_string(), + parsed_nixhash.algo.to_string(), + )); + } + } + return Ok(parsed_nixhash); + } + + // In all other cases, we assume a bare digest, so there MUST be an externally-passed algo. + match algo { + // Fail if there isn't. + None => Err(Error::MissingInlineHashAlgo(s.to_string())), + Some(algo) => decode_digest(s, algo), + } +} + +/// Parses a Nix hash string ($algo:$digest) to a NixHash. +pub fn from_nix_str(s: &str) -> Result { + if let Some(rest) = s.strip_prefix("sha1:") { + decode_digest(rest, HashAlgo::Sha1) + } else if let Some(rest) = s.strip_prefix("sha256:") { + decode_digest(rest, HashAlgo::Sha256) + } else if let Some(rest) = s.strip_prefix("sha512:") { + decode_digest(rest, HashAlgo::Sha512) + } else if let Some(rest) = s.strip_prefix("md5:") { + decode_digest(rest, HashAlgo::Md5) + } else { + Err(Error::InvalidAlgo(s.to_string())) + } +} + +/// Parses a Nix SRI string to a NixHash. +/// Contrary to the SRI spec, Nix doesn't support SRI strings with multiple hashes, +/// only supports sha256 and sha512 from the spec, and supports sha1 and md5 +/// additionally. +/// It also accepts SRI strings where the base64 has an with invalid padding. +pub fn from_sri_str(s: &str) -> Result { + // try to find the first occurence of "-" + let idx = s.as_bytes().iter().position(|&e| e == b'-'); + + if idx.is_none() { + return Err(Error::InvalidSRI(s.to_string())); + } + + let idx = idx.unwrap(); + + // try to map the part before that `-` to a supported hash algo: + let algo: HashAlgo = s[..idx].try_into()?; + + // the rest should be the digest (as Nix doesn't support more than one hash in an SRI string). + let encoded_digest = &s[idx + 1..]; + let actual_len = encoded_digest.as_bytes().len(); + + // verify the digest length matches what we'd expect from the hash function, + // and then either try decoding as BASE64 or BASE64_NOPAD. + // This will also reject SRI strings with more than one hash, because the length won't match + if actual_len == BASE64.encode_len(hash_algo_length(&algo)) { + let digest: Vec = BASE64 + .decode(encoded_digest.as_bytes()) + .map_err(Error::InvalidBase64Encoding)?; + Ok(NixHash { digest, algo }) + } else if actual_len == BASE64_NOPAD.encode_len(hash_algo_length(&algo)) { + let digest: Vec = BASE64_NOPAD + .decode(encoded_digest.as_bytes()) + .map_err(Error::InvalidBase64Encoding)?; + Ok(NixHash { digest, algo }) + } else { + // NOTE: As of now, we reject SRI hashes containing additional + // characters (which upstream Nix seems to simply truncate), as + // there's no occurence of this is in nixpkgs. + // It most likely should also be a bug in Nix. + Err(Error::InvalidEncodedDigestLength( + encoded_digest.as_bytes().len(), + algo, + )) + } +} + +/// decode a plain digest depending on the hash algo specified externally. +fn decode_digest(s: &str, algo: HashAlgo) -> Result { + // for the chosen hash algo, calculate the expected (decoded) digest length + // (as bytes) + let expected_digest_len = hash_algo_length(&algo); + + Ok(NixHash { + digest: match s.len() { + n if n == data_encoding::HEXLOWER.encode_len(expected_digest_len) => { + data_encoding::HEXLOWER + .decode(s.as_ref()) + .map_err(Error::InvalidBase16Encoding) + } + n if n == nixbase32::encode_len(expected_digest_len) => { + nixbase32::decode(s.as_ref()).map_err(Error::InvalidBase32Encoding) + } + n if n == BASE64.encode_len(expected_digest_len) => BASE64 + .decode(s.as_ref()) + .map_err(Error::InvalidBase64Encoding), + _ => return Err(Error::InvalidEncodedDigestLength(s.len(), algo)), + }?, + algo, + }) +} + +// return the number of bytes in the digest of the given hash algo. +fn hash_algo_length(hash_algo: &HashAlgo) -> usize { + match hash_algo { + HashAlgo::Sha1 => 20, + HashAlgo::Sha256 => 32, + HashAlgo::Sha512 => 64, + HashAlgo::Md5 => 16, + } +} + +#[cfg(test)] +mod tests { + use crate::{ + nixbase32, + nixhash::{self, HashAlgo, NixHash}, + }; + use test_case::test_case; + const DIGEST_SHA1: &[u8] = &[ + 0x60, 0x16, 0x77, 0x79, 0x97, 0xc3, 0x0a, 0xb0, 0x24, 0x13, 0xcf, 0x50, 0x95, 0x62, 0x2c, + 0xd7, 0x92, 0x42, 0x83, 0xac, + ]; + + const DIGEST_SHA256: &[u8] = &[ + 0xa5, 0xce, 0x9c, 0x15, 0x5e, 0xd0, 0x93, 0x97, 0x61, 0x46, 0x46, 0xc9, 0x71, 0x7f, 0xc7, + 0xcd, 0x94, 0xb1, 0x02, 0x3d, 0x7b, 0x76, 0xb6, 0x18, 0xd4, 0x09, 0xe4, 0xfe, 0xfd, 0x6e, + 0x9d, 0x39, + ]; + + const DIGEST_SHA512: &[u8] = &[ + 0xab, 0x40, 0xd0, 0xbe, 0x35, 0x41, 0xf0, 0x77, 0x4b, 0xba, 0x78, 0x15, 0xd1, 0x3d, 0x10, + 0xb0, 0x32, 0x52, 0xe9, 0x6e, 0x95, 0xf7, 0xdb, 0xb4, 0xee, 0x99, 0xa3, 0xb4, 0x31, 0xc2, + 0x16, 0x62, 0xfd, 0x69, 0x71, 0xa0, 0x20, 0x16, 0x0e, 0x39, 0x84, 0x8a, 0xa5, 0xf3, 0x05, + 0xb9, 0xbe, 0x0f, 0x78, 0x72, 0x7b, 0x2b, 0x07, 0x89, 0xe3, 0x9f, 0x12, 0x4d, 0x21, 0xe9, + 0x2b, 0x8f, 0x39, 0xef, + ]; + const DIGEST_MD5: &[u8] = &[ + 0xc4, 0x87, 0x4a, 0x88, 0x97, 0x44, 0x0b, 0x39, 0x3d, 0x86, 0x2d, 0x8f, 0xd4, 0x59, 0x07, + 0x3f, + ]; + + fn to_base16(digest: &[u8]) -> String { + data_encoding::HEXLOWER.encode(digest) + } + + fn to_nixbase32(digest: &[u8]) -> String { + nixbase32::encode(digest) + } + + fn to_base64(digest: &[u8]) -> String { + data_encoding::BASE64.encode(digest) + } + + fn to_base64_nopad(digest: &[u8]) -> String { + data_encoding::BASE64_NOPAD.encode(digest) + } + + // TODO + fn make_nixhash(algo: &HashAlgo, digest_encoded: String) -> String { + format!("{}:{}", algo, digest_encoded) + } + fn make_sri_string(algo: &HashAlgo, digest_encoded: String) -> String { + format!("{}-{}", algo, digest_encoded) + } + + /// Test parsing a hash string in various formats, and also when/how the out-of-band algo is needed. + #[test_case(DIGEST_SHA1, HashAlgo::Sha1; "sha1")] + #[test_case(DIGEST_SHA256, HashAlgo::Sha256; "sha256")] + #[test_case(DIGEST_SHA512, HashAlgo::Sha512; "sha512")] + #[test_case(DIGEST_MD5, HashAlgo::Md5; "md5")] + fn from_str(digest: &[u8], algo: HashAlgo) { + let expected_hash = NixHash { + digest: digest.to_vec(), + algo: algo.clone(), + }; + // parse SRI + { + // base64 without out-of-band algo + let s = make_sri_string(&algo, to_base64(digest)); + let h = nixhash::from_str(&s, None).expect("must succeed"); + assert_eq!(expected_hash, h); + + // base64 with out-of-band-algo + let s = make_sri_string(&algo, to_base64(digest)); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + + // base64_nopad without out-of-band algo + let s = make_sri_string(&algo, to_base64_nopad(digest)); + let h = nixhash::from_str(&s, None).expect("must succeed"); + assert_eq!(expected_hash, h); + + // base64_nopad with out-of-band-algo + let s = make_sri_string(&algo, to_base64_nopad(digest)); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + } + + // parse plain base16. should succeed with algo out-of-band, but fail without. + { + let s = to_base16(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + } + + // parse plain nixbase32. should succeed with algo out-of-band, but fail without. + { + let s = to_nixbase32(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + } + + // parse plain base64. should succeed with algo out-of-band, but fail without. + { + let s = to_base64(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + } + + // parse Nix hash strings + { + // base16. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(&algo, to_base16(digest)); + assert_eq!( + expected_hash, + nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + // nixbase32. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(&algo, to_nixbase32(digest)); + assert_eq!( + expected_hash, + nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + // base64. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(&algo, to_base64(digest)); + assert_eq!( + expected_hash, + nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + } + } + + /// Test parsing an SRI hash via the [nixhash::from_sri_str] method. + #[test] + fn from_sri_str() { + let nix_hash = nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank/v1unTk=") + .expect("must succeed"); + + assert_eq!(HashAlgo::Sha256, nix_hash.algo); + assert_eq!( + vec![ + 0xa5, 0xce, 0x9c, 0x15, 0x5e, 0xd0, 0x93, 0x97, 0x61, 0x46, 0x46, 0xc9, 0x71, 0x7f, + 0xc7, 0xcd, 0x94, 0xb1, 0x02, 0x3d, 0x7b, 0x76, 0xb6, 0x18, 0xd4, 0x09, 0xe4, 0xfe, + 0xfd, 0x6e, 0x9d, 0x39 + ], + nix_hash.digest + ) + } + + /// Ensure we detect truncated base64 digests, where the digest size + /// doesn't match what's expected from that hash function. + #[test] + fn from_sri_str_truncated() { + nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank") + .expect_err("must fail"); + } + + /// Ensure we fail on SRI hashes that Nix doesn't support. + #[test] + fn from_sri_str_unsupported() { + nixhash::from_sri_str( + "sha384-o4UVSl89mIB0sFUK+3jQbG+C9Zc9dRlV/Xd3KAvXEbhqxu0J5OAdg6b6VHKHwQ7U", + ) + .expect_err("must fail"); + } + + /// Ensure we reject invalid base64 encoding + #[test] + fn from_sri_str_invalid_base64() { + nixhash::from_sri_str("sha256-invalid=base64").expect_err("must fail"); + } + + /// Ensure we reject SRI strings with multiple hashes, as Nix doesn't support that. + #[test] + fn from_sri_str_unsupported_multiple() { + nixhash::from_sri_str("sha256-ngth6szLtC1IJIYyz3lhftzL8SkrJkqPyPve+dGqa1Y= sha512-q0DQvjVB8HdLungV0T0QsDJS6W6V99u07pmjtDHCFmL9aXGgIBYOOYSKpfMFub4PeHJ7KweJ458STSHpK4857w==").expect_err("must fail"); + } + + /// Nix also accepts SRI strings with missing padding, but only in case the + /// string is expressed as SRI, so it still needs to have a `sha256-` prefix. + /// + /// This both seems to work if it is passed with and without specifying the + /// hash algo out-of-band (hash = "sha256-…" or sha256 = "sha256-…") + /// + /// Passing the same broken base64 string, but not as SRI, while passing + /// the hash algo out-of-band does not work. + #[test] + fn sha256_broken_padding() { + let broken_base64 = "fgIr3TyFGDAXP5+qoAaiMKDg/a1MlT6Fv/S/DaA24S8"; + // if padded with a trailing '=' + let expected_digest = vec![ + 0x7e, 0x02, 0x2b, 0xdd, 0x3c, 0x85, 0x18, 0x30, 0x17, 0x3f, 0x9f, 0xaa, 0xa0, 0x06, + 0xa2, 0x30, 0xa0, 0xe0, 0xfd, 0xad, 0x4c, 0x95, 0x3e, 0x85, 0xbf, 0xf4, 0xbf, 0x0d, + 0xa0, 0x36, 0xe1, 0x2f, + ]; + + // passing hash algo out of band should succeed + let nix_hash = nixhash::from_str(&format!("sha256-{}", &broken_base64), Some("sha256")) + .expect("must succeed"); + assert_eq!(&expected_digest, &nix_hash.digest); + + // not passing hash algo out of band should succeed + let nix_hash = + nixhash::from_str(&format!("sha256-{}", &broken_base64), None).expect("must succeed"); + assert_eq!(&expected_digest, &nix_hash.digest); + + // not passing SRI, but hash algo out of band should fail + nixhash::from_str(broken_base64, Some("sha256")).expect_err("must fail"); + } +} -- cgit 1.4.1