about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-01-31T23·21+0100
committerflokli <flokli@flokli.de>2023-02-01T15·29+0000
commit265dd36b9819031e83342b4c2bf061f3c6bae22e (patch)
treec9ce0325eda15d745a18f90d805aca42e094967b
parent366d2c5e92dd36c3bb81397c8031c6e0227de3ce (diff)
feat(tvix/nix-compat): add nixhash module r/5806
This module takes care of parsing various hashes and algorithms.

It will get used to modify derivation output hashes in the next CL.

Change-Id: Idc07c401dbb7510f49883ac02b8379b9a5d930c7
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7990
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
-rw-r--r--tvix/nix-compat/src/lib.rs1
-rw-r--r--tvix/nix-compat/src/nixhash.rs358
2 files changed, 359 insertions, 0 deletions
diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs
index 94fd88549a..40237f94c2 100644
--- a/tvix/nix-compat/src/lib.rs
+++ b/tvix/nix-compat/src/lib.rs
@@ -3,6 +3,7 @@ use sha2::{Digest, Sha256};
 pub mod derivation;
 pub mod nar;
 pub mod nixbase32;
+pub mod nixhash;
 pub mod store_path;
 
 /// Nix placeholders (i.e. values returned by `builtins.placeholder`)
diff --git a/tvix/nix-compat/src/nixhash.rs b/tvix/nix-compat/src/nixhash.rs
new file mode 100644
index 0000000000..ceea886a4c
--- /dev/null
+++ b/tvix/nix-compat/src/nixhash.rs
@@ -0,0 +1,358 @@
+use std::fmt::Display;
+use thiserror::Error;
+
+use crate::nixbase32;
+
+/// Nix allows specifying hashes in various encodings, and magically just
+/// derives the encoding.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub struct NixHash {
+    pub digest: Vec<u8>,
+    pub algo: HashAlgo,
+}
+
+/// This are the hash algorithms supported by cppnix.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum HashAlgo {
+    Md5,
+    Sha1,
+    Sha256,
+    Sha512,
+}
+
+impl Display for HashAlgo {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match &self {
+            HashAlgo::Md5 => write!(f, "md5"),
+            HashAlgo::Sha1 => write!(f, "sha1"),
+            HashAlgo::Sha256 => write!(f, "sha256"),
+            HashAlgo::Sha512 => write!(f, "sha512"),
+        }
+    }
+}
+
+impl TryFrom<&str> for HashAlgo {
+    type Error = Error;
+
+    fn try_from(algo_str: &str) -> Result<Self, Self::Error> {
+        match algo_str {
+            "md5" => Ok(Self::Md5),
+            "sha1" => Ok(Self::Sha1),
+            "sha256" => Ok(Self::Sha256),
+            "sha512" => Ok(Self::Sha512),
+            _ => Err(Error::InvalidAlgo(algo_str.to_string())),
+        }
+    }
+}
+
+// return the number of bytes in the digest of the given hash algo.
+fn hash_algo_length(hash_algo: &HashAlgo) -> usize {
+    match hash_algo {
+        HashAlgo::Sha1 => 20,
+        HashAlgo::Sha256 => 32,
+        HashAlgo::Sha512 => 64,
+        HashAlgo::Md5 => 16,
+    }
+}
+
+/// Errors related to NixHash construction.
+#[derive(Debug, Error)]
+pub enum Error {
+    #[error("invalid hash algo: {0}")]
+    InvalidAlgo(String),
+    #[error("invalid sri string")]
+    InvalidSRI,
+    #[error("invalid encoded digest length '{0}' for algo {0}")]
+    InvalidEncodedDigestLength(usize, HashAlgo),
+    #[error("invalid base16 encoding: {0}")]
+    InvalidBase16Encoding(data_encoding::DecodeError),
+    #[error("invalid base32 encoding: {0}")]
+    InvalidBase32Encoding(nixbase32::Nixbase32DecodeError),
+    #[error("invalid base64 encoding: {0}")]
+    InvalidBase64Encoding(data_encoding::DecodeError),
+    #[error("conflicting hash algo: {0} (hash_algo) vs {1} (SRI)")]
+    ConflictingHashAlgos(String, String),
+}
+
+/// parses a string to a nix hash.
+///
+/// strings can be encoded as:
+/// - base16 (lowerhex),
+/// - nixbase32,
+/// - base64 (StdEncoding)
+/// - sri string
+///
+/// The encoding is derived from the length of the string and the hash type.
+/// The hash type may be omitted if the hash is expressed in SRI.
+/// Even though SRI allows specifying multiple algorithms, Nix does only
+/// support a single one.
+pub fn from_str(s: &str, algo_str: Option<&str>) -> Result<NixHash, Error> {
+    // validate algo_str, construct hash_algo
+    let hash_algo: Option<HashAlgo> = match &algo_str {
+        Some("sha1") => Some(HashAlgo::Sha1),
+        Some("sha256") => Some(HashAlgo::Sha256),
+        Some("sha512") => Some(HashAlgo::Sha512),
+        Some("md5") => Some(HashAlgo::Md5),
+        Some(e) => return Err(Error::InvalidAlgo(e.to_string())),
+        None => None,
+    };
+
+    // in case the hash algo is set, try to detect the encoding
+    if let Some(hash_algo) = hash_algo {
+        // for the chosen hash algo, calculate the expected digest length (as bytes)
+        let expected_digest_len = hash_algo_length(&hash_algo);
+
+        let decoded_digest = match s.len() {
+            n if n == data_encoding::HEXLOWER.encode_len(expected_digest_len) => {
+                data_encoding::HEXLOWER
+                    .decode(s.as_ref())
+                    .map_err(Error::InvalidBase16Encoding)
+            }
+            n if n == nixbase32::encode_len(expected_digest_len) => {
+                nixbase32::decode(s.as_ref()).map_err(Error::InvalidBase32Encoding)
+            }
+            n if n == data_encoding::BASE64.encode_len(expected_digest_len) => {
+                data_encoding::BASE64
+                    .decode(s.as_ref())
+                    .map_err(Error::InvalidBase64Encoding)
+            }
+            _ => {
+                // another length than what we expected from the passed hash algo
+                // try to parse as SRI
+                let nix_hash = from_sri_str(s)?;
+
+                // ensure the algo matches what was specified
+                if hash_algo != nix_hash.algo {
+                    return Err(Error::ConflictingHashAlgos(
+                        hash_algo.to_string(),
+                        nix_hash.algo.to_string(),
+                    ));
+                }
+
+                // return
+                return Ok(nix_hash);
+            }
+        }?;
+
+        Ok(NixHash {
+            digest: decoded_digest,
+            algo: hash_algo,
+        })
+    } else {
+        // try to decode as SRI
+        let nix_hash = from_sri_str(s)?;
+        // and return
+        Ok(nix_hash)
+    }
+}
+
+/// Like [from_str], but only for SRI string.
+/// Contrary to the SRI spec, Nix doesn't support SRI strings with multiple hashes,
+/// only supports sha256 and sha512 from the spec, and supports sha1 and md5
+/// additionally.
+pub fn from_sri_str(s: &str) -> Result<NixHash, Error> {
+    // try to find the first occurence of "-"
+    let idx = s.as_bytes().iter().position(|&e| e == b'-');
+
+    if idx.is_none() {
+        return Err(Error::InvalidSRI);
+    }
+
+    let idx = idx.unwrap();
+
+    // try to map the part before that `-` to a supported hash algo:
+    let algo: HashAlgo = s[..idx].try_into()?;
+
+    // the rest should be the digest (as Nix doesn't support more than one hash in an SRI string).
+    let digest_str = &s[idx + 1..];
+
+    // verify the digest length matches what we'd expect from the hash function.
+    // This will also reject strings with more than one hash, because the length won't match.
+    if digest_str.as_bytes().len() != data_encoding::BASE64.encode_len(hash_algo_length(&algo)) {
+        return Err(Error::InvalidEncodedDigestLength(
+            digest_str.as_bytes().len(),
+            algo,
+        ));
+    }
+
+    // decode the base64 string
+    let digest: Vec<u8> = data_encoding::BASE64
+        .decode(digest_str.as_bytes())
+        .map_err(Error::InvalidBase64Encoding)?;
+    Ok(NixHash { digest, algo })
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::nixhash::{self, HashAlgo};
+
+    const SHA256_SRI: &str = "sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank/v1unTk=";
+    const SHA256_BASE16: &str = "a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39";
+    const SHA256_NIXBASE32: &str = "0fcxdvyzxr09shcbcxkv7l1b356dqxzp3ja68rhrg4yhbqarrkm5";
+    const SHA256_BASE64: &str = "pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank/v1unTk=";
+
+    const SHA1_SRI: &str = "sha1-YBZ3eZfDCrAkE89QlWIs15JCg6w=";
+    const SHA1_BASE16: &str = "6016777997c30ab02413cf5095622cd7924283ac";
+    const SHA1_NIXBASE32: &str = "mj1l54np5ii9al6g2cjb02n3jxwpf5k0";
+    const SHA1_BASE64: &str = "YBZ3eZfDCrAkE89QlWIs15JCg6w=";
+
+    const MD5_SRI: &str = "md5-xIdKiJdECzk9hi2P1FkHPw==";
+    const MD5_BASE16: &str = "c4874a8897440b393d862d8fd459073f";
+    const MD5_NIXBASE32: &str = "1z0xcx93rdhqykj2s4jy44m1y4";
+    const MD5_BASE64: &str = "xIdKiJdECzk9hi2P1FkHPw==";
+
+    const SHA512_SRI: &str = "sha512-q0DQvjVB8HdLungV0T0QsDJS6W6V99u07pmjtDHCFmL9aXGgIBYOOYSKpfMFub4PeHJ7KweJ458STSHpK4857w==";
+    const SHA512_BASE16: &str = "ab40d0be3541f0774bba7815d13d10b03252e96e95f7dbb4ee99a3b431c21662fd6971a020160e39848aa5f305b9be0f78727b2b0789e39f124d21e92b8f39ef";
+    const SHA512_NIXBASE32: &str = "3pkk3rbx4hls4lzwf4hfavvf9w0zgmr0prsb2l47471c850f5lzsqhnq8qv98wrxssdpxwmdvlm4cmh20yx25bqp95pgw216nzd0h5b";
+    const SHA512_BASE64: &str =
+        "q0DQvjVB8HdLungV0T0QsDJS6W6V99u07pmjtDHCFmL9aXGgIBYOOYSKpfMFub4PeHJ7KweJ458STSHpK4857w==";
+
+    /// Test parsing a hash without a hash algo specified works if the hash is
+    /// in SRI format, and works for all formats if the hash algo is specified.
+    #[test]
+    fn from_str() {
+        let nix_hash_1 = nixhash::from_str(SHA256_SRI, None).expect("must succeed");
+        assert_eq!(HashAlgo::Sha256, nix_hash_1.algo);
+        assert_eq!(
+            vec![
+                0xa5, 0xce, 0x9c, 0x15, 0x5e, 0xd0, 0x93, 0x97, 0x61, 0x46, 0x46, 0xc9, 0x71, 0x7f,
+                0xc7, 0xcd, 0x94, 0xb1, 0x02, 0x3d, 0x7b, 0x76, 0xb6, 0x18, 0xd4, 0x09, 0xe4, 0xfe,
+                0xfd, 0x6e, 0x9d, 0x39
+            ],
+            nix_hash_1.digest
+        );
+
+        // pass the same string, while also specifying the algo
+        let nix_hash_2 = nixhash::from_str(SHA256_SRI, Some("sha256")).expect("must succeed");
+        // this should be equal to nix_hash_1
+        assert_eq!(nix_hash_1, nix_hash_2);
+
+        // parse as base16, while specifying the algo
+        let nix_hash_base16 =
+            nixhash::from_str(SHA256_BASE16, Some("sha256")).expect("must succeed");
+        // this should be equal to nix_hash_1
+        assert_eq!(nix_hash_1, nix_hash_base16);
+
+        // parse as nixbase32, while specifying the algo
+        let nix_hash_nixbase32 =
+            nixhash::from_str(SHA256_NIXBASE32, Some("sha256")).expect("must succeed");
+        // this should be equal to nix_hash_1
+        assert_eq!(nix_hash_1, nix_hash_nixbase32);
+
+        // parse as base64, while specifying the algo
+        let nix_hash_base64 =
+            nixhash::from_str(SHA256_BASE64, Some("sha256")).expect("must succeed");
+        // this should be equal to nix_hash_1
+        assert_eq!(nix_hash_1, nix_hash_base64);
+    }
+
+    #[test]
+    fn from_str_sha1() {
+        let nix_hash_sha1 = nixhash::from_str(SHA1_SRI, None).expect("must succeed");
+        assert_eq!(HashAlgo::Sha1, nix_hash_sha1.algo);
+
+        assert_eq!(
+            nix_hash_sha1,
+            nixhash::from_str(SHA1_BASE16, Some("sha1")).expect("must succeed")
+        );
+        assert_eq!(
+            nix_hash_sha1,
+            nixhash::from_str(SHA1_NIXBASE32, Some("sha1")).expect("must succeed")
+        );
+        assert_eq!(
+            nix_hash_sha1,
+            nixhash::from_str(SHA1_BASE64, Some("sha1")).expect("must succeed")
+        );
+    }
+
+    #[test]
+    fn from_str_md5() {
+        let nix_hash_md5 = nixhash::from_str(MD5_SRI, None).expect("must succeed");
+        assert_eq!(HashAlgo::Md5, nix_hash_md5.algo);
+
+        assert_eq!(
+            nix_hash_md5,
+            nixhash::from_str(MD5_BASE16, Some("md5")).expect("must succeed")
+        );
+        assert_eq!(
+            nix_hash_md5,
+            nixhash::from_str(MD5_NIXBASE32, Some("md5")).expect("must succeed")
+        );
+        assert_eq!(
+            nix_hash_md5,
+            nixhash::from_str(MD5_BASE64, Some("md5")).expect("must succeed")
+        );
+    }
+    #[test]
+    fn from_str_sha512() {
+        let nix_hash_sha512 = nixhash::from_str(SHA512_SRI, None).expect("must succeed");
+        assert_eq!(HashAlgo::Sha512, nix_hash_sha512.algo);
+
+        assert_eq!(
+            nix_hash_sha512,
+            nixhash::from_str(SHA512_BASE16, Some("sha512")).expect("must succeed")
+        );
+        assert_eq!(
+            nix_hash_sha512,
+            nixhash::from_str(SHA512_NIXBASE32, Some("sha512")).expect("must succeed")
+        );
+        assert_eq!(
+            nix_hash_sha512,
+            nixhash::from_str(SHA512_BASE64, Some("sha512")).expect("must succeed")
+        );
+    }
+
+    /// Test a algo needs to be specified if the hash itself is not SRI.
+    #[test]
+    fn from_str_algo_missing() {
+        nixhash::from_str(SHA256_BASE16, None).expect_err("must fail");
+        nixhash::from_str(SHA256_NIXBASE32, None).expect_err("must fail");
+        nixhash::from_str(SHA256_BASE64, None).expect_err("must fail");
+    }
+
+    /// Test parsing an SRI hash via the [nixhash::from_sri_str] method.
+    #[test]
+    fn from_sri_str() {
+        let nix_hash = nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank/v1unTk=")
+            .expect("must succeed");
+
+        assert_eq!(HashAlgo::Sha256, nix_hash.algo);
+        assert_eq!(
+            vec![
+                0xa5, 0xce, 0x9c, 0x15, 0x5e, 0xd0, 0x93, 0x97, 0x61, 0x46, 0x46, 0xc9, 0x71, 0x7f,
+                0xc7, 0xcd, 0x94, 0xb1, 0x02, 0x3d, 0x7b, 0x76, 0xb6, 0x18, 0xd4, 0x09, 0xe4, 0xfe,
+                0xfd, 0x6e, 0x9d, 0x39
+            ],
+            nix_hash.digest
+        )
+    }
+
+    /// Ensure we detect truncated base64 digests, where the digest size
+    /// doesn't match what's expected from that hash function.
+    #[test]
+    fn from_sri_str_truncated() {
+        nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank")
+            .expect_err("must fail");
+    }
+
+    /// Ensure we fail on SRI hashes that Nix doesn't support.
+    #[test]
+    fn from_sri_str_unsupported() {
+        nixhash::from_sri_str(
+            "sha384-o4UVSl89mIB0sFUK+3jQbG+C9Zc9dRlV/Xd3KAvXEbhqxu0J5OAdg6b6VHKHwQ7U",
+        )
+        .expect_err("must fail");
+    }
+
+    /// Ensure we reject invalid base64 encoding
+    #[test]
+    fn from_sri_str_invalid_base64() {
+        nixhash::from_sri_str("sha256-invalid=base64").expect_err("must fail");
+    }
+
+    /// Ensure we reject SRI strings with multiple hashes, as Nix doesn't support that.
+    #[test]
+    fn from_stri_str_unsupported_multiple() {
+        nixhash::from_sri_str("sha256-ngth6szLtC1IJIYyz3lhftzL8SkrJkqPyPve+dGqa1Y= sha512-q0DQvjVB8HdLungV0T0QsDJS6W6V99u07pmjtDHCFmL9aXGgIBYOOYSKpfMFub4PeHJ7KweJ458STSHpK4857w==").expect_err("must fail");
+    }
+}