about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-03-14T16·16+0100
committerclbot <clbot@tvl.fyi>2023-03-15T11·50+0000
commitb55d1f97ce8762711ff44f9b5695452cc8083c44 (patch)
treed02d5a3f79594da503ba708ae283257e3b24dd51
parente82385dbe5a3ba397bb6ebcb3f3ddf28bc0e3802 (diff)
refactor(tvix/nix-compat): -derivation::Hash, +NixHash r/6009
This stops using our own custom Hash structure, which was mostly only
used because we had to parse the JSON representation somehow.

Since cl/8217, there's a `NixHash` struct, which is better suited to
hold this data. Converting the format requires a bit of serde labor
though, but that only really matters when interacting with JSON
representations (which we mostly don't).

Change-Id: Idc5ee511e36e6726c71f66face8300a441b0bf4c
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8304
Autosubmit: flokli <flokli@flokli.de>
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
-rw-r--r--tvix/Cargo.nix4
-rw-r--r--tvix/cli/src/derivation.rs21
-rw-r--r--tvix/nix-compat/Cargo.toml1
-rw-r--r--tvix/nix-compat/src/derivation/mod.rs67
-rw-r--r--tvix/nix-compat/src/derivation/output.rs38
-rw-r--r--tvix/nix-compat/src/derivation/tests/mod.rs19
-rw-r--r--tvix/nix-compat/src/derivation/write.rs26
-rw-r--r--tvix/nix-compat/src/nixhash.rs112
8 files changed, 201 insertions, 87 deletions
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 7dcd3cef53..25869404fe 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -3611,6 +3611,10 @@ rec {
             features = [ "derive" ];
           }
           {
+            name = "serde_json";
+            packageId = "serde_json";
+          }
+          {
             name = "sha2";
             packageId = "sha2 0.10.6";
           }
diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs
index 61c4489954..1508e3e6d9 100644
--- a/tvix/cli/src/derivation.rs
+++ b/tvix/cli/src/derivation.rs
@@ -1,5 +1,5 @@
 //! Implements `builtins.derivation`, the core of what makes Nix build packages.
-use nix_compat::derivation::{Derivation, Hash};
+use nix_compat::derivation::Derivation;
 use nix_compat::{hash_placeholder, nixhash};
 use std::cell::RefCell;
 use std::collections::{btree_map, BTreeSet};
@@ -126,19 +126,18 @@ fn populate_output_configuration(
 
                 let output_hash = nixhash::from_str(&hash, a).map_err(Error::InvalidOutputHash)?;
 
-                // construct the algo string. Depending on hashMode, we prepend a `r:`.
-                let algo = match hash_mode.as_deref() {
-                    None | Some("flat") => format!("{}", &output_hash.algo),
-                    Some("recursive") => format!("r:{}", &output_hash.algo),
+                // construct the NixHashWithMode.
+                out.hash_with_mode = match hash_mode.as_deref() {
+                    None | Some("flat") => Some(nixhash::NixHashWithMode::Flat(
+                        nixhash::NixHash::new(output_hash.algo, output_hash.digest),
+                    )),
+                    Some("recursive") => Some(nixhash::NixHashWithMode::Recursive(
+                        nixhash::NixHash::new(output_hash.algo, output_hash.digest),
+                    )),
                     Some(other) => {
                         return Err(Error::InvalidOutputHashMode(other.to_string()).into())
                     }
-                };
-
-                out.hash = Some(Hash {
-                    algo,
-                    digest: data_encoding::HEXLOWER.encode(&output_hash.digest),
-                });
+                }
             }
         }
     }
diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml
index 7758ea995c..49ddbf4728 100644
--- a/tvix/nix-compat/Cargo.toml
+++ b/tvix/nix-compat/Cargo.toml
@@ -10,6 +10,7 @@ anyhow = "1.0.68"
 data-encoding = "2.3.3"
 glob = "0.3.0"
 serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
 sha2 = "0.10.6"
 thiserror = "1.0.38"
 
diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs
index 2b549536b2..06b7ba02f4 100644
--- a/tvix/nix-compat/src/derivation/mod.rs
+++ b/tvix/nix-compat/src/derivation/mod.rs
@@ -1,5 +1,5 @@
 use crate::{
-    nixhash::NixHash,
+    nixhash::HashAlgo,
     store_path::{self, StorePath},
 };
 use serde::{Deserialize, Serialize};
@@ -17,8 +17,9 @@ mod write;
 mod tests;
 
 // Public API of the crate.
+pub use crate::nixhash::{NixHash, NixHashWithMode};
 pub use errors::{DerivationError, OutputError};
-pub use output::{Hash, Output};
+pub use output::Output;
 pub use utils::path_with_references;
 
 #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
@@ -75,23 +76,22 @@ impl Derivation {
         buffer
     }
 
-    /// Returns the fixed output path and its hash
-    // (if the Derivation is fixed output),
-    /// or None if there is no fixed output.
+    /// Returns the fixed output path and its [NixHashWithMode]
+    /// (if the Derivation is fixed output), or None if there is no fixed output.
     /// This takes some shortcuts in case more than one output exists, as this
     /// can't be a valid fixed-output Derivation.
-    pub fn get_fixed_output(&self) -> Option<(&String, &Hash)> {
+    pub fn get_fixed_output(&self) -> Option<(&String, &NixHashWithMode)> {
         if self.outputs.len() != 1 {
             return None;
         }
 
         if let Some(out_output) = self.outputs.get("out") {
-            if let Some(out_output_hash) = &out_output.hash {
+            if let Some(out_output_hash) = &out_output.hash_with_mode {
                 return Some((&out_output.path, out_output_hash));
             }
             // There has to be a hash, otherwise it would not be FOD
         }
-        return None;
+        None
     }
 
     /// Returns the drv path of a Derivation struct.
@@ -160,8 +160,6 @@ impl Derivation {
     ///    `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of
     ///    the A-Term.
     ///
-    /// TODO: what's the representation of ${digest}?
-    ///
     /// If the derivation is not a fixed derivation, it's up to the caller of
     /// this function to provide a lookup function to lookup these calculation
     /// results of parent derivations at `fn_get_hash_derivation_modulo` (by
@@ -175,8 +173,9 @@ impl Derivation {
             // Fixed-output derivations return a fixed hash
             Some((fixed_output_path, fixed_output_hash)) => {
                 hasher.update(format!(
-                    "fixed:out:{}:{}:{}",
-                    &fixed_output_hash.algo, &fixed_output_hash.digest, fixed_output_path,
+                    "fixed:out:{}:{}",
+                    fixed_output_hash.to_nix_hash_string(),
+                    fixed_output_path
                 ));
                 hasher.finalize()
             }
@@ -196,7 +195,7 @@ impl Derivation {
                 for (drv_path, output_names) in &self.input_derivations {
                     replaced_input_derivations.insert(
                         data_encoding::HEXLOWER
-                            .encode(&fn_get_derivation_or_fod_hash(&drv_path).digest),
+                            .encode(&fn_get_derivation_or_fod_hash(drv_path).digest),
                         output_names.clone(),
                     );
                 }
@@ -255,7 +254,7 @@ impl Derivation {
                         output_path_name.push_str(output_name);
                     }
 
-                    let s = &format!(
+                    let fp = &format!(
                         "output:{}:{}:{}:{}",
                         output_name,
                         derivation_or_fod_hash.to_nix_hash_string(),
@@ -264,7 +263,7 @@ impl Derivation {
                     );
 
                     let abs_store_path =
-                        utils::build_store_path(false, s, &output_path_name)?.to_absolute_path();
+                        utils::build_store_path(false, fp, &output_path_name)?.to_absolute_path();
 
                     output.path = abs_store_path.clone();
                     self.environment
@@ -277,33 +276,41 @@ impl Derivation {
                 // footgun prevention mechanism.
                 assert!(fixed_output_path.is_empty());
 
-                let s = {
-                    let mut s = String::new();
+                let fp = {
+                    let mut fp = String::new();
                     // Fixed-output derivation.
-                    // There's two different hashing strategies in place, depending on the value of hash.algo.
+                    // There's two different hashing strategies in place,
+                    // depending on whether the hash is recursive AND sha256 or anything else.
                     // This code is _weird_ but it is what Nix is doing. See:
                     // https://github.com/NixOS/nix/blob/1385b2007804c8a0370f2a6555045a00e34b07c7/src/libstore/store-api.cc#L178-L196
-                    if fixed_output_hash.algo == "r:sha256" {
-                        s.push_str(&format!(
-                            "source:sha256:{}",
-                            fixed_output_hash.digest, // lowerhex
-                        ));
+                    if let NixHashWithMode::Recursive(recursive_hash) = fixed_output_hash {
+                        if recursive_hash.algo == HashAlgo::Sha256 {
+                            fp.push_str(&format!("source:{}", recursive_hash.to_nix_hash_string()));
+                        } else {
+                            // This is similar to the FOD case, with an empty fixed_output_path.
+                            fp.push_str(&format!(
+                                "output:out:{}",
+                                derivation_or_fod_hash.to_nix_hash_string(),
+                            ));
+                        }
                     } else {
-                        s.push_str("output:out:");
-                        // This is drv_replacement for FOD, with an empty fixed_output_path.
-                        s.push_str(&derivation_or_fod_hash.to_nix_hash_string());
+                        // This is similar to the FOD case, with an empty fixed_output_path.
+                        fp.push_str(&format!(
+                            "output:out:{}",
+                            derivation_or_fod_hash.to_nix_hash_string(),
+                        ));
                     }
-                    s.push_str(&format!(":{}:{}", store_path::STORE_DIR, name));
-                    s
+                    fp.push_str(&format!(":{}:{}", store_path::STORE_DIR, name));
+                    fp
                 };
 
-                let abs_store_path = utils::build_store_path(false, &s, name)?.to_absolute_path();
+                let abs_store_path = utils::build_store_path(false, &fp, name)?.to_absolute_path();
 
                 self.outputs.insert(
                     "out".to_string(),
                     Output {
                         path: abs_store_path.clone(),
-                        hash: Some(fixed_output_hash.clone()),
+                        hash_with_mode: Some(fixed_output_hash.clone()),
                     },
                 );
                 self.environment.insert("out".to_string(), abs_store_path);
diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs
index 39c0dbde5a..4bfc7bf801 100644
--- a/tvix/nix-compat/src/derivation/output.rs
+++ b/tvix/nix-compat/src/derivation/output.rs
@@ -1,5 +1,6 @@
 use crate::derivation::OutputError;
-use crate::{nixbase32, store_path::StorePath};
+use crate::nixhash::{HashAlgo, NixHashWithMode};
+use crate::store_path::StorePath;
 use serde::{Deserialize, Serialize};
 
 #[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
@@ -7,42 +8,23 @@ pub struct Output {
     pub path: String,
 
     #[serde(flatten)]
-    pub hash: Option<Hash>,
-}
-
-#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
-pub struct Hash {
-    #[serde(rename = "hash")]
-    pub digest: String,
-    #[serde(rename = "hashAlgo")]
-    pub algo: String,
+    pub hash_with_mode: Option<NixHashWithMode>,
 }
 
 impl Output {
     pub fn is_fixed(&self) -> bool {
-        self.hash.is_some()
+        self.hash_with_mode.is_some()
     }
 
     pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> {
-        if let Some(hash) = &self.hash {
-            // try to decode digest
-            let result = nixbase32::decode(hash.digest.as_bytes());
-            match result {
-                Err(e) => return Err(OutputError::InvalidHashEncoding(hash.digest.clone(), e)),
-                Ok(digest) => {
-                    if hash.algo != "sha1" && hash.algo != "sha256" {
-                        return Err(OutputError::InvalidHashAlgo(hash.algo.to_string()));
-                    }
-                    if (hash.algo == "sha1" && digest.len() != 20)
-                        || (hash.algo == "sha256" && digest.len() != 32)
-                    {
-                        return Err(OutputError::InvalidDigestSizeForAlgo(
-                            digest.len(),
-                            hash.algo.to_string(),
-                        ));
+        if let Some(hash) = &self.hash_with_mode {
+            match hash {
+                NixHashWithMode::Flat(h) | NixHashWithMode::Recursive(h) => {
+                    if h.algo != HashAlgo::Sha1 || h.algo != HashAlgo::Sha256 {
+                        return Err(OutputError::InvalidHashAlgo(h.algo.to_string()));
                     }
                 }
-            };
+            }
         }
         if validate_output_paths {
             if let Err(e) = StorePath::from_absolute_path(&self.path) {
diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs
index 7f94f5e690..d7b63a45ad 100644
--- a/tvix/nix-compat/src/derivation/tests/mod.rs
+++ b/tvix/nix-compat/src/derivation/tests/mod.rs
@@ -1,5 +1,6 @@
-use crate::derivation::output::{Hash, Output};
+use crate::derivation::output::Output;
 use crate::derivation::Derivation;
+use crate::nixhash::NixHash;
 use crate::store_path::StorePath;
 use std::collections::BTreeSet;
 use std::fs::File;
@@ -218,11 +219,15 @@ fn output_path_construction() {
         "out".to_string(),
         Output {
             path: "".to_string(), // will be calculated
-            hash: Some(Hash {
-                digest: "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"
-                    .to_string(),
-                algo: "r:sha256".to_string(),
-            }),
+            hash_with_mode: Some(crate::nixhash::NixHashWithMode::Recursive(NixHash {
+                digest: data_encoding::HEXLOWER
+                    .decode(
+                        "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"
+                            .as_bytes(),
+                    )
+                    .unwrap(),
+                algo: crate::nixhash::HashAlgo::Sha256,
+            })),
         },
     );
 
@@ -271,7 +276,7 @@ fn output_path_construction() {
         "out".to_string(),
         Output {
             path: "".to_string(), // will be calculated
-            hash: None,
+            hash_with_mode: None,
         },
     );
 
diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs
index 9423ef2e6a..52166294e0 100644
--- a/tvix/nix-compat/src/derivation/write.rs
+++ b/tvix/nix-compat/src/derivation/write.rs
@@ -58,16 +58,22 @@ pub fn write_outputs(
 
         let mut elements: Vec<&str> = vec![output_name, &output.path];
 
-        match &output.hash {
-            Some(hash) => {
-                elements.push(&hash.algo);
-                elements.push(&hash.digest);
-            }
-            None => {
-                elements.push("");
-                elements.push("");
-            }
-        }
+        let (e2, e3) = match &output.hash_with_mode {
+            Some(hash) => match hash {
+                crate::nixhash::NixHashWithMode::Flat(h) => (
+                    h.algo.to_string(),
+                    data_encoding::HEXLOWER.encode(&h.digest),
+                ),
+                crate::nixhash::NixHashWithMode::Recursive(h) => (
+                    format!("r:{}", h.algo),
+                    data_encoding::HEXLOWER.encode(&h.digest),
+                ),
+            },
+            None => ("".to_string(), "".to_string()),
+        };
+
+        elements.push(&e2);
+        elements.push(&e3);
 
         write_array_elements(
             writer,
diff --git a/tvix/nix-compat/src/nixhash.rs b/tvix/nix-compat/src/nixhash.rs
index 4cb076ed16..0da90fe4cc 100644
--- a/tvix/nix-compat/src/nixhash.rs
+++ b/tvix/nix-compat/src/nixhash.rs
@@ -1,14 +1,124 @@
 use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER};
+use serde::ser::SerializeMap;
+use serde::{Deserialize, Deserializer, Serialize, Serializer};
 use std::fmt::Display;
 use thiserror::Error;
 
 use crate::nixbase32;
 
+/// A Nix Hash can either be flat or recursive.
+#[derive(Clone, Debug, Eq, PartialEq)]
+pub enum NixHashWithMode {
+    Flat(NixHash),
+    Recursive(NixHash),
+}
+
+impl NixHashWithMode {
+    /// Formats a [NixHashWithMode] in the Nix default hash format,
+    /// which is the algo, followed by a colon, then the lower hex encoded digest.
+    /// In case the hash itself is recursive, a `r:` is added as prefix
+    pub fn to_nix_hash_string(&self) -> String {
+        match self {
+            NixHashWithMode::Flat(h) => h.to_nix_hash_string(),
+            NixHashWithMode::Recursive(h) => format!("r:{}", h.to_nix_hash_string()),
+        }
+    }
+}
+
+impl Serialize for NixHashWithMode {
+    /// map a NixHashWithMode into the serde data model.
+    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        let mut map = serializer.serialize_map(Some(2))?;
+        match self {
+            NixHashWithMode::Flat(h) => {
+                map.serialize_entry("hash", &nixbase32::encode(&h.digest))?;
+                map.serialize_entry("hashAlgo", &h.algo.to_string())?;
+            }
+            NixHashWithMode::Recursive(h) => {
+                map.serialize_entry("hash", &nixbase32::encode(&h.digest))?;
+                map.serialize_entry("hashAlgo", &format!("r:{}", &h.algo.to_string()))?;
+            }
+        };
+        map.end()
+    }
+}
+
+impl<'de> Deserialize<'de> for NixHashWithMode {
+    /// map the serde data model into a NixHashWithMode.
+    ///
+    /// The serde data model has a `hash` field (containing a digest in nixbase32),
+    /// and a `hashAlgo` field, containing the stringified hash algo.
+    /// In case the hash is recursive, hashAlgo also has a `r:` prefix.
+    ///
+    /// This is to match how `nix show-derivation` command shows them in JSON
+    /// representation.
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        // TODO: don't use serde_json here?
+        // TODO: serde seems to simply set `hash_with_mode` to None if hash
+        // and hashAlgo fail, but that should be a proper deserialization error
+        // that should be propagated to the user!
+
+        let json = serde_json::Value::deserialize(deserializer)?;
+        match json.as_object() {
+            None => Err(serde::de::Error::custom("couldn't parse as map"))?,
+            Some(map) => {
+                let digest: Vec<u8> = {
+                    if let Some(v) = map.get("hash") {
+                        if let Some(s) = v.as_str() {
+                            data_encoding::HEXLOWER
+                                .decode(s.as_bytes())
+                                .map_err(|e| serde::de::Error::custom(e.to_string()))?
+                        } else {
+                            return Err(serde::de::Error::custom(
+                                "couldn't parse 'hash' as string",
+                            ));
+                        }
+                    } else {
+                        return Err(serde::de::Error::custom("couldn't extract 'hash' key"));
+                    }
+                };
+
+                if let Some(v) = map.get("hashAlgo") {
+                    if let Some(s) = v.as_str() {
+                        match s.strip_prefix("r:") {
+                            Some(rest) => Ok(NixHashWithMode::Recursive(NixHash::new(
+                                HashAlgo::try_from(rest).map_err(|e| {
+                                    serde::de::Error::custom(format!("unable to parse algo: {}", e))
+                                })?,
+                                digest,
+                            ))),
+                            None => Ok(NixHashWithMode::Flat(NixHash::new(
+                                HashAlgo::try_from(s).map_err(|e| {
+                                    serde::de::Error::custom(format!("unable to parse algo: {}", e))
+                                })?,
+                                digest,
+                            ))),
+                        }
+                    } else {
+                        Err(serde::de::Error::custom(
+                            "couldn't parse 'hashAlgo' as string",
+                        ))
+                    }
+                } else {
+                    Err(serde::de::Error::custom("couldn't extract 'hashAlgo' key"))
+                }
+            }
+        }
+    }
+}
+
 /// Nix allows specifying hashes in various encodings, and magically just
 /// derives the encoding.
 #[derive(Clone, Debug, Eq, PartialEq)]
 pub struct NixHash {
     pub digest: Vec<u8>,
+
     pub algo: HashAlgo,
 }
 
@@ -26,7 +136,7 @@ impl NixHash {
 }
 
 /// This are the hash algorithms supported by cppnix.
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
 pub enum HashAlgo {
     Md5,
     Sha1,