about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2023-03-13T22·52+0100
committerflokli <flokli@flokli.de>2023-03-14T18·26+0000
commit5f260edf7fba9a6ad1e0ee16779df11d92686828 (patch)
tree587b94d271c2d748164bde1a2bc5c5a093557d70
parent32999cb6f60ec89099f1f5295038cfeca2fb106a (diff)
refactor(tvix/nix-compat): replace calculate_drv_replacement_str r/5995
Call this function derivation_or_fod_hash, and return a NixHash.

This is more in line with how cppnix calls this, and allows using
to_nix_hash_string() in some places.

Change-Id: Iebf5355f08ed5c9a044844739350f829f874f0ce
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8293
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
-rw-r--r--tvix/cli/src/derivation.rs22
-rw-r--r--tvix/cli/src/known_paths.rs28
-rw-r--r--tvix/nix-compat/src/derivation/mod.rs74
-rw-r--r--tvix/nix-compat/src/derivation/tests/mod.rs45
4 files changed, 98 insertions, 71 deletions
diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs
index 15c4c6f858f7..61c4489954f1 100644
--- a/tvix/cli/src/derivation.rs
+++ b/tvix/cli/src/derivation.rs
@@ -339,21 +339,27 @@ mod derivation_builtins {
         // eval data structures.
         drv.validate(false).map_err(Error::InvalidDerivation)?;
 
-        let tmp_replacement_str =
-            drv.calculate_drv_replacement_str(|drv| known_paths.get_replacement_string(drv));
+        // Calculate the derivation_or_fod_hash for the current derivation.
+        // This one is still intermediate (so not added to known_paths)
+        let derivation_or_fod_hash_tmp =
+            drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv));
 
-        drv.calculate_output_paths(&name, &tmp_replacement_str)
+        // Mutate the Derivation struct and set output paths
+        drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp)
             .map_err(Error::InvalidDerivation)?;
 
-        let actual_replacement_str =
-            drv.calculate_drv_replacement_str(|drv| known_paths.get_replacement_string(drv));
-
         let derivation_path = drv
             .calculate_derivation_path(&name)
             .map_err(Error::InvalidDerivation)?;
 
-        known_paths
-            .add_replacement_string(derivation_path.to_absolute_path(), &actual_replacement_str);
+        // recompute the hash derivation modulo and add to known_paths
+        let derivation_or_fod_hash_final =
+            drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv));
+
+        known_paths.add_hash_derivation_modulo(
+            derivation_path.to_absolute_path(),
+            &derivation_or_fod_hash_final,
+        );
 
         // mark all the new paths as known
         let output_names: Vec<String> = drv.outputs.keys().map(Clone::clone).collect();
diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs
index f16fad902654..07373ef0da7a 100644
--- a/tvix/cli/src/known_paths.rs
+++ b/tvix/cli/src/known_paths.rs
@@ -12,6 +12,7 @@
 //! information.
 
 use crate::refscan::{ReferenceScanner, STORE_PATH_LEN};
+use nix_compat::nixhash::NixHash;
 use std::{
     collections::{hash_map, BTreeSet, HashMap},
     ops::Index,
@@ -67,11 +68,10 @@ pub struct KnownPaths {
     /// path used for reference scanning.
     paths: HashMap<PathName, KnownPath>,
 
-    /// All known replacement strings for derivations.
+    /// All known derivation or FOD hashes.
     ///
-    /// Keys are derivation paths, values are the opaque replacement
-    /// strings.
-    replacements: HashMap<String, String>,
+    /// Keys are derivation paths, values is the NixHash.
+    derivation_or_fod_hashes: HashMap<String, NixHash>,
 }
 
 impl Index<&PathName> for KnownPaths {
@@ -156,25 +156,29 @@ impl KnownPaths {
         ReferenceScanner::new(candidates)
     }
 
-    /// Fetch the opaque "replacement string" for a given derivation path.
-    pub fn get_replacement_string(&self, drv: &str) -> String {
+    /// Fetch the opaque "hash derivation modulo" for a given derivation path.
+    pub fn get_hash_derivation_modulo(&self, drv_path: &str) -> NixHash {
         // TODO: we rely on an invariant that things *should* have
         // been calculated if we get this far.
-        self.replacements[drv].clone()
+        self.derivation_or_fod_hashes[drv_path].clone()
     }
 
-    pub fn add_replacement_string<D: ToString>(&mut self, drv: D, replacement_str: &str) {
+    pub fn add_hash_derivation_modulo<D: ToString>(
+        &mut self,
+        drv: D,
+        hash_derivation_modulo: &NixHash,
+    ) {
         #[allow(unused_variables)] // assertions on this only compiled in debug builds
         let old = self
-            .replacements
-            .insert(drv.to_string(), replacement_str.to_owned());
+            .derivation_or_fod_hashes
+            .insert(drv.to_string(), hash_derivation_modulo.to_owned());
 
         #[cfg(debug_assertions)]
         {
             if let Some(old) = old {
                 debug_assert!(
-                    old == replacement_str,
-                    "replacement string for a given derivation should always match"
+                    old == *hash_derivation_modulo,
+                    "hash derivation modulo for a given derivation should always be calculated the same"
                 );
             }
         }
diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs
index d348b0d716b9..e0e18823c5d9 100644
--- a/tvix/nix-compat/src/derivation/mod.rs
+++ b/tvix/nix-compat/src/derivation/mod.rs
@@ -150,21 +150,31 @@ impl Derivation {
         utils::build_store_path(true, &s, name)
     }
 
-    /// Calculate the drv replacement string for a given derivation.
+    /// Calculates the hash of a derivation modulo fixed-output subderivations.
     ///
-    /// This is either called on a struct without output paths populated,
-    /// to provide the `drv_replacement_str` value for the `calculate_output_paths`
-    /// function call, or called on a struct with output paths populated, to
-    /// calculate / cache lookups for calls to fn_get_drv_replacement.
+    /// This is called `hashDerivationModulo` in nixcpp.
     ///
-    /// `fn_get_drv_replacement` is used to look up the drv replacement strings
-    /// for input_derivations the Derivation refers to.
-    pub fn calculate_drv_replacement_str<F>(&self, fn_get_drv_replacement: F) -> String
+    /// It returns a [NixHash], created by calculating the sha256 digest of
+    /// the derivation ATerm representation, except that:
+    ///  -  any input derivation paths have beed replaced "by the result of a
+    ///     recursive call to this function" and that
+    ///  - for fixed-output derivations the special
+    ///    `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of
+    ///    the A-Term.
+    ///
+    /// TODO: what's the representation of ${digest}?
+    ///
+    /// If the derivation is not a fixed derivation, it's up to the caller of
+    /// this function to provide a lookup function to lookup these calculation
+    /// results of parent derivations at `fn_get_hash_derivation_modulo` (by
+    /// drv path).
+    pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> NixHash
     where
-        F: Fn(&str) -> String,
+        F: Fn(&str) -> NixHash,
     {
         let mut hasher = Sha256::new();
         let digest = match self.get_fixed_output() {
+            // Fixed-output derivations return a fixed hash
             Some((fixed_output_path, fixed_output_hash)) => {
                 hasher.update(format!(
                     "fixed:out:{}:{}:{}",
@@ -172,15 +182,24 @@ impl Derivation {
                 ));
                 hasher.finalize()
             }
+            // Non-Fixed-output derivations return a hash of the ATerm notation, but with all
+            // input_derivation paths replaced by a recursive call to this function.
+            // We use fn_get_derivation_or_fod_hash here, so callers can precompute this.
             None => {
+                // This is a new map from derivation_or_fod_hash.digest (as lowerhex)
+                // to list of output names
                 let mut replaced_input_derivations: BTreeMap<String, BTreeSet<String>> =
                     BTreeMap::new();
 
-                // For each input_derivation, look up the replacement.
-                for (drv_path, input_derivation) in &self.input_derivations {
+                // For each input_derivation, look up the
+                // derivation_or_fod_hash, and replace the derivation path with it's HEXLOWER
+                // digest.
+                // This is not the [NixHash::to_nix_hash_string], but without the sha256: prefix).
+                for (drv_path, output_names) in &self.input_derivations {
                     replaced_input_derivations.insert(
-                        fn_get_drv_replacement(drv_path).to_string(),
-                        input_derivation.clone(),
+                        data_encoding::HEXLOWER
+                            .encode(&fn_get_derivation_or_fod_hash(&drv_path).digest),
+                        output_names.clone(),
                     );
                 }
 
@@ -196,8 +215,7 @@ impl Derivation {
                 hasher.finalize()
             }
         };
-
-        format!("{:x}", digest)
+        NixHash::new(crate::nixhash::HashAlgo::Sha256, digest.to_vec())
     }
 
     /// This calculates all output paths of a Derivation and updates the struct.
@@ -205,17 +223,13 @@ impl Derivation {
     /// This means, self.outputs[$outputName].path needs to be an empty string,
     /// and self.environment[$outputName] needs to be an empty string.
     ///
-    /// Output path calculation requires knowledge of "drv replacement
-    /// strings", and in case of non-fixed-output derivations, also knowledge
-    /// of "drv replacement" strings (recursively) of all input derivations.
-    ///
-    /// We solve this by asking the caller of this function to provide
-    /// the drv replacement string of the current derivation itself,
-    /// which is ran on the struct without output paths.
+    /// Output path calculation requires knowledge of the
+    /// derivation_or_fod_hash [NixHash], which (in case of non-fixed-output
+    /// derivations) also requires knowledge of other hash_derivation_modulo
+    /// [NixHash]es.
     ///
-    /// This sound terribly ugly, but won't be too much of a concern later on, as
-    /// naming fixed-output paths once uploaded will be a tvix-store concern,
-    /// so there's no need to calculate them here anymore.
+    /// We solve this by asking the caller of this function to provide the
+    /// hash_derivation_modulo of the current Derivation.
     ///
     /// On completion, self.environment[$outputName] and
     /// self.outputs[$outputName].path are set to the calculated output path for all
@@ -223,7 +237,7 @@ impl Derivation {
     pub fn calculate_output_paths(
         &mut self,
         name: &str,
-        drv_replacement_str: &str,
+        derivation_or_fod_hash: &NixHash,
     ) -> Result<(), DerivationError> {
         // Check if the Derivation is fixed output, because they cause
         // different fingerprints to be hashed.
@@ -244,9 +258,9 @@ impl Derivation {
                     }
 
                     let s = &format!(
-                        "output:{}:sha256:{}:{}:{}",
+                        "output:{}:{}:{}:{}",
                         output_name,
-                        drv_replacement_str,
+                        derivation_or_fod_hash.to_nix_hash_string(),
                         store_path::STORE_DIR,
                         output_path_name,
                     );
@@ -277,9 +291,9 @@ impl Derivation {
                             fixed_output_hash.digest, // nixbase32
                         ));
                     } else {
-                        s.push_str("output:out:sha256:");
+                        s.push_str("output:out:");
                         // This is drv_replacement for FOD, with an empty fixed_output_path.
-                        s.push_str(drv_replacement_str);
+                        s.push_str(&derivation_or_fod_hash.to_nix_hash_string());
                     }
                     s.push_str(&format!(":{}:{}", store_path::STORE_DIR, name));
                     s
diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs
index 34f6cae678d4..1a4f8ce23286 100644
--- a/tvix/nix-compat/src/derivation/tests/mod.rs
+++ b/tvix/nix-compat/src/derivation/tests/mod.rs
@@ -96,16 +96,16 @@ fn derivation_with_trimmed_outputs(derivation: &Derivation) -> Derivation {
     }
 }
 
-#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")]
-#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")]
-fn replacement_drv_path(drv_path: &str, expected_replacement_str: &str) {
+#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")]
+#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")]
+fn hash_derivation_modulo(drv_path: &str, expected_nix_hash_string: &str) {
     // read in the fixture
     let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path));
     let drv: Derivation = serde_json::from_str(&data).expect("must deserialize");
 
-    let drv_replacement_str = drv.calculate_drv_replacement_str(|_| panic!("must not be called"));
+    let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called"));
 
-    assert_eq!(expected_replacement_str, drv_replacement_str);
+    assert_eq!(expected_nix_hash_string, actual.to_nix_hash_string());
 }
 
 #[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")]
@@ -122,16 +122,16 @@ fn output_paths(name: &str, drv_path: &str) {
 
     let mut derivation = derivation_with_trimmed_outputs(&expected_derivation);
 
-    // calculate the drv replacement string.
+    // calculate the derivation_or_fod_hash of derivation
     // We don't expect the lookup function to be called for most derivations.
-    let replacement_str = derivation.calculate_drv_replacement_str(|drv_name| {
+    let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|parent_drv_path| {
         // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv
         // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv
         if name == "foo"
             && ((drv_path == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"
-                && drv_name == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv")
+                && parent_drv_path == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv")
                 || (drv_path == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"
-                    && drv_name == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"))
+                    && parent_drv_path == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"))
         {
             // do the lookup, by reading in the fixture of the requested
             // drv_name, and calculating its drv replacement (on the non-stripped version)
@@ -140,22 +140,25 @@ fn output_paths(name: &str, drv_path: &str) {
             let data = read_file(&format!(
                 "{}/{}.json",
                 RESOURCES_PATHS,
-                Path::new(drv_name).file_name().unwrap().to_string_lossy()
+                Path::new(parent_drv_path)
+                    .file_name()
+                    .unwrap()
+                    .to_string_lossy()
             ));
 
             let drv: Derivation = serde_json::from_str(&data).expect("must deserialize");
 
-            // calculate replacement string. These don't trigger any subsequent requests, as they're both FOD.
-            drv.calculate_drv_replacement_str(|_| panic!("must not lookup"))
+            // calculate derivation_or_fod_hash for each parent.
+            // This may not trigger subsequent requests, as both parents are FOD.
+            drv.derivation_or_fod_hash(|_| panic!("must not lookup"))
         } else {
             // we only expect this to be called in the "foo" testcase, for the "bar derivations"
             panic!("may only be called for foo testcase on bar derivations");
         }
     });
 
-    // We need to calculate the replacement_str, as fixed-sha1 does use it.
     derivation
-        .calculate_output_paths(name, &replacement_str)
+        .calculate_output_paths(name, &calculated_derivation_or_fod_hash)
         .unwrap();
 
     // The derivation should now look like it was before
@@ -226,7 +229,7 @@ fn output_path_construction() {
     // calculate bar output paths
     let bar_calc_result = bar_drv.calculate_output_paths(
         "bar",
-        &bar_drv.calculate_drv_replacement_str(|_| panic!("is FOD, should not lookup")),
+        &bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")),
     );
     assert!(bar_calc_result.is_ok());
 
@@ -241,8 +244,8 @@ fn output_path_construction() {
     // now construct foo, which requires bar_drv
     // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar.
     let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path;
-    let bar_drv_replacement_str =
-        &bar_drv.calculate_drv_replacement_str(|_| panic!("is FOD, should not lookup"));
+    let bar_drv_derivation_or_fod_hash =
+        bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup"));
 
     let bar_drv_path = bar_drv
         .calculate_derivation_path("bar")
@@ -281,11 +284,11 @@ fn output_path_construction() {
     // calculate foo output paths
     let foo_calc_result = foo_drv.calculate_output_paths(
         "foo",
-        &foo_drv.calculate_drv_replacement_str(|drv_name| {
-            if drv_name != "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" {
-                panic!("lookup called with unexpected drv_name: {}", drv_name);
+        &foo_drv.derivation_or_fod_hash(|drv_path| {
+            if drv_path != "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" {
+                panic!("lookup called with unexpected drv_path: {}", drv_path);
             }
-            bar_drv_replacement_str.clone()
+            bar_drv_derivation_or_fod_hash.clone()
         }),
     );
     assert!(foo_calc_result.is_ok());