about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-03-14T11·50+0200
committerclbot <clbot@tvl.fyi>2024-03-14T16·52+0000
commit43c851bc841bccc65ffddab7205783c43f25417f (patch)
tree74249d4df9d5c82f5e12b345a62728bbeaed157f
parent35f636b68482d8c84939b4707514a48416557bb4 (diff)
refactor(nix-compat/store_path): take [u8;32] for outer fingerprint r/7691
The outer fingerprint used for store path calculation is always a sha256
digest. This includes both input and output-addressed store paths.

We used a NixHash here, which can also represent other hash types, and
that had a bunch of annoyances:

 - Whenever we had the bytes, we had to wrap them in a NixHash::Sha256().
 - Things like AtermWriteable had to be implemented on NixHash,
   even though we then had an assertion it was only called in the
   NixHash::Sha256 case.

Change-Id: Ic895503d9b071800d2e52ae057666f44bd0ab9d6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11142
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: John Ericson <git@johnericson.me>
Reviewed-by: picnoir picnoir <picnoir@alternativebit.fr>
-rw-r--r--tvix/glue/src/known_paths.rs18
-rw-r--r--tvix/nix-compat/src/derivation/mod.rs14
-rw-r--r--tvix/nix-compat/src/derivation/tests/mod.rs12
-rw-r--r--tvix/nix-compat/src/derivation/write.rs14
-rw-r--r--tvix/nix-compat/src/store_path/utils.rs39
5 files changed, 46 insertions, 51 deletions
diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs
index 13f86fae0ea4..9cd9470fa949 100644
--- a/tvix/glue/src/known_paths.rs
+++ b/tvix/glue/src/known_paths.rs
@@ -8,7 +8,7 @@
 //! This data is required to find the derivation needed to actually trigger the
 //! build, if necessary.
 
-use nix_compat::{derivation::Derivation, nixhash::NixHash, store_path::StorePath};
+use nix_compat::{derivation::Derivation, store_path::StorePath};
 use std::collections::HashMap;
 
 /// Struct keeping track of all known Derivations in the current evaluation.
@@ -20,7 +20,7 @@ pub struct KnownPaths {
     ///
     /// Keys are derivation paths, values are a tuple of the "hash derivation
     /// modulo" and the Derivation struct itself.
-    derivations: HashMap<StorePath, (NixHash, Derivation)>,
+    derivations: HashMap<StorePath, ([u8; 32], Derivation)>,
 
     /// A map from output path to (one) drv path.
     /// Note that in the case of FODs, multiple drvs can produce the same output
@@ -30,7 +30,7 @@ pub struct KnownPaths {
 
 impl KnownPaths {
     /// Fetch the opaque "hash derivation modulo" for a given derivation path.
-    pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&NixHash> {
+    pub fn get_hash_derivation_modulo(&self, drv_path: &StorePath) -> Option<&[u8; 32]> {
         self.derivations
             .get(drv_path)
             .map(|(hash_derivation_modulo, _derivation)| hash_derivation_modulo)
@@ -83,7 +83,7 @@ impl KnownPaths {
         #[allow(unused_variables)] // assertions on this only compiled in debug builds
         let old = self
             .derivations
-            .insert(drv_path.to_owned(), (hash_derivation_modulo.clone(), drv));
+            .insert(drv_path.to_owned(), (hash_derivation_modulo, drv));
 
         #[cfg(debug_assertions)]
         {
@@ -99,7 +99,7 @@ impl KnownPaths {
 
 #[cfg(test)]
 mod tests {
-    use nix_compat::{derivation::Derivation, nixhash::NixHash, store_path::StorePath};
+    use nix_compat::{derivation::Derivation, store_path::StorePath};
 
     use super::KnownPaths;
     use hex_literal::hex;
@@ -165,9 +165,9 @@ mod tests {
 
         // It should be possible to get the hash derivation modulo.
         assert_eq!(
-            Some(&NixHash::Sha256(hex!(
+            Some(&hex!(
                 "c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df"
-            ))),
+            )),
             known_paths.get_hash_derivation_modulo(&BAR_DRV_PATH.clone())
         );
 
@@ -180,9 +180,9 @@ mod tests {
             known_paths.get_drv_by_drvpath(&FOO_DRV_PATH)
         );
         assert_eq!(
-            Some(&NixHash::Sha256(hex!(
+            Some(&hex!(
                 "af030d36d63d3d7f56a71adaba26b36f5fa1f9847da5eed953ed62e18192762f"
-            ))),
+            )),
             known_paths.get_hash_derivation_modulo(&FOO_DRV_PATH.clone())
         );
 
diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs
index 848f1a31ff36..9a7dd04c11c4 100644
--- a/tvix/nix-compat/src/derivation/mod.rs
+++ b/tvix/nix-compat/src/derivation/mod.rs
@@ -178,8 +178,8 @@ impl Derivation {
     ///
     /// This is called `hashDerivationModulo` in nixcpp.
     ///
-    /// It returns a [NixHash], created by calculating the sha256 digest of
-    /// the derivation ATerm representation, except that:
+    /// It returns the sha256 digest of the derivation ATerm representation,
+    /// except that:
     ///  -  any input derivation paths have beed replaced "by the result of a
     ///     recursive call to this function" and that
     ///  - for fixed-output derivations the special
@@ -190,16 +190,16 @@ impl Derivation {
     /// this function to provide a lookup function to lookup these calculation
     /// results of parent derivations at `fn_get_derivation_or_fod_hash` (by
     /// drv path).
-    pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> NixHash
+    pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> [u8; 32]
     where
-        F: Fn(&StorePathRef) -> NixHash,
+        F: Fn(&StorePathRef) -> [u8; 32],
     {
         // Fixed-output derivations return a fixed hash.
         // Non-Fixed-output derivations return the sha256 digest of the ATerm
         // notation, but with all input_derivation paths replaced by a recursive
         // call to this function.
         // We use fn_get_derivation_or_fod_hash here, so callers can precompute this.
-        NixHash::Sha256(self.fod_digest().unwrap_or({
+        self.fod_digest().unwrap_or({
             // For each input_derivation, look up the
             // derivation_or_fod_hash, and replace the derivation path with
             // it's HEXLOWER digest.
@@ -216,7 +216,7 @@ impl Derivation {
             hasher.update(self.to_aterm_bytes_with_replacements(&input_derivations));
 
             hasher.finalize().into()
-        }))
+        })
     }
 
     /// This calculates all output paths of a Derivation and updates the struct.
@@ -238,7 +238,7 @@ impl Derivation {
     pub fn calculate_output_paths(
         &mut self,
         name: &str,
-        derivation_or_fod_hash: &NixHash,
+        derivation_or_fod_hash: &[u8; 32],
     ) -> Result<(), DerivationError> {
         // The fingerprint and hash differs per output
         for (output_name, output) in self.outputs.iter_mut() {
diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs
index 2bf09265bcda..56bad7869dbb 100644
--- a/tvix/nix-compat/src/derivation/tests/mod.rs
+++ b/tvix/nix-compat/src/derivation/tests/mod.rs
@@ -5,6 +5,7 @@ use crate::derivation::parser::Error;
 use crate::derivation::Derivation;
 use crate::store_path::StorePath;
 use bstr::{BStr, BString};
+use hex_literal::hex;
 use std::collections::BTreeSet;
 use std::fs::File;
 use std::io::Read;
@@ -184,16 +185,15 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation {
     }
 }
 
-#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")]
-#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")]
-fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) {
+#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", hex!("724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"); "fixed_sha256")]
+#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", hex!("c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df");"fixed-sha1")]
+fn derivation_or_fod_hash(drv_path: &str, expected_digest: [u8; 32]) {
     // read in the fixture
     let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path));
     let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize");
 
     let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called"));
-
-    assert_eq!(expected_nix_hash_string, actual.to_nix_hex_string());
+    assert_eq!(expected_digest, actual);
 }
 
 /// This reads a Derivation (in A-Term), trims out all fields containing
@@ -401,7 +401,7 @@ fn output_path_construction() {
             if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" {
                 panic!("lookup called with unexpected drv_path: {}", drv_path);
             }
-            bar_drv_derivation_or_fod_hash.clone()
+            bar_drv_derivation_or_fod_hash
         }),
     );
     assert!(foo_calc_result.is_ok());
diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs
index f20bf4e121d2..735b781574e1 100644
--- a/tvix/nix-compat/src/derivation/write.rs
+++ b/tvix/nix-compat/src/derivation/write.rs
@@ -8,7 +8,8 @@ use crate::derivation::{ca_kind_prefix, output::Output};
 use crate::nixbase32;
 use crate::store_path::{StorePath, StorePathRef, STORE_DIR_WITH_SLASH};
 use bstr::BString;
-use std::fmt::Display;
+use data_encoding::HEXLOWER;
+
 use std::{
     collections::{BTreeMap, BTreeSet},
     io,
@@ -16,8 +17,6 @@ use std::{
     io::Write,
 };
 
-use super::NixHash;
-
 pub const DERIVATION_PREFIX: &str = "Derive";
 pub const PAREN_OPEN: char = '(';
 pub const PAREN_CLOSE: char = ')';
@@ -31,7 +30,7 @@ pub const QUOTE: char = '"';
 /// Note that we mostly use explicit `write_*` calls
 /// instead since the serialization of the items depends on
 /// the context a lot.
-pub(crate) trait AtermWriteable: Display {
+pub(crate) trait AtermWriteable {
     fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()>;
 
     fn aterm_bytes(&self) -> Vec<u8> {
@@ -67,12 +66,9 @@ impl AtermWriteable for String {
     }
 }
 
-impl AtermWriteable for NixHash {
+impl AtermWriteable for [u8; 32] {
     fn aterm_write(&self, writer: &mut impl Write) -> std::io::Result<()> {
-        // When we serialize the placeholder hashes,
-        // they need to be SHA256.
-        debug_assert!(matches!(self, NixHash::Sha256(_)));
-        write_field(writer, self.to_plain_hex_string(), false)
+        write_field(writer, HEXLOWER.encode(self), false)
     }
 }
 
diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs
index 2289594e3a10..d054d5955696 100644
--- a/tvix/nix-compat/src/store_path/utils.rs
+++ b/tvix/nix-compat/src/store_path/utils.rs
@@ -1,8 +1,8 @@
 use crate::nixbase32;
 use crate::nixhash::{CAHash, NixHash};
 use crate::store_path::{Error, StorePathRef, DIGEST_SIZE, STORE_DIR};
+use data_encoding::HEXLOWER;
 use sha2::{Digest, Sha256};
-use std::io::Write;
 use thiserror;
 
 /// Errors that can occur when creating a content-addressed store path.
@@ -66,14 +66,11 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
         return Err(BuildStorePathError::InvalidReference());
     }
 
-    let (ty, hash) = match &ca_hash {
-        CAHash::Text(ref digest) => (
-            make_references_string("text", references, false),
-            NixHash::Sha256(*digest),
-        ),
+    let (ty, inner_digest) = match &ca_hash {
+        CAHash::Text(ref digest) => (make_references_string("text", references, false), *digest),
         CAHash::Nar(NixHash::Sha256(ref digest)) => (
             make_references_string("source", references, self_reference),
-            NixHash::Sha256(*digest),
+            *digest,
         ),
 
         // for all other CAHash::Nar, another custom scheme is used.
@@ -84,7 +81,7 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
 
             (
                 "output:out".to_string(),
-                NixHash::Sha256(fixed_out_digest("fixed:out:r", hash)),
+                fixed_out_digest("fixed:out:r", hash),
             )
         }
         // CaHash::Flat is using something very similar, except the `r:` prefix.
@@ -95,12 +92,12 @@ pub fn build_ca_path<'a, S: AsRef<str>, I: IntoIterator<Item = S>>(
 
             (
                 "output:out".to_string(),
-                NixHash::Sha256(fixed_out_digest("fixed:out", hash)),
+                fixed_out_digest("fixed:out", hash),
             )
         }
     };
 
-    build_store_path_from_fingerprint_parts(&ty, &hash, name)
+    build_store_path_from_fingerprint_parts(&ty, &inner_digest, name)
         .map_err(BuildStorePathError::InvalidStorePath)
 }
 
@@ -128,13 +125,13 @@ pub fn build_nar_based_store_path<'a>(
 /// Input-addresed store paths are always derivation outputs, the "input" in question is the
 /// derivation and its closure.
 pub fn build_output_path<'a>(
-    drv_hash: &NixHash,
+    drv_sha256: &[u8; 32],
     output_name: &str,
     output_path_name: &'a str,
 ) -> Result<StorePathRef<'a>, Error> {
     build_store_path_from_fingerprint_parts(
         &(String::from("output:") + output_name),
-        drv_hash,
+        drv_sha256,
         output_path_name,
     )
 }
@@ -145,18 +142,20 @@ pub fn build_output_path<'a>(
 /// but other fingerprints starting with "output:" are also used in Derivation
 /// output path calculation.
 ///
-/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes,
-/// and nixbase32-encoded (32 characters).
+/// The fingerprint is hashed with sha256, and its digest is compressed to 20
+/// bytes.
+/// Inside a StorePath, that digest is printed nixbase32-encoded
+/// (32 characters).
 fn build_store_path_from_fingerprint_parts<'a>(
     ty: &str,
-    hash: &NixHash,
+    inner_digest: &[u8; 32],
     name: &'a str,
 ) -> Result<StorePathRef<'a>, Error> {
-    let digest: [u8; DIGEST_SIZE] = compress_hash(&{
-        let mut h = Sha256::new();
-        write!(h, "{ty}:{}:{STORE_DIR}:{name}", hash.to_nix_hex_string()).unwrap();
-        h.finalize()
-    });
+    let fingerprint = format!(
+        "{ty}:sha256:{}:{STORE_DIR}:{name}",
+        HEXLOWER.encode(inner_digest)
+    );
+    let digest: [u8; DIGEST_SIZE] = compress_hash(&Sha256::new_with_prefix(fingerprint).finalize());
 
     // name validation happens in here.
     StorePathRef::from_name_and_digest(name, &digest)