From 95bec264d55ddc4d1c9f53211c49899d93babd12 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Wed, 4 Jan 2023 22:26:50 +0100 Subject: feat(tvix/derivation): implement output path calculation This implement output path calculation for fixed outputs, both fixed- output and non-fixed-output. Change-Id: I0a77b99f2ba6b39467cc5dd589ce152a40387f9a Reviewed-on: https://cl.tvl.fyi/c/depot/+/7761 Reviewed-by: tazjin Tested-by: BuildkiteCI Reviewed-by: jrhahn --- tvix/derivation/src/derivation.rs | 171 +++++++++++++++++++++++++++++++++++++- tvix/derivation/src/tests/mod.rs | 93 +++++++++++++++++++++ 2 files changed, 262 insertions(+), 2 deletions(-) (limited to 'tvix/derivation/src') diff --git a/tvix/derivation/src/derivation.rs b/tvix/derivation/src/derivation.rs index a9e769543e00..2afe672e2e9b 100644 --- a/tvix/derivation/src/derivation.rs +++ b/tvix/derivation/src/derivation.rs @@ -1,9 +1,9 @@ use crate::nix_hash; -use crate::output::Output; +use crate::output::{Hash, Output}; use crate::write; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use std::{collections::BTreeMap, fmt, fmt::Write, iter::FromIterator}; +use std::{collections::BTreeMap, fmt, fmt::Write}; use tvix_store::nixbase32::NIXBASE32; use tvix_store::nixpath::{NixPath, ParseNixPathError, STORE_DIR}; @@ -147,6 +147,173 @@ impl Derivation { build_store_path(true, &hasher.finalize(), name) } + + /// Calculate the drv replacement string for a given derivation. + /// + /// This is either called on a struct without output paths populated, + /// to provide the `drv_replacement_str` value for the `calculate_output_paths` + /// function call, or called on a struct with output paths populated, to + /// calculate / cache lookups for calls to fn_get_drv_replacement. + /// + /// `fn_get_drv_replacement` is used to look up the drv replacement strings + /// for input_derivations the Derivation refers to. + pub fn calculate_drv_replacement_str(&self, fn_get_drv_replacement: F) -> String + where + F: Fn(&str) -> String, + { + let mut hasher = Sha256::new(); + let digest = match self.get_fixed_output() { + Some((fixed_output_path, fixed_output_hash)) => { + hasher.update("fixed:out:"); + hasher.update(&fixed_output_hash.algo); + hasher.update(":"); + hasher.update(&fixed_output_hash.digest); + hasher.update(":"); + hasher.update(fixed_output_path); + hasher.finalize() + } + None => { + let mut replaced_input_derivations: BTreeMap> = BTreeMap::new(); + + // For each input_derivation, look up the replacement. + for (drv_path, input_derivation) in &self.input_derivations { + replaced_input_derivations.insert( + fn_get_drv_replacement(drv_path).to_string(), + input_derivation.to_vec(), + ); + } + + // construct a new derivation struct with these replaced input derivation strings + let replaced_derivation = Derivation { + input_derivations: replaced_input_derivations, + ..self.clone() + }; + + // write the ATerm of that to the hash function + hasher.update(replaced_derivation.to_string()); + + hasher.finalize() + } + }; + + format!("{:x}", digest) + } + + /// This calculates all output paths of a Derivation and updates the struct. + /// It requires the struct to be initially without output paths. + /// This means, self.outputs[$outputName].path needs to be an empty string, + /// and self.environment[$outputName] needs to be an empty string. + /// + /// Output path calculation requires knowledge of "drv replacement + /// strings", and in case of non-fixed-output derivations, also knowledge + /// of "drv replacement" strings (recursively) of all input derivations. + /// + /// We solve this by asking the caller of this function to provide + /// the drv replacement string of the current derivation itself, + /// which is ran on the struct without output paths. + /// + /// This sound terribly ugly, but won't be too much of a concern later on, as + /// naming fixed-output paths once uploaded will be a tvix-store concern, + /// so there's no need to calculate them here anymore. + /// + /// On completion, self.environment[$outputName] and + /// self.outputs[$outputName].path are set to the calculated output path for all + /// outputs. + pub fn calculate_output_paths( + &mut self, + name: &str, + drv_replacement_str: &str, + ) -> Result<(), ParseNixPathError> { + let mut hasher = Sha256::new(); + + // Check if the Derivation is fixed output, because they cause + // different fingerprints to be hashed. + match self.get_fixed_output() { + None => { + // The fingerprint and hash differs per output + for (output_name, output) in self.outputs.iter_mut() { + // Assert that outputs are not yet populated, to avoid using this function wrongly. + // We don't also go over self.environment, but it's a sufficient + // footgun prevention mechanism. + assert!(output.path.is_empty()); + + hasher.update("output:"); + hasher.update(output_name); + hasher.update(":sha256:"); + hasher.update(drv_replacement_str); + hasher.update(":"); + hasher.update(tvix_store::nixpath::STORE_DIR); + hasher.update(":"); + + // calculate the output_name_path, which is the part of the NixPath after the digest. + let mut output_path_name = name.to_string(); + if output_name != "out" { + output_path_name.push('-'); + output_path_name.push_str(output_name); + } + + hasher.update(output_path_name.as_str()); + + let digest = hasher.finalize_reset(); + + let abs_store_path = format!( + "{}/{}", + tvix_store::nixpath::STORE_DIR, + build_store_path(false, &digest, &output_path_name)? + ); + + output.path = abs_store_path.clone(); + self.environment + .insert(output_name.to_string(), abs_store_path); + } + } + Some((fixed_output_path, fixed_output_hash)) => { + // Assert that outputs are not yet populated, to avoid using this function wrongly. + // We don't also go over self.environment, but it's a sufficient + // footgun prevention mechanism. + assert!(fixed_output_path.is_empty()); + + let digest = { + // Fixed-output derivation. + // There's two different hashing strategies in place, depending on the value of hash.algo. + // This code is _weird_ but it is what Nix is doing. See: + // https://github.com/NixOS/nix/blob/1385b2007804c8a0370f2a6555045a00e34b07c7/src/libstore/store-api.cc#L178-L196 + if fixed_output_hash.algo == "r:sha256" { + hasher.update("source:"); + hasher.update("sha256"); + hasher.update(":"); + hasher.update(fixed_output_hash.digest.clone()); // nixbase32 + } else { + hasher.update("output:out:sha256:"); + // This is drv_replacement for FOD, with an empty fixed_output_path. + hasher.update(drv_replacement_str); + } + hasher.update(":"); + hasher.update(tvix_store::nixpath::STORE_DIR); + hasher.update(":"); + hasher.update(name); + hasher.finalize() + }; + + let abs_store_path = format!( + "{}/{}", + tvix_store::nixpath::STORE_DIR, + build_store_path(false, &digest, name)? + ); + + self.outputs.insert( + "out".to_string(), + Output { + path: abs_store_path.clone(), + hash: Some(fixed_output_hash.clone()), + }, + ); + self.environment.insert("out".to_string(), abs_store_path); + } + }; + + Ok(()) + } } impl fmt::Display for Derivation { diff --git a/tvix/derivation/src/tests/mod.rs b/tvix/derivation/src/tests/mod.rs index 623cc4e5b13a..dbba6e521511 100644 --- a/tvix/derivation/src/tests/mod.rs +++ b/tvix/derivation/src/tests/mod.rs @@ -1,4 +1,5 @@ use crate::derivation::Derivation; +use crate::output::Output; use std::fs::File; use std::io::Read; use std::path::Path; @@ -67,3 +68,95 @@ fn derivation_path(name: &str, expected_path: &str) { NixPath::from_string(expected_path).unwrap() ); } + +/// This trims all outputs from a Derivation struct, +/// by setting outputs[$outputName].path and environment[$outputName] to the empty string. +fn derivation_with_trimmed_outputs(derivation: &Derivation) -> Derivation { + let mut trimmed_env = derivation.environment.clone(); + let mut trimmed_outputs = derivation.outputs.clone(); + + for (output_name, output) in &derivation.outputs { + trimmed_env.insert(output_name.clone(), "".to_string()); + assert!(trimmed_outputs.contains_key(output_name)); + trimmed_outputs.insert( + output_name.to_string(), + Output { + path: "".to_string(), + ..output.clone() + }, + ); + } + + // replace environment and outputs with the trimmed variants + Derivation { + environment: trimmed_env, + outputs: trimmed_outputs, + ..derivation.clone() + } +} + +#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] +#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")] +fn replacement_drv_path(drv_path: &str, expected_replacement_str: &str) { + // read in the fixture + let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); + let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); + + let drv_replacement_str = drv.calculate_drv_replacement_str(|_| panic!("must not be called")); + + assert_eq!(expected_replacement_str, drv_replacement_str); +} + +#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] +#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")] +#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")] +#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")] +#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")] +#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")] +#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] +fn output_paths(name: &str, drv_path: &str) { + // read in the fixture + let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); + let expected_derivation: Derivation = serde_json::from_str(&data).expect("must deserialize"); + + let mut derivation = derivation_with_trimmed_outputs(&expected_derivation); + + // calculate the drv replacement string. + // We don't expect the lookup function to be called for most derivations. + let replacement_str = derivation.calculate_drv_replacement_str(|drv_name| { + // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv + // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv + if name == "foo" + && ((drv_path == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv" + && drv_name == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv") + || (drv_path == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv" + && drv_name == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")) + { + // do the lookup, by reading in the fixture of the requested + // drv_name, and calculating its drv replacement (on the non-stripped version) + // In a real-world scenario you would have already done this during construction. + + let data = read_file(&format!( + "{}/{}.json", + RESOURCES_PATHS, + Path::new(drv_name).file_name().unwrap().to_string_lossy() + )); + + let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); + + // calculate replacement string. These don't trigger any subsequent requests, as they're both FOD. + drv.calculate_drv_replacement_str(|_| panic!("must not lookup")) + } else { + // we only expect this to be called in the "foo" testcase, for the "bar derivations" + panic!("may only be called for foo testcase on bar derivations"); + } + }); + + // We need to calculate the replacement_str, as fixed-sha1 does use it. + derivation + .calculate_output_paths(&name, &replacement_str) + .unwrap(); + + // The derivation should now look like it was before + assert_eq!(expected_derivation, derivation); +} -- cgit 1.4.1