From 6f993b8bde8201213fe2953ea663ac387de916e3 Mon Sep 17 00:00:00 2001
From: Jürgen Hahn
Date: Mon, 2 Jan 2023 16:09:18 +0100
Subject: feat(tvix/derivation): add nix drv path generation to Derivation

This adds a function to generate the derivation path.
The computation is based on the Go implementation.

Change-Id: Iae89db4976f5fd9208f0453f73688689a245cd66
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7729
Tested-by: BuildkiteCI
Reviewed-by: flokli
---
 tvix/Cargo.lock                   | 23 +++++++++++++
 tvix/derivation/Cargo.toml        |  3 ++
 tvix/derivation/src/derivation.rs | 71 ++++++++++++++++++++++++++++++++++++++-
 tvix/derivation/src/lib.rs        |  1 +
 tvix/derivation/src/nix_hash.rs   | 15 +++++++++
 tvix/derivation/src/tests/mod.rs  | 17 ++++++++++
 tvix/derivation/src/write.rs      | 11 ++++--
 7 files changed, 137 insertions(+), 4 deletions(-)
 create mode 100644 tvix/derivation/src/nix_hash.rs

diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index 56e568559ffd..9cc9ffb6d1ea 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -374,6 +374,15 @@ version = "3.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7704b5fdd17b18ae31c4c1da5a2e0305a2bf17b5249300a9ee9ed7b72114c636"
 
+[[package]]
+name = "cpufeatures"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "28d997bd5e24a5928dd43e46dc529867e207907fe0b239c3477d924f7f2ca320"
+dependencies = [
+ "libc",
+]
+
 [[package]]
 name = "criterion"
 version = "0.4.0"
@@ -486,7 +495,10 @@ dependencies = [
  "glob",
  "serde",
  "serde_json",
+ "sha2",
+ "test-case",
  "test-generator",
+ "tvix-store-bin",
 ]
 
 [[package]]
@@ -1662,6 +1674,17 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "sha2"
+version = "0.10.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82e6b795fe2e3b1e845bafcb27aa35405c4d47cdfc92af5fc8d3002f76cebdc0"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
 [[package]]
 name = "sharded-slab"
 version = "0.1.4"
diff --git a/tvix/derivation/Cargo.toml b/tvix/derivation/Cargo.toml
index 99dfbb3a104f..ec998c446508 100644
--- a/tvix/derivation/Cargo.toml
+++ b/tvix/derivation/Cargo.toml
@@ -8,6 +8,8 @@ edition = "2021"
 [dependencies]
 glob = "0.3.0"
 serde = { version = "1.0", features = ["derive"] }
+sha2 = "0.10.6"
+tvix-store-bin = { path = "../store" }
 
 [dev-dependencies.test-generator]
 # This fork of test-generator adds support for cargo workspaces, see
@@ -17,3 +19,4 @@ rev = "82e799979980962aec1aa324ec6e0e4cad781f41"
 
 [dev-dependencies]
 serde_json = "1.0"
+test-case = "2.2.2"
diff --git a/tvix/derivation/src/derivation.rs b/tvix/derivation/src/derivation.rs
index e624be20cab3..9531157d6f98 100644
--- a/tvix/derivation/src/derivation.rs
+++ b/tvix/derivation/src/derivation.rs
@@ -1,7 +1,10 @@
+use crate::nix_hash;
 use crate::output::Output;
 use crate::write;
 use serde::{Deserialize, Serialize};
-use std::{collections::BTreeMap, fmt, fmt::Write};
+use sha2::{Digest, Sha256};
+use std::{collections::BTreeMap, fmt, fmt::Write, iter::FromIterator};
+use tvix_store::nixbase32::NIXBASE32;
 
 #[derive(Serialize, Deserialize)]
 pub struct Derivation {
@@ -31,6 +34,72 @@ impl Derivation {
 
         Ok(())
     }
+
+    /// Returns the path of a Derivation struct.
+    ///
+    /// The path is calculated like this:
+    ///   - Write the fingerprint of the Derivation to the sha256 hash function.
+    ///     This is: `text:`,
+    ///     all d.InputDerivations and d.InputSources (sorted, separated by a `:`),
+    ///     a `:`,
+    ///     a `sha256:`, followed by the sha256 digest of the ATerm representation (hex-encoded)
+    ///     a `:`,
+    ///     the storeDir, followed by a `:`,
+    ///     the name of a derivation,
+    ///     a `.drv`.
+    ///   - Write the .drv A-Term contents to a hash function
+    ///   - Take the digest, run hash.CompressHash(digest, 20) on it.
+    ///   - Encode it with nixbase32
+    ///   - Construct the full path $storeDir/$nixbase32EncodedCompressedHash-$name.drv
+    pub fn calculate_derivation_path(&self, name: &str) -> String {
+        let mut hasher = Sha256::new();
+
+        // collect the list of paths from input_sources and input_derivations
+        // into a sorted list, and join them by :
+        hasher.update(write::TEXT_COLON);
+
+        let concat_inputs: Vec<String> = {
+            let mut inputs = self.input_sources.clone();
+            let input_derivation_keys: Vec<String> =
+                self.input_derivations.keys().cloned().collect();
+            inputs.extend(input_derivation_keys);
+            inputs.sort();
+            inputs
+        };
+
+        for input in concat_inputs {
+            hasher.update(input);
+            hasher.update(write::COLON);
+        }
+
+        // calculate the sha256 hash of the ATerm representation, and represent
+        // it as a hex-encoded string (prefixed with sha256:).
+        hasher.update(write::SHA256_COLON);
+
+        let digest = {
+            let mut derivation_hasher = Sha256::new();
+            derivation_hasher.update(self.to_string());
+            derivation_hasher.finalize()
+        };
+
+        hasher.update(format!("{:x}", digest));
+        hasher.update(write::COLON);
+        hasher.update(write::STORE_PATH_COLON);
+        hasher.update(name);
+        hasher.update(write::DOT_FILE_EXT);
+
+        let compressed = {
+            let aterm_digest = Vec::from_iter(hasher.finalize());
+            nix_hash::compress_hash(&aterm_digest, 20)
+        };
+
+        format!(
+            "{}-{}{}",
+            NIXBASE32.encode(&compressed),
+            name,
+            write::DOT_FILE_EXT
+        )
+    }
 }
 
 impl fmt::Display for Derivation {
diff --git a/tvix/derivation/src/lib.rs b/tvix/derivation/src/lib.rs
index a902943493d5..a8360fbafc5e 100644
--- a/tvix/derivation/src/lib.rs
+++ b/tvix/derivation/src/lib.rs
@@ -1,3 +1,4 @@
+mod nix_hash;
 mod output;
 mod string_escape;
 mod write;
diff --git a/tvix/derivation/src/nix_hash.rs b/tvix/derivation/src/nix_hash.rs
new file mode 100644
index 000000000000..a49d444faa53
--- /dev/null
+++ b/tvix/derivation/src/nix_hash.rs
@@ -0,0 +1,15 @@
+/// CompressHash takes an arbitrary long sequence of bytes (usually a hash
+/// digest), and returns a sequence of bytes of length output_size.
+/// It's calculated by rotating through the bytes in the output buffer (zero-
+/// initialized), and XOR'ing with each byte of the passed input.
+/// It consumes 1 byte at a time, and XOR's it with the current value in the
+/// output buffer.
+pub fn compress_hash(input: &[u8], output_size: usize) -> Vec<u8> {
+    let mut output: Vec<u8> = vec![0; output_size];
+
+    for (ii, ch) in input.iter().enumerate() {
+        output[ii % output_size] ^= ch;
+    }
+
+    output
+}
diff --git a/tvix/derivation/src/tests/mod.rs b/tvix/derivation/src/tests/mod.rs
index 3d4aae3fe4d4..435d82144067 100644
--- a/tvix/derivation/src/tests/mod.rs
+++ b/tvix/derivation/src/tests/mod.rs
@@ -2,8 +2,11 @@ use crate::derivation::Derivation;
 use std::fs::File;
 use std::io::Read;
 use std::path::Path;
+use test_case::test_case;
 use test_generator::test_resources;
 
+const RESOURCES_PATHS: &str = "src/tests/derivation_tests";
+
 fn read_file(path: &str) -> String {
     let path = Path::new(path);
     let mut file = File::open(path).unwrap();
@@ -36,3 +39,17 @@ fn check_to_string(path_to_drv_file: &str) {
 
     assert_eq!(expected, derivation.to_string());
 }
+
+#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")]
+#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")]
+#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")]
+#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")]
+#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")]
+#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")]
+#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")]
+fn derivation_path(name: &str, expected_path: &str) {
+    let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, expected_path));
+    let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted");
+
+    assert_eq!(derivation.calculate_derivation_path(name), expected_path);
+}
diff --git a/tvix/derivation/src/write.rs b/tvix/derivation/src/write.rs
index 987c924fae3a..0ad1eb71e4da 100644
--- a/tvix/derivation/src/write.rs
+++ b/tvix/derivation/src/write.rs
@@ -10,6 +10,12 @@ pub const BRACKET_CLOSE: char = ']';
 pub const COMMA: char = ',';
 pub const QUOTE: char = '"';
 
+pub const COLON: &str = ":";
+pub const TEXT_COLON: &str = "text:";
+pub const SHA256_COLON: &str = "sha256:";
+pub const STORE_PATH_COLON: &str = "/nix/store:";
+pub const DOT_FILE_EXT: &str = ".drv";
+
 fn write_array_elements(
     writer: &mut impl Write,
     quote: bool,
@@ -110,7 +116,7 @@ pub fn write_input_derivations(
 
 pub fn write_input_sources(
     writer: &mut impl Write,
-    input_sources: &Vec<String>,
+    input_sources: &[String],
 ) -> Result<(), fmt::Error> {
     writer.write_char(COMMA)?;
 
@@ -138,8 +144,7 @@ pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), fmt::
     writer.write_str(escape_string(builder).as_str())?;
     Ok(())
 }
-
-pub fn write_arguments(writer: &mut impl Write, arguments: &Vec<String>) -> Result<(), fmt::Error> {
+pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), fmt::Error> {
     writer.write_char(COMMA)?;
     // convert Vec<String> to [&str]
     let v: Vec<&str> = arguments.iter().map(|x| &**x).collect();
-- 
cgit 1.4.1