about summary refs log tree commit diff
path: root/tvix/derivation/src
diff options
context:
space:
mode:
author Jürgen Hahn <mail.jhahn@gmail.com> 2023-01-02T15·09+0100
committer jrhahn <mail.jhahn@gmail.com> 2023-01-04T12·24+0000
commit 6f993b8bde8201213fe2953ea663ac387de916e3 (patch)
tree f82cd8094172c18a5eb0601f278fa4205e48344c /tvix/derivation/src
parent 79c05f38109d2eac07b9ba893b9b6f67fd34c29c (diff)
feat(tvix/derivation): add nix drv path generation to Derivation r/5580
This adds a function to generate the derivation path. The computation
is based on the Go implementation.

Change-Id: Iae89db4976f5fd9208f0453f73688689a245cd66
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7729
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
Diffstat (limited to 'tvix/derivation/src')
-rw-r--r-- tvix/derivation/src/derivation.rs 71
-rw-r--r-- tvix/derivation/src/lib.rs 1
-rw-r--r-- tvix/derivation/src/nix_hash.rs 15
-rw-r--r-- tvix/derivation/src/tests/mod.rs 17
-rw-r--r-- tvix/derivation/src/write.rs 11
5 files changed, 111 insertions, 4 deletions
diff --git a/tvix/derivation/src/derivation.rs b/tvix/derivation/src/derivation.rs
index e624be20cab3..9531157d6f98 100644
--- a/tvix/derivation/src/derivation.rs
+++ b/tvix/derivation/src/derivation.rs
@@ -1,7 +1,10 @@
+use crate::nix_hash;
 use crate::output::Output;
 use crate::write;
 use serde::{Deserialize, Serialize};
-use std::{collections::BTreeMap, fmt, fmt::Write};
+use sha2::{Digest, Sha256};
+use std::{collections::BTreeMap, fmt, fmt::Write, iter::FromIterator};
+use tvix_store::nixbase32::NIXBASE32;
 
 #[derive(Serialize, Deserialize)]
 pub struct Derivation {
@@ -31,6 +34,72 @@ impl Derivation {
 
         Ok(())
     }
+
+    /// Returns the store path basename (`$hash-$name.drv`) of a Derivation struct.
+    ///
+    /// The path is calculated like this:
+    ///   - Write the fingerprint of the Derivation to the sha256 hash function.
+    ///     This is: `text:`,
+    ///     every input_derivations key and input_sources entry (sorted, each followed by a `:`),
+    ///     a `sha256:`, followed by the sha256 digest of the ATerm representation (hex-encoded),
+    ///     a `:`,
+    ///     the storeDir, followed by a `:`,
+    ///     the name of a derivation,
+    ///     a `.drv`.
+    ///     (The ATerm representation is what `to_string()` produces for this Derivation.)
+    ///   - Take the digest of that fingerprint, run nix_hash::compress_hash(digest, 20) on it.
+    ///   - Encode it with nixbase32
+    ///   - Return $nixbase32EncodedCompressedHash-$name.drv. The storeDir is only hashed
+    ///     into the fingerprint; it is not prepended to the returned string.
+    pub fn calculate_derivation_path(&self, name: &str) -> String {
+        let mut hasher = Sha256::new();
+
+        // the fingerprint starts with `text:`; the paths from input_sources and
+        // input_derivations follow below, sorted, each one terminated by a `:`
+        hasher.update(write::TEXT_COLON);
+
+        let concat_inputs: Vec<String> = {
+            let mut inputs = self.input_sources.clone();
+            let input_derivation_keys: Vec<String> =
+                self.input_derivations.keys().cloned().collect();
+            inputs.extend(input_derivation_keys);
+            inputs.sort();
+            inputs
+        };
+
+        for input in concat_inputs {
+            hasher.update(input);
+            hasher.update(write::COLON);
+        }
+
+        // calculate the sha256 hash of the ATerm representation, and represent
+        // it as a hex-encoded string (prefixed with sha256:).
+        hasher.update(write::SHA256_COLON);
+
+        let digest = {
+            let mut derivation_hasher = Sha256::new();
+            derivation_hasher.update(self.to_string());
+            derivation_hasher.finalize()
+        };
+
+        hasher.update(format!("{:x}", digest));
+        hasher.update(write::COLON);
+        hasher.update(write::STORE_PATH_COLON);
+        hasher.update(name);
+        hasher.update(write::DOT_FILE_EXT);
+
+        let compressed = {
+            let aterm_digest = Vec::from_iter(hasher.finalize());
+            nix_hash::compress_hash(&aterm_digest, 20)
+        };
+
+        format!(
+            "{}-{}{}",
+            NIXBASE32.encode(&compressed),
+            name,
+            write::DOT_FILE_EXT
+        )
+    }
 }
 
 impl fmt::Display for Derivation {
diff --git a/tvix/derivation/src/lib.rs b/tvix/derivation/src/lib.rs
index a902943493d5..a8360fbafc5e 100644
--- a/tvix/derivation/src/lib.rs
+++ b/tvix/derivation/src/lib.rs
@@ -1,3 +1,4 @@
+mod nix_hash;
 mod output;
 mod string_escape;
 mod write;
diff --git a/tvix/derivation/src/nix_hash.rs b/tvix/derivation/src/nix_hash.rs
new file mode 100644
index 000000000000..a49d444faa53
--- /dev/null
+++ b/tvix/derivation/src/nix_hash.rs
@@ -0,0 +1,15 @@
+/// compress_hash takes an arbitrarily long sequence of bytes (usually a hash
+/// digest), and returns a sequence of bytes of length output_size.
+/// The output buffer starts zero-initialized; each input byte, taken one at a
+/// time, is XOR'ed into the output slot at index `position % output_size`, so
+/// the input wraps around the output buffer.
+/// Panics if output_size is 0 and the input is non-empty (modulo by zero).
+pub fn compress_hash(input: &[u8], output_size: usize) -> Vec<u8> {
+    let mut output: Vec<u8> = vec![0; output_size];
+
+    for (ii, ch) in input.iter().enumerate() {
+        output[ii % output_size] ^= ch;
+    }
+
+    output
+}
diff --git a/tvix/derivation/src/tests/mod.rs b/tvix/derivation/src/tests/mod.rs
index 3d4aae3fe4d4..435d82144067 100644
--- a/tvix/derivation/src/tests/mod.rs
+++ b/tvix/derivation/src/tests/mod.rs
@@ -2,8 +2,11 @@ use crate::derivation::Derivation;
 use std::fs::File;
 use std::io::Read;
 use std::path::Path;
+use test_case::test_case;
 use test_generator::test_resources;
 
+const RESOURCES_PATHS: &str = "src/tests/derivation_tests";
+
 fn read_file(path: &str) -> String {
     let path = Path::new(path);
     let mut file = File::open(path).unwrap();
@@ -36,3 +39,17 @@ fn check_to_string(path_to_drv_file: &str) {
 
     assert_eq!(expected, derivation.to_string());
 }
+
+#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")]
+#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")]
+#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")]
+#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")]
+#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")]
+#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")]
+#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")]
+fn derivation_path(name: &str, expected_path: &str) {
+    let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, expected_path));
+    let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted");
+
+    assert_eq!(derivation.calculate_derivation_path(name), expected_path);
+}
diff --git a/tvix/derivation/src/write.rs b/tvix/derivation/src/write.rs
index 987c924fae3a..0ad1eb71e4da 100644
--- a/tvix/derivation/src/write.rs
+++ b/tvix/derivation/src/write.rs
@@ -10,6 +10,12 @@ pub const BRACKET_CLOSE: char = ']';
 pub const COMMA: char = ',';
 pub const QUOTE: char = '"';
 
+pub const COLON: &str = ":";
+pub const TEXT_COLON: &str = "text:";
+pub const SHA256_COLON: &str = "sha256:";
+pub const STORE_PATH_COLON: &str = "/nix/store:";
+pub const DOT_FILE_EXT: &str = ".drv";
+
 fn write_array_elements(
     writer: &mut impl Write,
     quote: bool,
@@ -110,7 +116,7 @@ pub fn write_input_derivations(
 
 pub fn write_input_sources(
     writer: &mut impl Write,
-    input_sources: &Vec<String>,
+    input_sources: &[String],
 ) -> Result<(), fmt::Error> {
     writer.write_char(COMMA)?;
 
@@ -138,8 +144,7 @@ pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), fmt::
     writer.write_str(escape_string(builder).as_str())?;
     Ok(())
 }
-
-pub fn write_arguments(writer: &mut impl Write, arguments: &Vec<String>) -> Result<(), fmt::Error> {
+pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), fmt::Error> {
     writer.write_char(COMMA)?;
     // convert &[String] to Vec<&str>
     let v: Vec<&str> = arguments.iter().map(|x| &**x).collect();