diff options
Diffstat (limited to 'tvix/nix-compat')
39 files changed, 3272 insertions, 0 deletions
diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml new file mode 100644 index 000000000000..89aae3d4fad8 --- /dev/null +++ b/tvix/nix-compat/Cargo.toml @@ -0,0 +1,24 @@ +[package] +name = "nix-compat" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bstr = { version = "1.6.0", features = ["alloc", "unicode", "serde"] } +data-encoding = "2.3.3" +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" +sha2 = "0.10.6" +thiserror = "1.0.38" + +[dev-dependencies] +serde_json = "1.0" +test-case = "2.2.2" + +[dev-dependencies.test-generator] +# This fork of test-generator adds support for cargo workspaces, see +# also https://github.com/frehberg/test-generator/pull/14 +git = "https://github.com/JamesGuthrie/test-generator.git" +rev = "82e799979980962aec1aa324ec6e0e4cad781f41" diff --git a/tvix/nix-compat/default.nix b/tvix/nix-compat/default.nix new file mode 100644 index 000000000000..7f51438eb70d --- /dev/null +++ b/tvix/nix-compat/default.nix @@ -0,0 +1,5 @@ +{ depot, ... }: + +depot.tvix.crates.workspaceMembers.nix-compat.build.override { + runTests = true; +} diff --git a/tvix/nix-compat/src/derivation/errors.rs b/tvix/nix-compat/src/derivation/errors.rs new file mode 100644 index 000000000000..8e9e6a121096 --- /dev/null +++ b/tvix/nix-compat/src/derivation/errors.rs @@ -0,0 +1,59 @@ +use crate::{nixbase32::Nixbase32DecodeError, store_path}; +use thiserror::Error; + +/// Errors that can occur during the validation of Derivation structs. +#[derive(Debug, Error, PartialEq)] +pub enum DerivationError { + // outputs + #[error("no outputs defined")] + NoOutputs(), + #[error("invalid output name: {0}")] + InvalidOutputName(String), + #[error("encountered fixed-output derivation, but more than 1 output in total")] + MoreThanOneOutputButFixed(), + #[error("invalid output name for fixed-output derivation: {0}")] + InvalidOutputNameForFixed(String), + #[error("unable to validate output {0}: {1}")] + InvalidOutput(String, OutputError), + #[error("unable to validate output {0}: {1}")] + InvalidOutputDerivationPath(String, store_path::BuildStorePathError), + // input derivation + #[error("unable to parse input derivation path {0}: {1}")] + InvalidInputDerivationPath(String, store_path::Error), + #[error("input derivation {0} doesn't end with .drv")] + InvalidInputDerivationPrefix(String), + #[error("input derivation {0} output names are empty")] + EmptyInputDerivationOutputNames(String), + #[error("input derivation {0} output name {1} is invalid")] + InvalidInputDerivationOutputName(String, String), + + // input sources + #[error("unable to parse input sources path {0}: {1}")] + InvalidInputSourcesPath(String, store_path::Error), + + // platform + #[error("invalid platform field: {0}")] + InvalidPlatform(String), + + // builder + #[error("invalid builder field: {0}")] + InvalidBuilder(String), + + // environment + #[error("invalid environment key {0}")] + InvalidEnvironmentKey(String), +} + +/// Errors that can occur during the validation of a specific +// [crate::derivation::Output] of a [crate::derivation::Derviation]. +#[derive(Debug, Error, PartialEq)] +pub enum OutputError { + #[error("Invalid output path {0}: {1}")] + InvalidOutputPath(String, store_path::Error), + #[error("Invalid hash encoding: {0}")] + InvalidHashEncoding(String, Nixbase32DecodeError), + #[error("Invalid hash algo: {0}")] + InvalidHashAlgo(String), + #[error("Invalid Digest size {0} for algo {1}")] + InvalidDigestSizeForAlgo(usize, String), +} diff --git a/tvix/nix-compat/src/derivation/escape.rs b/tvix/nix-compat/src/derivation/escape.rs new file mode 100644 index 000000000000..06b550bbf02d --- /dev/null +++ b/tvix/nix-compat/src/derivation/escape.rs @@ -0,0 +1,27 @@ +use bstr::ByteSlice; + +/// Escapes a byte sequence. Does not add surrounding quotes. +pub fn escape_bytes<P: AsRef<[u8]>>(s: P) -> Vec<u8> { + let mut s: Vec<u8> = s.as_ref().to_vec(); + + s = s.replace(b"\\", b"\\\\"); + s = s.replace(b"\n", b"\\n"); + s = s.replace(b"\r", b"\\r"); + s = s.replace(b"\t", b"\\t"); + s = s.replace(b"\"", b"\\\""); + + s +} + +#[cfg(test)] +mod tests { + use super::escape_bytes; + use test_case::test_case; + + #[test_case(b"", b""; "empty")] + #[test_case(b"\"", b"\\\""; "doublequote")] + #[test_case(b":", b":"; "colon")] + fn escape(input: &[u8], expected: &[u8]) { + assert_eq!(expected, escape_bytes(input)) + } +} diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs new file mode 100644 index 000000000000..0e98e24f43dd --- /dev/null +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -0,0 +1,264 @@ +use crate::store_path::{ + self, build_output_path, build_regular_ca_path, build_text_path, StorePath, +}; +use bstr::BString; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::collections::{BTreeMap, BTreeSet}; +use std::io; + +mod errors; +mod escape; +mod output; +mod validate; +mod write; + +#[cfg(test)] +mod tests; + +// Public API of the crate. +pub use crate::nixhash::{NixHash, NixHashWithMode}; +pub use errors::{DerivationError, OutputError}; +pub use output::Output; + +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +pub struct Derivation { + #[serde(rename = "args")] + pub arguments: Vec<String>, + + pub builder: String, + + #[serde(rename = "env")] + pub environment: BTreeMap<String, BString>, + + #[serde(rename = "inputDrvs")] + pub input_derivations: BTreeMap<String, BTreeSet<String>>, + + #[serde(rename = "inputSrcs")] + pub input_sources: BTreeSet<String>, + + pub outputs: BTreeMap<String, Output>, + + pub system: String, +} + +impl Derivation { + /// write the Derivation to the given [std::io::Write], in ATerm format. + /// + /// The only errors returns are these when writing to the passed writer. + pub fn serialize(&self, writer: &mut impl std::io::Write) -> Result<(), io::Error> { + use write::*; + + writer.write_all(write::DERIVATION_PREFIX.as_bytes())?; + write_char(writer, write::PAREN_OPEN)?; + + write_outputs(writer, &self.outputs)?; + write_char(writer, COMMA)?; + + write_input_derivations(writer, &self.input_derivations)?; + write_char(writer, COMMA)?; + + write_input_sources(writer, &self.input_sources)?; + write_char(writer, COMMA)?; + + write_system(writer, &self.system)?; + write_char(writer, COMMA)?; + + write_builder(writer, &self.builder)?; + write_char(writer, COMMA)?; + + write_arguments(writer, &self.arguments)?; + write_char(writer, COMMA)?; + + write_enviroment(writer, &self.environment)?; + + write_char(writer, PAREN_CLOSE)?; + + Ok(()) + } + + /// return the ATerm serialization. + pub fn to_aterm_bytes(&self) -> Vec<u8> { + let mut buffer: Vec<u8> = Vec::new(); + + // invoke serialize and write to the buffer. + // Note we only propagate errors writing to the writer in serialize, + // which won't panic for the string we write to. + self.serialize(&mut buffer).unwrap(); + + buffer + } + + /// Returns the drv path of a [Derivation] struct. + /// + /// The drv path is calculated by invoking [build_text_path], using + /// the `name` with a `.drv` suffix as name, all [Derivation::input_sources] and + /// keys of [Derivation::input_derivations] as references, and the ATerm string of + /// the [Derivation] as content. + pub fn calculate_derivation_path(&self, name: &str) -> Result<StorePath, DerivationError> { + // append .drv to the name + let name = &format!("{}.drv", name); + + // collect the list of paths from input_sources and input_derivations + // into a (sorted, guaranteed by BTreeSet) list of references + let references: BTreeSet<String> = { + let mut inputs = self.input_sources.clone(); + let input_derivation_keys: Vec<String> = + self.input_derivations.keys().cloned().collect(); + inputs.extend(input_derivation_keys); + inputs + }; + + build_text_path(name, self.to_aterm_bytes(), references) + .map_err(|_e| DerivationError::InvalidOutputName(name.to_string())) + } + + /// Returns the FOD digest, if the derivation is fixed-output, or None if + /// it's not. + fn fod_digest(&self) -> Option<Vec<u8>> { + if self.outputs.len() != 1 { + return None; + } + + let out_output = self.outputs.get("out")?; + Some( + Sha256::new_with_prefix(format!( + "fixed:out:{}:{}", + out_output.hash_with_mode.clone()?.to_nix_hash_string(), + out_output.path + )) + .finalize() + .to_vec(), + ) + } + + /// Calculates the hash of a derivation modulo fixed-output subderivations. + /// + /// This is called `hashDerivationModulo` in nixcpp. + /// + /// It returns a [NixHash], created by calculating the sha256 digest of + /// the derivation ATerm representation, except that: + /// - any input derivation paths have beed replaced "by the result of a + /// recursive call to this function" and that + /// - for fixed-output derivations the special + /// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of + /// the A-Term. + /// + /// If the derivation is not a fixed derivation, it's up to the caller of + /// this function to provide a lookup function to lookup these calculation + /// results of parent derivations at `fn_get_hash_derivation_modulo` (by + /// drv path). + pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> NixHash + where + F: Fn(&str) -> NixHash, + { + // Fixed-output derivations return a fixed hash. + // Non-Fixed-output derivations return a hash of the ATerm notation, but with all + // input_derivation paths replaced by a recursive call to this function. + // We use fn_get_derivation_or_fod_hash here, so callers can precompute this. + let digest = self.fod_digest().unwrap_or({ + // This is a new map from derivation_or_fod_hash.digest (as lowerhex) + // to list of output names + let mut replaced_input_derivations: BTreeMap<String, BTreeSet<String>> = + BTreeMap::new(); + + // For each input_derivation, look up the + // derivation_or_fod_hash, and replace the derivation path with it's HEXLOWER + // digest. + // This is not the [NixHash::to_nix_hash_string], but without the sha256: prefix). + for (drv_path, output_names) in &self.input_derivations { + replaced_input_derivations.insert( + data_encoding::HEXLOWER.encode(&fn_get_derivation_or_fod_hash(drv_path).digest), + output_names.clone(), + ); + } + + // construct a new derivation struct with these replaced input derivation strings + let replaced_derivation = Derivation { + input_derivations: replaced_input_derivations, + ..self.clone() + }; + + // write the ATerm of that to the hash function + let mut hasher = Sha256::new(); + hasher.update(replaced_derivation.to_aterm_bytes()); + + hasher.finalize().to_vec() + }); + + // We populate the struct directly, as we know the sha256 digest has the + // right size. + NixHash { + algo: crate::nixhash::HashAlgo::Sha256, + digest: digest.to_vec(), + } + } + + /// This calculates all output paths of a Derivation and updates the struct. + /// It requires the struct to be initially without output paths. + /// This means, self.outputs[$outputName].path needs to be an empty string, + /// and self.environment[$outputName] needs to be an empty string. + /// + /// Output path calculation requires knowledge of the + /// derivation_or_fod_hash [NixHash], which (in case of non-fixed-output + /// derivations) also requires knowledge of other hash_derivation_modulo + /// [NixHash]es. + /// + /// We solve this by asking the caller of this function to provide the + /// hash_derivation_modulo of the current Derivation. + /// + /// On completion, self.environment[$outputName] and + /// self.outputs[$outputName].path are set to the calculated output path for all + /// outputs. + pub fn calculate_output_paths( + &mut self, + name: &str, + derivation_or_fod_hash: &NixHash, + ) -> Result<(), DerivationError> { + // The fingerprint and hash differs per output + for (output_name, output) in self.outputs.iter_mut() { + // Assert that outputs are not yet populated, to avoid using this function wrongly. + // We don't also go over self.environment, but it's a sufficient + // footgun prevention mechanism. + assert!(output.path.is_empty()); + + let path_name = output_path_name(name, output_name); + + // For fixed output derivation we use the per-output info, otherwise we use the + // derivation hash. + let abs_store_path = if let Some(ref hwm) = output.hash_with_mode { + build_regular_ca_path(&path_name, hwm, Vec::<String>::new(), false).map_err( + |e| DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e), + )? + } else { + build_output_path(derivation_or_fod_hash, output_name, &path_name).map_err(|e| { + DerivationError::InvalidOutputDerivationPath( + output_name.to_string(), + store_path::BuildStorePathError::InvalidStorePath(e), + ) + })? + }; + + output.path = abs_store_path.to_absolute_path(); + self.environment.insert( + output_name.to_string(), + abs_store_path.to_absolute_path().into(), + ); + } + + Ok(()) + } +} + +/// Calculate the name part of the store path of a derivation [Output]. +/// +/// It's the name, and (if it's the non-out output), the output name +/// after a `-`. +fn output_path_name(derivation_name: &str, output_name: &str) -> String { + let mut output_path_name = derivation_name.to_string(); + if output_name != "out" { + output_path_name.push('-'); + output_path_name.push_str(output_name); + } + output_path_name +} diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs new file mode 100644 index 000000000000..982d8222e90e --- /dev/null +++ b/tvix/nix-compat/src/derivation/output.rs @@ -0,0 +1,146 @@ +use crate::derivation::OutputError; +use crate::nixhash::{HashAlgo, NixHashWithMode}; +use crate::store_path::StorePath; +use serde::{Deserialize, Serialize}; +use serde_json::Map; + +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)] +pub struct Output { + pub path: String, + + #[serde(flatten)] + pub hash_with_mode: Option<NixHashWithMode>, +} + +impl<'de> Deserialize<'de> for Output { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let fields = Map::deserialize(deserializer)?; + Ok(Self { + path: fields + .get("path") + .ok_or(serde::de::Error::missing_field( + "`path` is missing but required for outputs", + ))? + .as_str() + .ok_or(serde::de::Error::invalid_type( + serde::de::Unexpected::Other("certainly not a string"), + &"a string", + ))? + .to_owned(), + hash_with_mode: NixHashWithMode::from_map::<D>(&fields)?, + }) + } +} + +impl Output { + pub fn is_fixed(&self) -> bool { + self.hash_with_mode.is_some() + } + + pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> { + if let Some(hash) = &self.hash_with_mode { + match hash { + NixHashWithMode::Flat(h) | NixHashWithMode::Recursive(h) => { + if h.algo != HashAlgo::Sha1 || h.algo != HashAlgo::Sha256 { + return Err(OutputError::InvalidHashAlgo(h.algo.to_string())); + } + } + } + } + if validate_output_paths { + if let Err(e) = StorePath::from_absolute_path(self.path.as_bytes()) { + return Err(OutputError::InvalidOutputPath(self.path.to_string(), e)); + } + } + Ok(()) + } +} + +/// This ensures that a potentially valid input addressed +/// output is deserialized as a non-fixed output. +#[test] +fn deserialize_valid_input_addressed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablabla" + }"#; + let output: Output = serde_json::from_str(json_bytes).expect("must parse"); + + assert!(!output.is_fixed()); +} + +/// This ensures that a potentially valid fixed output +/// output deserializes fine as a fixed output. +#[test] +fn deserialize_valid_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256" + }"#; + let output: Output = serde_json::from_str(json_bytes).expect("must parse"); + + assert!(output.is_fixed()); +} + +/// This ensures that parsing an input with the invalid hash encoding +/// will result in a parsing failure. +#[test] +fn deserialize_with_error_invalid_hash_encoding_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "IAMNOTVALIDNIXBASE32", + "hashAlgo": "r:sha256" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the wrong hash algo +/// will result in a parsing failure. +#[test] +fn deserialize_with_error_invalid_hash_algo_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha1024" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the missing hash algo but present hash will result in a +/// parsing failure. +#[test] +fn deserialize_with_error_missing_hash_algo_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the missing hash but present hash algo will result in a +/// parsing failure. +#[test] +fn deserialize_with_error_missing_hash_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hashAlgo": "r:sha1024" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv new file mode 100644 index 000000000000..a4fea3c5f486 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")],[],[],":",":",[],[("builder",":"),("name","bar"),("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("outputHash","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"),("outputHashAlgo","sha256"),("outputHashMode","recursive"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json new file mode 100644 index 000000000000..c8bbc4cbb5be --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "name": "bar", + "out": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar", + "outputHash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256", + "path": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv new file mode 100644 index 000000000000..f0d9230a5a52 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json","","")],[],[],":",":",[],[("builder",":"),("json","{\"hello\":\"moto\\n\"}"),("name","nested-json"),("out","/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json new file mode 100644 index 000000000000..9cb0b43b4c09 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json @@ -0,0 +1,19 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "json": "{\"hello\":\"moto\\n\"}", + "name": "nested-json", + "out": "/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv new file mode 100644 index 000000000000..a2cf9d31f92e --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")],[("/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("builder",":"),("name","foo"),("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json new file mode 100644 index 000000000000..957a85ccab82 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "bar": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar", + "builder": ":", + "name": "foo", + "out": "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo", + "system": ":" + }, + "inputDrvs": { + "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv": [ + "out" + ] + }, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv new file mode 100644 index 000000000000..bbe88c02c739 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode","","")],[],[],":",":",[],[("builder",":"),("letters","räksmörgås\nrødgrød med fløde\nLübeck\n肥猪\nこんにちは / 今日は\n🌮\n"),("name","unicode"),("out","/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json new file mode 100644 index 000000000000..f8f33c1bba17 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json @@ -0,0 +1,19 @@ +{ + "outputs": { + "out": { + "path": "/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "letters": "räksmörgås\nrødgrød med fløde\nLübeck\n肥猪\nこんにちは / 今日は\n🌮\n", + "name": "unicode", + "out": "/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode", + "system": ":" + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv new file mode 100644 index 000000000000..4b9338c0b953 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs","","")],[],[],":",":",[],[("__json","{\"builder\":\":\",\"name\":\"structured-attrs\",\"system\":\":\"}"),("out","/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json new file mode 100644 index 000000000000..74e3d7df55c5 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json @@ -0,0 +1,16 @@ +{ + "args": [], + "builder": ":", + "env": { + "__json": "{\"builder\":\":\",\"name\":\"structured-attrs\",\"system\":\":\"}", + "out": "/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv new file mode 100644 index 000000000000..1699c2a75e48 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo","","")],[("/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"),("builder",":"),("name","foo"),("out","/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json new file mode 100644 index 000000000000..831d27956d86 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "bar": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar", + "builder": ":", + "name": "foo", + "out": "/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo", + "system": ":" + }, + "inputDrvs": { + "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv": [ + "out" + ] + }, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv new file mode 100644 index 000000000000..523612238c76 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv @@ -0,0 +1 @@ +Derive([("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")],[],[],":",":",[],[("builder",":"),("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib"),("name","has-multi-out"),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out"),("outputs","out lib"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json new file mode 100644 index 000000000000..0bd7a2991cc7 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "lib": "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib", + "name": "has-multi-out", + "out": "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out", + "outputs": "out lib", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "lib": { + "path": "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib" + }, + "out": { + "path": "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv new file mode 100644 index 000000000000..6a7a35c58c3f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252","","")],[],[],":",":",[],[("builder",":"),("chars",""),("name","cp1252"),("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json new file mode 100644 index 000000000000..9d6ba8b7977f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv": { + "outputs": { + "out": { + "path": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "", + "name": "cp1252", + "out": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv new file mode 100644 index 000000000000..559e93ed0ed6 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar","r:sha1","0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33")],[],[],":",":",[],[("builder",":"),("name","bar"),("out","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"),("outputHash","0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"),("outputHashAlgo","sha1"),("outputHashMode","recursive"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json new file mode 100644 index 000000000000..e297d271592f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "name": "bar", + "out": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar", + "outputHash": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "outputHashAlgo": "sha1", + "outputHashMode": "recursive", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "hash": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "hashAlgo": "r:sha1", + "path": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv new file mode 100644 index 000000000000..b19fd8eb2ce4 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1","","")],[],[],":",":",[],[("builder",":"),("chars",""),("name","latin1"),("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json new file mode 100644 index 000000000000..ffd5c08da830 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv": { + "outputs": { + "out": { + "path": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "", + "name": "latin1", + "out": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs new file mode 100644 index 000000000000..de4ebb6cb20e --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -0,0 +1,419 @@ +use crate::derivation::output::Output; +use crate::derivation::Derivation; +use crate::store_path::StorePath; +use bstr::{BStr, BString}; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::str::FromStr; +use test_case::test_case; +use test_generator::test_resources; + +const RESOURCES_PATHS: &str = "src/derivation/tests/derivation_tests"; + +fn read_file(path: &str) -> BString { + let path = Path::new(path); + let mut file = File::open(path).unwrap(); + let mut file_contents = Vec::new(); + + file.read_to_end(&mut file_contents).unwrap(); + + file_contents.into() +} + +#[test_resources("src/derivation/tests/derivation_tests/*.drv")] +fn check_serialization(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + let mut serialized_derivation = Vec::new(); + derivation.serialize(&mut serialized_derivation).unwrap(); + + let expected = read_file(path_to_drv_file); + + assert_eq!(expected, BStr::new(&serialized_derivation)); +} + +#[test_resources("src/derivation/tests/derivation_tests/*.drv")] +fn validate(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + derivation + .validate(true) + .expect("derivation failed to validate") +} + +#[test_resources("src/derivation/tests/derivation_tests/*.drv")] +fn check_to_aterm_bytes(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + let expected = read_file(path_to_drv_file); + + assert_eq!(expected, BStr::new(&derivation.to_aterm_bytes())); +} + +#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] +#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")] +#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")] +#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")] +#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")] +#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")] +#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] +fn derivation_path(name: &str, expected_path: &str) { + let json_bytes = read_file(&format!("{}/{}.json", RESOURCES_PATHS, expected_path)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + assert_eq!( + derivation.calculate_derivation_path(name).unwrap(), + StorePath::from_str(expected_path).unwrap() + ); +} + +/// This trims all output paths from a Derivation struct, +/// by setting outputs[$outputName].path and environment[$outputName] to the empty string. +fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { + let mut trimmed_env = derivation.environment.clone(); + let mut trimmed_outputs = derivation.outputs.clone(); + + for (output_name, output) in &derivation.outputs { + trimmed_env.insert(output_name.clone(), "".into()); + assert!(trimmed_outputs.contains_key(output_name)); + trimmed_outputs.insert( + output_name.to_string(), + Output { + path: "".to_string(), + ..output.clone() + }, + ); + } + + // replace environment and outputs with the trimmed variants + Derivation { + environment: trimmed_env, + outputs: trimmed_outputs, + ..derivation.clone() + } +} + +#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] +#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")] +fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { + // read in the fixture + let json_bytes = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); + let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); + + let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); + + assert_eq!(expected_nix_hash_string, actual.to_nix_hash_string()); +} + +#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] +#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")] +#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")] +#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")] +#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")] +#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")] +#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] +fn output_paths(name: &str, drv_path: &str) { + // read in the fixture + let json_bytes = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); + let expected_derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("must deserialize"); + + let mut derivation = derivation_with_trimmed_output_paths(&expected_derivation); + + // calculate the derivation_or_fod_hash of derivation + // We don't expect the lookup function to be called for most derivations. + let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|parent_drv_path| { + // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv + // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv + if name == "foo" + && ((drv_path == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv" + && parent_drv_path == "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv") + || (drv_path == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv" + && parent_drv_path == "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")) + { + // do the lookup, by reading in the fixture of the requested + // drv_name, and calculating its drv replacement (on the non-stripped version) + // In a real-world scenario you would have already done this during construction. + + let json_bytes = read_file(&format!( + "{}/{}.json", + RESOURCES_PATHS, + Path::new(parent_drv_path) + .file_name() + .unwrap() + .to_string_lossy() + )); + + let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); + + // calculate derivation_or_fod_hash for each parent. + // This may not trigger subsequent requests, as both parents are FOD. + drv.derivation_or_fod_hash(|_| panic!("must not lookup")) + } else { + // we only expect this to be called in the "foo" testcase, for the "bar derivations" + panic!("may only be called for foo testcase on bar derivations"); + } + }); + + derivation + .calculate_output_paths(name, &calculated_derivation_or_fod_hash) + .unwrap(); + + // The derivation should now look like it was before + assert_eq!(expected_derivation, derivation); +} + +/// Exercises the output path calculation functions like a constructing client +/// (an implementation of builtins.derivation) would do: +/// +/// ```nix +/// rec { +/// bar = builtins.derivation { +/// name = "bar"; +/// builder = ":"; +/// system = ":"; +/// outputHash = "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"; +/// outputHashAlgo = "sha256"; +/// outputHashMode = "recursive"; +/// }; +/// +/// foo = builtins.derivation { +/// name = "foo"; +/// builder = ":"; +/// system = ":"; +/// inherit bar; +/// }; +/// } +/// ``` +/// It first assembles the bar derivation, does the output path calculation on +/// it, then continues with the foo derivation. +/// +/// The code ensures the resulting Derivations match our fixtures. +#[test] +fn output_path_construction() { + // create the bar derivation + let mut bar_drv = Derivation { + builder: ":".to_string(), + system: ":".to_string(), + ..Default::default() + }; + + // assemble bar env + let bar_env = &mut bar_drv.environment; + bar_env.insert("builder".to_string(), ":".into()); + bar_env.insert("name".to_string(), "bar".into()); + bar_env.insert("out".to_string(), "".into()); // will be calculated + bar_env.insert( + "outputHash".to_string(), + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba".into(), + ); + bar_env.insert("outputHashAlgo".to_string(), "sha256".into()); + bar_env.insert("outputHashMode".to_string(), "recursive".into()); + bar_env.insert("system".to_string(), ":".into()); + + // assemble bar outputs + bar_drv.outputs.insert( + "out".to_string(), + Output { + path: "".to_string(), // will be calculated + hash_with_mode: Some(crate::nixhash::NixHashWithMode::Recursive( + ( + crate::nixhash::HashAlgo::Sha256, + data_encoding::HEXLOWER + .decode( + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba" + .as_bytes(), + ) + .unwrap(), + ) + .try_into() + .unwrap(), + )), + }, + ); + + // calculate bar output paths + let bar_calc_result = bar_drv.calculate_output_paths( + "bar", + &bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")), + ); + assert!(bar_calc_result.is_ok()); + + // ensure it matches our bar fixture + let bar_json_bytes = read_file(&format!( + "{}/{}.json", + RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" + )); + let bar_drv_expected: Derivation = + serde_json::from_slice(&bar_json_bytes).expect("must deserialize"); + assert_eq!(bar_drv_expected, bar_drv); + + // now construct foo, which requires bar_drv + // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar. + let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path; + let bar_drv_derivation_or_fod_hash = + bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")); + + let bar_drv_path = bar_drv + .calculate_derivation_path("bar") + .expect("must succeed"); + + // create foo derivation + let mut foo_drv = Derivation { + builder: ":".to_string(), + system: ":".to_string(), + ..Default::default() + }; + + // assemble foo env + let foo_env = &mut foo_drv.environment; + foo_env.insert("bar".to_string(), bar_output_path.to_owned().into()); + foo_env.insert("builder".to_string(), ":".into()); + foo_env.insert("name".to_string(), "foo".into()); + foo_env.insert("out".to_string(), "".into()); // will be calculated + foo_env.insert("system".to_string(), ":".into()); + + // asssemble foo outputs + foo_drv.outputs.insert( + "out".to_string(), + Output { + path: "".to_string(), // will be calculated + hash_with_mode: None, + }, + ); + + // assemble foo input_derivations + foo_drv.input_derivations.insert( + bar_drv_path.to_absolute_path(), + BTreeSet::from(["out".to_string()]), + ); + + // calculate foo output paths + let foo_calc_result = foo_drv.calculate_output_paths( + "foo", + &foo_drv.derivation_or_fod_hash(|drv_path| { + if drv_path != "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" { + panic!("lookup called with unexpected drv_path: {}", drv_path); + } + bar_drv_derivation_or_fod_hash.clone() + }), + ); + assert!(foo_calc_result.is_ok()); + + // ensure it matches our foo fixture + let foo_json_bytes = read_file(&format!( + "{}/{}.json", + RESOURCES_PATHS, "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv", + )); + let foo_drv_expected: Derivation = + serde_json::from_slice(&foo_json_bytes).expect("must deserialize"); + assert_eq!(foo_drv_expected, foo_drv); + + assert_eq!( + StorePath::from_str("4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv").expect("must succeed"), + foo_drv + .calculate_derivation_path("foo") + .expect("must succeed") + ); +} + +/// This constructs a Derivation using cp1252 encoding and ensures the +/// calculated derivation path matches the one Nix does calculate, as +/// well as the ATerm serialization. +/// We can't add this as a test_case to `output_paths`, as the JSON parser +/// refuses to parse our JSONs. +/// It looks like more recent versions of Nix also seem to not produce these +/// JSON files anymore, however, it still happily produces the .drv files in +/// the store. +#[test_case( + "cp1252", + vec![0xc5, 0xc4, 0xd6], + "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252", + "m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv"; + "cp1252" +)] +#[test_case( + "latin1", + vec![0xc5, 0xc4, 0xd6], + "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1", + "x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv"; + "latin1" +)] +fn non_unicode(name: &str, chars: Vec<u8>, exp_output_path: &str, exp_derivation_path: &str) { + // construct the Derivation + let mut outputs: BTreeMap<String, Output> = BTreeMap::new(); + outputs.insert( + "out".to_string(), + Output { + path: exp_output_path.to_string(), + ..Default::default() + }, + ); + + let mut environment: BTreeMap<String, BString> = BTreeMap::new(); + environment.insert("builder".to_string(), ":".into()); + environment.insert("chars".to_string(), chars.into()); + environment.insert("name".to_string(), name.into()); + environment.insert("out".to_string(), exp_output_path.into()); + environment.insert("system".to_string(), ":".into()); + let derivation: Derivation = Derivation { + builder: ":".to_string(), + environment, + outputs, + system: ":".to_string(), + ..Default::default() + }; + + // check the derivation_path matches what Nix calculated. + let actual_drv_path = derivation.calculate_derivation_path(name).unwrap(); + assert_eq!(exp_derivation_path.to_string(), actual_drv_path.to_string()); + + // Now wipe the output path info, and ensure we calculate the same output + // path. + { + let mut derivation = derivation_with_trimmed_output_paths(&derivation); + let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|_| { + panic!("No parents expected"); + }); + derivation + .calculate_output_paths(name, &calculated_derivation_or_fod_hash) + .unwrap(); + + assert_eq!( + exp_output_path.to_string(), + derivation.outputs.get("out").unwrap().path, + "expected calculated output path to match" + ); + } + + // Construct the ATerm representation and compare with our fixture. + { + let aterm_bytes = read_file(&format!("{}/{}", RESOURCES_PATHS, exp_derivation_path)); + assert_eq!( + aterm_bytes, + BStr::new(&derivation.to_aterm_bytes()), + "expected ATerm serialization to match", + ); + } +} diff --git a/tvix/nix-compat/src/derivation/validate.rs b/tvix/nix-compat/src/derivation/validate.rs new file mode 100644 index 000000000000..6b81c4c80b86 --- /dev/null +++ b/tvix/nix-compat/src/derivation/validate.rs @@ -0,0 +1,129 @@ +use crate::derivation::{Derivation, DerivationError}; +use crate::store_path::{self, StorePath}; + +impl Derivation { + /// validate ensures a Derivation struct is properly populated, + /// and returns a [DerivationError] if not. + /// + /// if `validate_output_paths` is set to false, the output paths are + /// excluded from validation. + /// + /// This is helpful to validate struct population before invoking + /// [Derivation::calculate_output_paths]. + pub fn validate(&self, validate_output_paths: bool) -> Result<(), DerivationError> { + // Ensure the number of outputs is > 1 + if self.outputs.is_empty() { + return Err(DerivationError::NoOutputs()); + } + + // Validate all outputs + for (output_name, output) in &self.outputs { + // empty output names are invalid. + // + // `drv` is an invalid output name too, as this would cause + // a `builtins.derivation` call to return an attrset with a + // `drvPath` key (which already exists) and has a different + // meaning. + // + // Other output names that don't match the name restrictions from + // [StorePath] will fail the [store_path::validate_name] check. + if output_name.is_empty() + || output_name == "drv" + || store_path::validate_name(output_name.as_bytes()).is_err() + { + return Err(DerivationError::InvalidOutputName(output_name.to_string())); + } + + if output.is_fixed() { + if self.outputs.len() != 1 { + return Err(DerivationError::MoreThanOneOutputButFixed()); + } + if output_name != "out" { + return Err(DerivationError::InvalidOutputNameForFixed( + output_name.to_string(), + )); + } + + break; + } + + if let Err(e) = output.validate(validate_output_paths) { + return Err(DerivationError::InvalidOutput(output_name.to_string(), e)); + } + } + + // Validate all input_derivations + for (input_derivation_path, output_names) in &self.input_derivations { + // Validate input_derivation_path + if let Err(e) = StorePath::from_absolute_path(input_derivation_path.as_bytes()) { + return Err(DerivationError::InvalidInputDerivationPath( + input_derivation_path.to_string(), + e, + )); + } + + if !input_derivation_path.ends_with(".drv") { + return Err(DerivationError::InvalidInputDerivationPrefix( + input_derivation_path.to_string(), + )); + } + + if output_names.is_empty() { + return Err(DerivationError::EmptyInputDerivationOutputNames( + input_derivation_path.to_string(), + )); + } + + for output_name in output_names.iter() { + // empty output names are invalid. + // + // `drv` is an invalid output name too, as this would cause + // a `builtins.derivation` call to return an attrset with a + // `drvPath` key (which already exists) and has a different + // meaning. + // + // Other output names that don't match the name restrictions from + // [StorePath] will fail the [StorePath::validate_name] check. + if output_name.is_empty() + || output_name == "drv" + || store_path::validate_name(output_name.as_bytes()).is_err() + { + return Err(DerivationError::InvalidInputDerivationOutputName( + input_derivation_path.to_string(), + output_name.to_string(), + )); + } + } + } + + // Validate all input_sources + for input_source in self.input_sources.iter() { + if let Err(e) = StorePath::from_absolute_path(input_source.as_bytes()) { + return Err(DerivationError::InvalidInputSourcesPath( + input_source.to_string(), + e, + )); + } + } + + // validate platform + if self.system.is_empty() { + return Err(DerivationError::InvalidPlatform(self.system.to_string())); + } + + // validate builder + if self.builder.is_empty() { + return Err(DerivationError::InvalidBuilder(self.builder.to_string())); + } + + // validate env, none of the keys may be empty. + // We skip the `name` validation seen in go-nix. + for k in self.environment.keys() { + if k.is_empty() { + return Err(DerivationError::InvalidEnvironmentKey(k.to_string())); + } + } + + Ok(()) + } +} diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs new file mode 100644 index 000000000000..9a6234424816 --- /dev/null +++ b/tvix/nix-compat/src/derivation/write.rs @@ -0,0 +1,200 @@ +//! This module implements the serialisation of derivations into the +//! [ATerm][] format used by C++ Nix. +//! +//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html + +use crate::derivation::escape::escape_bytes; +use crate::derivation::output::Output; +use bstr::BString; +use std::{ + collections::{BTreeMap, BTreeSet}, + io, + io::Error, + io::Write, +}; + +pub const DERIVATION_PREFIX: &str = "Derive"; +pub const PAREN_OPEN: char = '('; +pub const PAREN_CLOSE: char = ')'; +pub const BRACKET_OPEN: char = '['; +pub const BRACKET_CLOSE: char = ']'; +pub const COMMA: char = ','; +pub const QUOTE: char = '"'; + +// Writes a character to the writer. +pub(crate) fn write_char(writer: &mut impl Write, c: char) -> io::Result<()> { + let mut buf = [0; 4]; + let b = c.encode_utf8(&mut buf).as_bytes(); + writer.write_all(b) +} + +// Write a string `s` as a quoted field to the writer. +// The `escape` argument controls whether escaping will be skipped. +// This is the case if `s` is known to only contain characters that need no +// escaping. +pub(crate) fn write_field<S: AsRef<[u8]>>( + writer: &mut impl Write, + s: S, + escape: bool, +) -> io::Result<()> { + write_char(writer, QUOTE)?; + + if !escape { + writer.write_all(s.as_ref())?; + } else { + writer.write_all(&escape_bytes(s.as_ref()))?; + } + + write_char(writer, QUOTE)?; + + Ok(()) +} + +fn write_array_elements<S: AsRef<[u8]>>( + writer: &mut impl Write, + elements: &[S], +) -> Result<(), io::Error> { + for (index, element) in elements.iter().enumerate() { + if index > 0 { + write_char(writer, COMMA)?; + } + + write_field(writer, element, true)?; + } + + Ok(()) +} + +pub fn write_outputs( + writer: &mut impl Write, + outputs: &BTreeMap<String, Output>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + for (ii, (output_name, output)) in outputs.iter().enumerate() { + if ii > 0 { + write_char(writer, COMMA)?; + } + + write_char(writer, PAREN_OPEN)?; + + let mut elements: Vec<&str> = vec![output_name, &output.path]; + + let (e2, e3) = match &output.hash_with_mode { + Some(hash) => match hash { + crate::nixhash::NixHashWithMode::Flat(h) => ( + h.algo.to_string(), + data_encoding::HEXLOWER.encode(&h.digest), + ), + crate::nixhash::NixHashWithMode::Recursive(h) => ( + format!("r:{}", h.algo), + data_encoding::HEXLOWER.encode(&h.digest), + ), + }, + None => ("".to_string(), "".to_string()), + }; + + elements.push(&e2); + elements.push(&e3); + + write_array_elements(writer, &elements)?; + + write_char(writer, PAREN_CLOSE)?; + } + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub fn write_input_derivations( + writer: &mut impl Write, + input_derivations: &BTreeMap<String, BTreeSet<String>>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + + for (ii, (input_derivation_path, input_derivation)) in input_derivations.iter().enumerate() { + if ii > 0 { + write_char(writer, COMMA)?; + } + + write_char(writer, PAREN_OPEN)?; + write_field(writer, input_derivation_path.as_str(), false)?; + write_char(writer, COMMA)?; + + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &input_derivation + .iter() + .map(String::as_bytes) + .collect::<Vec<_>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + write_char(writer, PAREN_CLOSE)?; + } + + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub fn write_input_sources( + writer: &mut impl Write, + input_sources: &BTreeSet<String>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &input_sources.iter().map(String::from).collect::<Vec<_>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub fn write_system(writer: &mut impl Write, platform: &str) -> Result<(), Error> { + write_field(writer, platform, true)?; + Ok(()) +} + +pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), Error> { + write_field(writer, builder, true)?; + Ok(()) +} + +pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + write_array_elements( + writer, + &arguments + .iter() + .map(|s| s.as_bytes().to_vec().into()) + .collect::<Vec<BString>>(), + )?; + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} + +pub fn write_enviroment( + writer: &mut impl Write, + environment: &BTreeMap<String, BString>, +) -> Result<(), io::Error> { + write_char(writer, BRACKET_OPEN)?; + + for (i, (k, v)) in environment.iter().enumerate() { + if i > 0 { + write_char(writer, COMMA)?; + } + + write_char(writer, PAREN_OPEN)?; + write_field(writer, k, false)?; + write_char(writer, COMMA)?; + write_field(writer, v, true)?; + write_char(writer, PAREN_CLOSE)?; + } + + write_char(writer, BRACKET_CLOSE)?; + + Ok(()) +} diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs new file mode 100644 index 000000000000..37d79f38fb37 --- /dev/null +++ b/tvix/nix-compat/src/lib.rs @@ -0,0 +1,5 @@ +pub mod derivation; +pub mod nar; +pub mod nixbase32; +pub mod nixhash; +pub mod store_path; diff --git a/tvix/nix-compat/src/nar/mod.rs b/tvix/nix-compat/src/nar/mod.rs new file mode 100644 index 000000000000..d3baa817825a --- /dev/null +++ b/tvix/nix-compat/src/nar/mod.rs @@ -0,0 +1 @@ +pub mod writer; diff --git a/tvix/nix-compat/src/nar/writer/mod.rs b/tvix/nix-compat/src/nar/writer/mod.rs new file mode 100644 index 000000000000..f018e4212e80 --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/mod.rs @@ -0,0 +1,220 @@ +//! Implements an interface for writing the Nix archive format (NAR). +//! +//! NAR files (and their hashed representations) are used in C++ Nix for +//! addressing fixed-output derivations and a variety of other things. +//! +//! NAR files can be output to any type that implements [`Write`], and content +//! can be read from any type that implementes [`BufRead`]. +//! +//! Writing a single file might look like this: +//! +//! ```rust +//! # use std::io::BufReader; +//! # let some_file: Vec<u8> = vec![0, 1, 2, 3, 4]; +//! +//! // Output location to write the NAR to. +//! let mut sink: Vec<u8> = Vec::new(); +//! +//! // Instantiate writer for this output location. +//! let mut nar = nix_compat::nar::writer::open(&mut sink)?; +//! +//! // Acquire metadata for the single file to output, and pass it in a +//! // `BufRead`-implementing type. +//! +//! let executable = false; +//! let size = some_file.len() as u64; +//! let mut reader = BufReader::new(some_file.as_slice()); +//! nar.file(executable, size, &mut reader)?; +//! # Ok::<(), std::io::Error>(()) +//! ``` + +use std::io::{ + self, BufRead, + ErrorKind::{InvalidInput, UnexpectedEof}, + Write, +}; + +mod wire; + +/// Convenience type alias for types implementing [`Write`]. +pub type Writer<'a> = dyn Write + Send + 'a; + +/// Create a new NAR, writing the output to the specified writer. +pub fn open<'a, 'w: 'a>(writer: &'a mut Writer<'w>) -> io::Result<Node<'a, 'w>> { + let mut node = Node { writer }; + node.write(&wire::TOK_NAR)?; + Ok(node) +} + +/// Single node in a NAR file. +/// +/// A NAR can be thought of as a tree of nodes represented by this type. Each +/// node can be a file, a symlink or a directory containing other nodes. +pub struct Node<'a, 'w: 'a> { + writer: &'a mut Writer<'w>, +} + +impl<'a, 'w> Node<'a, 'w> { + fn write(&mut self, data: &[u8]) -> io::Result<()> { + self.writer.write_all(data) + } + + fn pad(&mut self, n: u64) -> io::Result<()> { + match (n & 7) as usize { + 0 => Ok(()), + n => self.write(&[0; 8][n..]), + } + } + + /// Make this node a symlink. + pub fn symlink(mut self, target: &[u8]) -> io::Result<()> { + debug_assert!( + target.len() <= wire::MAX_TARGET_LEN, + "target.len() > {}", + wire::MAX_TARGET_LEN + ); + debug_assert!( + !target.contains(&b'\0'), + "invalid target characters: {target:?}" + ); + debug_assert!(!target.is_empty(), "empty target"); + + self.write(&wire::TOK_SYM)?; + self.write(&target.len().to_le_bytes())?; + self.write(target)?; + self.pad(target.len() as u64)?; + self.write(&wire::TOK_PAR)?; + Ok(()) + } + + /// Make this node a single file. + pub fn file(mut self, executable: bool, size: u64, reader: &mut dyn BufRead) -> io::Result<()> { + self.write(if executable { + &wire::TOK_EXE + } else { + &wire::TOK_REG + })?; + + self.write(&size.to_le_bytes())?; + + let mut need = size; + while need != 0 { + let data = reader.fill_buf()?; + + if data.is_empty() { + return Err(UnexpectedEof.into()); + } + + let n = need.min(data.len() as u64) as usize; + self.write(&data[..n])?; + + need -= n as u64; + reader.consume(n); + } + + // bail if there's still data left in the passed reader. + // This uses the same code as [BufRead::has_data_left] (unstable). + if reader.fill_buf().map(|b| !b.is_empty())? { + return Err(io::Error::new( + InvalidInput, + "reader contained more data than specified size", + )); + } + + self.pad(size)?; + self.write(&wire::TOK_PAR)?; + + Ok(()) + } + + /// Make this node a directory, the content of which is set using the + /// resulting [`Directory`] value. + pub fn directory(mut self) -> io::Result<Directory<'a, 'w>> { + self.write(&wire::TOK_DIR)?; + Ok(Directory::new(self)) + } +} + +#[cfg(debug_assertions)] +type Name = Vec<u8>; +#[cfg(not(debug_assertions))] +type Name = (); + +fn into_name(_name: &[u8]) -> Name { + #[cfg(debug_assertions)] + _name.to_owned() +} + +/// Content of a NAR node that represents a directory. +pub struct Directory<'a, 'w> { + node: Node<'a, 'w>, + prev_name: Option<Name>, +} + +impl<'a, 'w> Directory<'a, 'w> { + fn new(node: Node<'a, 'w>) -> Self { + Self { + node, + prev_name: None, + } + } + + /// Add an entry to the directory. + /// + /// The entry is simply another [`Node`], which can then be filled like the + /// root of a NAR (including, of course, by nesting directories). + pub fn entry(&mut self, name: &[u8]) -> io::Result<Node<'_, 'w>> { + debug_assert!( + name.len() <= wire::MAX_NAME_LEN, + "name.len() > {}", + wire::MAX_NAME_LEN + ); + debug_assert!(name != b"", "name may not be empty"); + debug_assert!(name != b".", "invalid name: {name:?}"); + debug_assert!(name != b"..", "invalid name: {name:?}"); + + debug_assert!(!name.contains(&b'/'), "invalid name characters: {name:?}"); + debug_assert!(!name.contains(&b'\0'), "invalid name characters: {name:?}"); + + match self.prev_name { + None => { + self.prev_name = Some(into_name(name)); + } + Some(ref mut _prev_name) => { + #[cfg(debug_assertions)] + { + assert!( + &**_prev_name < name, + "misordered names: {_prev_name:?} >= {name:?}" + ); + _prev_name.clear(); + _prev_name.append(&mut name.to_vec()); + } + self.node.write(&wire::TOK_PAR)?; + } + } + + self.node.write(&wire::TOK_ENT)?; + self.node.write(&name.len().to_le_bytes())?; + self.node.write(name)?; + self.node.pad(name.len() as u64)?; + self.node.write(&wire::TOK_NOD)?; + + Ok(Node { + writer: &mut *self.node.writer, + }) + } + + /// Close a directory and write terminators for the directory to the NAR. + /// + /// **Important:** This *must* be called when all entries have been written + /// in a directory, otherwise the resulting NAR file will be invalid. + pub fn close(mut self) -> io::Result<()> { + if self.prev_name.is_some() { + self.node.write(&wire::TOK_PAR)?; + } + + self.node.write(&wire::TOK_PAR)?; + Ok(()) + } +} diff --git a/tvix/nix-compat/src/nar/writer/wire.rs b/tvix/nix-compat/src/nar/writer/wire.rs new file mode 100644 index 000000000000..98581ae3aa7c --- /dev/null +++ b/tvix/nix-compat/src/nar/writer/wire.rs @@ -0,0 +1,46 @@ +pub const MAX_NAME_LEN: usize = 255; +pub const MAX_TARGET_LEN: usize = 4095; + +#[cfg(test)] +fn token(xs: &[&str]) -> Vec<u8> { + let mut out = vec![]; + for x in xs { + let len = x.len() as u64; + out.extend_from_slice(&len.to_le_bytes()); + out.extend_from_slice(x.as_bytes()); + + let n = x.len() & 7; + if n != 0 { + const ZERO: [u8; 8] = [0; 8]; + out.extend_from_slice(&ZERO[n..]); + } + } + out +} + +pub const TOK_NAR: [u8; 56] = *b"\x0d\0\0\0\0\0\0\0nix-archive-1\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_REG: [u8; 32] = *b"\x07\0\0\0\0\0\0\0regular\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_EXE: [u8; 64] = *b"\x07\0\0\0\0\0\0\0regular\0\x0a\0\0\0\0\0\0\0executable\0\0\0\0\0\0\0\0\0\0\0\0\0\0\x08\0\0\0\0\0\0\0contents"; +pub const TOK_SYM: [u8; 32] = *b"\x07\0\0\0\0\0\0\0symlink\0\x06\0\0\0\0\0\0\0target\0\0"; +pub const TOK_DIR: [u8; 24] = *b"\x09\0\0\0\0\0\0\0directory\0\0\0\0\0\0\0"; +pub const TOK_ENT: [u8; 48] = *b"\x05\0\0\0\0\0\0\0entry\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0name\0\0\0\0"; +pub const TOK_NOD: [u8; 48] = *b"\x04\0\0\0\0\0\0\0node\0\0\0\0\x01\0\0\0\0\0\0\0(\0\0\0\0\0\0\0\x04\0\0\0\0\0\0\0type\0\0\0\0"; +pub const TOK_PAR: [u8; 16] = *b"\x01\0\0\0\0\0\0\0)\0\0\0\0\0\0\0"; + +#[test] +fn tokens() { + let cases: &[(&[u8], &[&str])] = &[ + (&TOK_NAR, &["nix-archive-1", "(", "type"]), + (&TOK_REG, &["regular", "contents"]), + (&TOK_EXE, &["regular", "executable", "", "contents"]), + (&TOK_SYM, &["symlink", "target"]), + (&TOK_DIR, &["directory"]), + (&TOK_ENT, &["entry", "(", "name"]), + (&TOK_NOD, &["node", "(", "type"]), + (&TOK_PAR, &[")"]), + ]; + + for &(tok, xs) in cases { + assert_eq!(tok, token(xs)); + } +} diff --git a/tvix/nix-compat/src/nixbase32.rs b/tvix/nix-compat/src/nixbase32.rs new file mode 100644 index 000000000000..7fef1d673e2f --- /dev/null +++ b/tvix/nix-compat/src/nixbase32.rs @@ -0,0 +1,167 @@ +//! Implements the slightly odd "base32" encoding that's used in Nix. +//! +//! Nix uses a custom alphabet. Contrary to other implementations (RFC4648), +//! encoding to "nix base32" doesn't use any padding, and reads in characters +//! in reverse order. +//! +//! This is also the main reason why we can't use `data_encoding::Encoding` - +//! it gets things wrong if there normally would be a need for padding. + +use std::fmt::Write; + +use thiserror::Error; + +const ALPHABET: &[u8; 32] = b"0123456789abcdfghijklmnpqrsvwxyz"; + +/// Errors that can occur while decoding nixbase32-encoded data. +#[derive(Debug, Eq, PartialEq, Error)] +pub enum Nixbase32DecodeError { + #[error("character {0:x} not in alphabet")] + CharacterNotInAlphabet(u8), + #[error("nonzero carry")] + NonzeroCarry(), +} + +/// Returns encoded input +pub fn encode(input: &[u8]) -> String { + let output_len = encode_len(input.len()); + let mut output = String::with_capacity(output_len); + + if output_len > 0 { + for n in (0..=output_len - 1).rev() { + let b = n * 5; // bit offset within the entire input + let i = b / 8; // input byte index + let j = b % 8; // bit offset within that input byte + + let mut c = input[i] >> j; + if i + 1 < input.len() { + // we want to right shift, and discard shifted out bits (unchecked) + // To do this without panicing, we need to do the shifting in u16 + // and convert back to u8 afterwards. + c |= ((input[i + 1] as u16) << (8 - j as u16)) as u8 + } + + output + .write_char(ALPHABET[(c & 0x1f) as usize] as char) + .unwrap(); + } + } + + output +} + +/// This maps a nixbase32-encoded character to its binary representation, which +/// is also the index of the character in the alphabet. +fn decode_char(encoded_char: &u8) -> Option<u8> { + Some(match encoded_char { + b'0'..=b'9' => encoded_char - b'0', + b'a'..=b'd' => encoded_char - b'a' + 10_u8, + b'f'..=b'n' => encoded_char - b'f' + 14_u8, + b'p'..=b's' => encoded_char - b'p' + 23_u8, + b'v'..=b'z' => encoded_char - b'v' + 27_u8, + _ => return None, + }) +} + +/// Returns decoded input +pub fn decode(input: &[u8]) -> Result<Vec<u8>, Nixbase32DecodeError> { + let output_len = decode_len(input.len()); + let mut output: Vec<u8> = vec![0x00; output_len]; + + // loop over all characters in reverse, and keep the iteration count in n. + for (n, c) in input.iter().rev().enumerate() { + match decode_char(c) { + None => return Err(Nixbase32DecodeError::CharacterNotInAlphabet(*c)), + Some(c_decoded) => { + let b = n * 5; + let i = b / 8; + let j = b % 8; + + let val = (c_decoded as u16).rotate_left(j as u32); + output[i] |= (val & 0x00ff) as u8; + let carry = ((val & 0xff00) >> 8) as u8; + + // if we're at the end of dst… + if i == output_len - 1 { + // but have a nonzero carry, the encoding is invalid. + if carry != 0 { + return Err(Nixbase32DecodeError::NonzeroCarry()); + } + } else { + output[i + 1] |= carry; + } + } + } + } + + Ok(output) +} + +/// Returns the decoded length of an input of length len. +pub fn decode_len(len: usize) -> usize { + (len * 5) / 8 +} + +/// Returns the encoded length of an input of length len +pub fn encode_len(len: usize) -> usize { + if len == 0 { + return 0; + } + (len * 8 - 1) / 5 + 1 +} + +#[cfg(test)] +mod tests { + use test_case::test_case; + + #[test_case("", vec![] ; "empty bytes")] + #[test_case("0z", vec![0x1f]; "one byte")] + #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", vec![ + 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a, + 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]; "store path")] + #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", vec![ + 0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30, + 0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30, + ]; "sha256")] + fn encode(enc: &str, dec: Vec<u8>) { + assert_eq!(enc, super::encode(&dec)); + } + + #[test_case("", Some(vec![]) ; "empty bytes")] + #[test_case("0z", Some(vec![0x1f]); "one byte")] + #[test_case("00bgd045z0d4icpbc2yyz4gx48ak44la", Some(vec![ + 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a, + 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00]); "store path")] + #[test_case("0c5b8vw40dy178xlpddw65q9gf1h2186jcc3p4swinwggbllv8mk", Some(vec![ + 0xb3, 0xa2, 0x4d, 0xe9, 0x7a, 0x8f, 0xdb, 0xc8, 0x35, 0xb9, 0x83, 0x31, 0x69, 0x50, 0x10, 0x30, + 0xb8, 0x97, 0x70, 0x31, 0xbc, 0xb5, 0x4b, 0x3b, 0x3a, 0xc1, 0x37, 0x40, 0xf8, 0x46, 0xab, 0x30, + ]); "sha256")] + // this is invalid encoding, because it encodes 10 1-bits, so the carry + // would be 2 1-bits + #[test_case("zz", None; "invalid encoding-1")] + // this is an even more specific example - it'd decode as 00000000 11 + #[test_case("c0", None; "invalid encoding-2")] + + fn decode(enc: &str, dec: Option<Vec<u8>>) { + match dec { + Some(dec) => { + // The decode needs to match what's passed in dec + assert_eq!(dec, super::decode(enc.as_bytes()).unwrap()); + } + None => { + // the decode needs to be an error + assert!(super::decode(enc.as_bytes()).is_err()); + } + } + } + + #[test] + fn encode_len() { + assert_eq!(super::encode_len(20), 32) + } + + #[test] + fn decode_len() { + assert_eq!(super::decode_len(32), 20) + } +} diff --git a/tvix/nix-compat/src/nixhash/algos.rs b/tvix/nix-compat/src/nixhash/algos.rs new file mode 100644 index 000000000000..45cc242dc330 --- /dev/null +++ b/tvix/nix-compat/src/nixhash/algos.rs @@ -0,0 +1,44 @@ +use std::fmt::Display; + +use serde::{Deserialize, Serialize}; + +use crate::nixhash::Error; + +/// This are the hash algorithms supported by cppnix. +#[derive(Clone, Copy, Debug, Eq, PartialEq, Serialize, Deserialize)] +pub enum HashAlgo { + Md5, + Sha1, + Sha256, + Sha512, +} + +impl Display for HashAlgo { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match &self { + HashAlgo::Md5 => write!(f, "md5"), + HashAlgo::Sha1 => write!(f, "sha1"), + HashAlgo::Sha256 => write!(f, "sha256"), + HashAlgo::Sha512 => write!(f, "sha512"), + } + } +} + +/// TODO(Raito): this could be automated via macros, I suppose. +/// But this may be more expensive than just doing it by hand +/// and ensuring that is kept in sync. +pub const SUPPORTED_ALGOS: [&str; 4] = ["md5", "sha1", "sha256", "sha512"]; + +impl TryFrom<&str> for HashAlgo { + type Error = Error; + + fn try_from(algo_str: &str) -> Result<Self, Self::Error> { + match algo_str { + "md5" => Ok(Self::Md5), + "sha1" => Ok(Self::Sha1), + "sha256" => Ok(Self::Sha256), + "sha512" => Ok(Self::Sha512), + _ => Err(Error::InvalidAlgo(algo_str.to_string())), + } + } +} diff --git a/tvix/nix-compat/src/nixhash/mod.rs b/tvix/nix-compat/src/nixhash/mod.rs new file mode 100644 index 000000000000..1a8c0534b08a --- /dev/null +++ b/tvix/nix-compat/src/nixhash/mod.rs @@ -0,0 +1,466 @@ +use crate::nixbase32; +use data_encoding::{BASE64, BASE64_NOPAD, HEXLOWER}; +use thiserror; + +mod algos; +mod with_mode; + +pub use algos::HashAlgo; +pub use with_mode::NixHashWithMode; + +/// Nix allows specifying hashes in various encodings, and magically just +/// derives the encoding. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct NixHash { + pub digest: Vec<u8>, + + pub algo: HashAlgo, +} + +impl NixHash { + /// Formats a [NixHash] in the Nix default hash format, + /// which is the algo, followed by a colon, then the lower hex encoded digest. + pub fn to_nix_hash_string(&self) -> String { + format!("{}:{}", self.algo, HEXLOWER.encode(&self.digest)) + } +} + +impl TryFrom<(HashAlgo, Vec<u8>)> for NixHash { + type Error = Error; + + /// Constructs a new [NixHash] by specifying [HashAlgo] and digest. + // It can fail if the passed digest length doesn't match what's expected for + // the passed algo. + fn try_from(value: (HashAlgo, Vec<u8>)) -> Result<Self, Self::Error> { + let (algo, digest) = value; + if digest.len() != hash_algo_length(&algo) { + return Err(Error::InvalidEncodedDigestLength(digest.len(), algo)); + } + Ok(Self { algo, digest }) + } +} + +/// Errors related to NixHash construction. +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("invalid hash algo: {0}")] + InvalidAlgo(String), + #[error("invalid SRI string: {0}")] + InvalidSRI(String), + #[error("invalid encoded digest length '{0}' for algo {1}")] + InvalidEncodedDigestLength(usize, HashAlgo), + #[error("invalid base16 encoding: {0}")] + InvalidBase16Encoding(data_encoding::DecodeError), + #[error("invalid base32 encoding: {0}")] + InvalidBase32Encoding(nixbase32::Nixbase32DecodeError), + #[error("invalid base64 encoding: {0}")] + InvalidBase64Encoding(data_encoding::DecodeError), + #[error("conflicting hash algo: {0} (hash_algo) vs {1} (inline)")] + ConflictingHashAlgos(String, String), + #[error("missing inline hash algo, but no externally-specified algo: {0}")] + MissingInlineHashAlgo(String), +} + +/// parses a string to a nix hash. +/// +/// Hashes can be: +/// - Nix hash strings +/// - SRI hashes +/// - bare digests +/// +/// Encoding for Nix hash strings or bare digests can be: +/// - base16 (lowerhex), +/// - nixbase32, +/// - base64 (StdEncoding) +/// - sri string +/// +/// The encoding is derived from the length of the string and the hash type. +/// The hash is communicated out-of-band, but might also be in-band (in the +/// case of a nix hash string or SRI), in which it needs to be consistent with the +/// one communicated out-of-band. +pub fn from_str(s: &str, algo_str: Option<&str>) -> Result<NixHash, Error> { + // validate algo_str, construct hash_algo + let algo: Option<HashAlgo> = match &algo_str { + Some("sha1") => Some(HashAlgo::Sha1), + Some("sha256") => Some(HashAlgo::Sha256), + Some("sha512") => Some(HashAlgo::Sha512), + Some("md5") => Some(HashAlgo::Md5), + Some(e) => return Err(Error::InvalidAlgo(e.to_string())), + None => None, + }; + + // Peek at the beginning of the string to detect SRI hashes. + if s.starts_with("sha1-") + || s.starts_with("sha256-") + || s.starts_with("sha512-") + || s.starts_with("md5-") + { + let parsed_nixhash = from_sri_str(s)?; + // ensure the algo matches with what has been passed externally, if so. + if let Some(algo) = algo { + if algo != parsed_nixhash.algo { + return Err(Error::ConflictingHashAlgos( + algo.to_string(), + parsed_nixhash.algo.to_string(), + )); + } + } + return Ok(parsed_nixhash); + } + + // Peek at the beginning again to see if it's a Nix Hash + if s.starts_with("sha1:") + || s.starts_with("sha256:") + || s.starts_with("sha512:") + || s.starts_with("md5:") + { + let parsed_nixhash = from_nix_str(s)?; + // ensure the algo matches with what has been passed externally, if so. + if let Some(algo) = algo { + if algo != parsed_nixhash.algo { + return Err(Error::ConflictingHashAlgos( + algo.to_string(), + parsed_nixhash.algo.to_string(), + )); + } + } + return Ok(parsed_nixhash); + } + + // Neither of these, assume a bare digest, so there MUST be an externally-passed algo. + match algo { + // Fail if there isn't. + None => Err(Error::MissingInlineHashAlgo(s.to_string())), + Some(algo) => decode_digest(s, algo), + } +} + +/// Parses a Nix hash string ($algo:$digest) to a NixHash. +pub fn from_nix_str(s: &str) -> Result<NixHash, Error> { + if let Some(rest) = s.strip_prefix("sha1:") { + decode_digest(rest, HashAlgo::Sha1) + } else if let Some(rest) = s.strip_prefix("sha256:") { + decode_digest(rest, HashAlgo::Sha256) + } else if let Some(rest) = s.strip_prefix("sha512:") { + decode_digest(rest, HashAlgo::Sha512) + } else if let Some(rest) = s.strip_prefix("md5:") { + decode_digest(rest, HashAlgo::Md5) + } else { + Err(Error::InvalidAlgo(s.to_string())) + } +} + +/// Parses a Nix SRI string to a NixHash. +/// Contrary to the SRI spec, Nix doesn't support SRI strings with multiple hashes, +/// only supports sha256 and sha512 from the spec, and supports sha1 and md5 +/// additionally. +/// It also accepts SRI strings where the base64 has an with invalid padding. +pub fn from_sri_str(s: &str) -> Result<NixHash, Error> { + // try to find the first occurence of "-" + let idx = s.as_bytes().iter().position(|&e| e == b'-'); + + if idx.is_none() { + return Err(Error::InvalidSRI(s.to_string())); + } + + let idx = idx.unwrap(); + + // try to map the part before that `-` to a supported hash algo: + let algo: HashAlgo = s[..idx].try_into()?; + + // the rest should be the digest (as Nix doesn't support more than one hash in an SRI string). + let encoded_digest = &s[idx + 1..]; + let actual_len = encoded_digest.as_bytes().len(); + + // verify the digest length matches what we'd expect from the hash function, + // and then either try decoding as BASE64 or BASE64_NOPAD. + // This will also reject SRI strings with more than one hash, because the length won't match + if actual_len == BASE64.encode_len(hash_algo_length(&algo)) { + let digest: Vec<u8> = BASE64 + .decode(encoded_digest.as_bytes()) + .map_err(Error::InvalidBase64Encoding)?; + Ok(NixHash { digest, algo }) + } else if actual_len == BASE64_NOPAD.encode_len(hash_algo_length(&algo)) { + let digest: Vec<u8> = BASE64_NOPAD + .decode(encoded_digest.as_bytes()) + .map_err(Error::InvalidBase64Encoding)?; + Ok(NixHash { digest, algo }) + } else { + // NOTE: As of now, we reject SRI hashes containing additional + // characters (which upstream Nix seems to simply truncate), as + // there's no occurence of this is in nixpkgs. + // It most likely should also be a bug in Nix. + Err(Error::InvalidEncodedDigestLength( + encoded_digest.as_bytes().len(), + algo, + )) + } +} + +/// Decode a plain digest depending on the hash algo specified externally. +/// hexlower, nixbase32 and base64 encodings are supported - the encoding is +/// inferred from the input length. +fn decode_digest(s: &str, algo: HashAlgo) -> Result<NixHash, Error> { + // for the chosen hash algo, calculate the expected (decoded) digest length + // (as bytes) + let expected_digest_len = hash_algo_length(&algo); + + Ok(NixHash { + digest: match s.len() { + n if n == data_encoding::HEXLOWER.encode_len(expected_digest_len) => { + data_encoding::HEXLOWER + .decode(s.as_ref()) + .map_err(Error::InvalidBase16Encoding) + } + n if n == nixbase32::encode_len(expected_digest_len) => { + nixbase32::decode(s.as_ref()).map_err(Error::InvalidBase32Encoding) + } + n if n == BASE64.encode_len(expected_digest_len) => BASE64 + .decode(s.as_ref()) + .map_err(Error::InvalidBase64Encoding), + _ => return Err(Error::InvalidEncodedDigestLength(s.len(), algo)), + }?, + algo, + }) +} + +// return the number of bytes in the digest of the given hash algo. +fn hash_algo_length(hash_algo: &HashAlgo) -> usize { + match hash_algo { + HashAlgo::Sha1 => 20, + HashAlgo::Sha256 => 32, + HashAlgo::Sha512 => 64, + HashAlgo::Md5 => 16, + } +} + +#[cfg(test)] +mod tests { + use crate::{ + nixbase32, + nixhash::{self, HashAlgo, NixHash}, + }; + use test_case::test_case; + const DIGEST_SHA1: &[u8] = &[ + 0x60, 0x16, 0x77, 0x79, 0x97, 0xc3, 0x0a, 0xb0, 0x24, 0x13, 0xcf, 0x50, 0x95, 0x62, 0x2c, + 0xd7, 0x92, 0x42, 0x83, 0xac, + ]; + + const DIGEST_SHA256: &[u8] = &[ + 0xa5, 0xce, 0x9c, 0x15, 0x5e, 0xd0, 0x93, 0x97, 0x61, 0x46, 0x46, 0xc9, 0x71, 0x7f, 0xc7, + 0xcd, 0x94, 0xb1, 0x02, 0x3d, 0x7b, 0x76, 0xb6, 0x18, 0xd4, 0x09, 0xe4, 0xfe, 0xfd, 0x6e, + 0x9d, 0x39, + ]; + + const DIGEST_SHA512: &[u8] = &[ + 0xab, 0x40, 0xd0, 0xbe, 0x35, 0x41, 0xf0, 0x77, 0x4b, 0xba, 0x78, 0x15, 0xd1, 0x3d, 0x10, + 0xb0, 0x32, 0x52, 0xe9, 0x6e, 0x95, 0xf7, 0xdb, 0xb4, 0xee, 0x99, 0xa3, 0xb4, 0x31, 0xc2, + 0x16, 0x62, 0xfd, 0x69, 0x71, 0xa0, 0x20, 0x16, 0x0e, 0x39, 0x84, 0x8a, 0xa5, 0xf3, 0x05, + 0xb9, 0xbe, 0x0f, 0x78, 0x72, 0x7b, 0x2b, 0x07, 0x89, 0xe3, 0x9f, 0x12, 0x4d, 0x21, 0xe9, + 0x2b, 0x8f, 0x39, 0xef, + ]; + const DIGEST_MD5: &[u8] = &[ + 0xc4, 0x87, 0x4a, 0x88, 0x97, 0x44, 0x0b, 0x39, 0x3d, 0x86, 0x2d, 0x8f, 0xd4, 0x59, 0x07, + 0x3f, + ]; + + fn to_base16(digest: &[u8]) -> String { + data_encoding::HEXLOWER.encode(digest) + } + + fn to_nixbase32(digest: &[u8]) -> String { + nixbase32::encode(digest) + } + + fn to_base64(digest: &[u8]) -> String { + data_encoding::BASE64.encode(digest) + } + + fn to_base64_nopad(digest: &[u8]) -> String { + data_encoding::BASE64_NOPAD.encode(digest) + } + + // TODO + fn make_nixhash(algo: &HashAlgo, digest_encoded: String) -> String { + format!("{}:{}", algo, digest_encoded) + } + fn make_sri_string(algo: &HashAlgo, digest_encoded: String) -> String { + format!("{}-{}", algo, digest_encoded) + } + + /// Test parsing a hash string in various formats, and also when/how the out-of-band algo is needed. + #[test_case(DIGEST_SHA1, HashAlgo::Sha1; "sha1")] + #[test_case(DIGEST_SHA256, HashAlgo::Sha256; "sha256")] + #[test_case(DIGEST_SHA512, HashAlgo::Sha512; "sha512")] + #[test_case(DIGEST_MD5, HashAlgo::Md5; "md5")] + fn from_str(digest: &[u8], algo: HashAlgo) { + let expected_hash = NixHash { + digest: digest.to_vec(), + algo, + }; + // parse SRI + { + // base64 without out-of-band algo + let s = make_sri_string(&algo, to_base64(digest)); + let h = nixhash::from_str(&s, None).expect("must succeed"); + assert_eq!(expected_hash, h); + + // base64 with out-of-band-algo + let s = make_sri_string(&algo, to_base64(digest)); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + + // base64_nopad without out-of-band algo + let s = make_sri_string(&algo, to_base64_nopad(digest)); + let h = nixhash::from_str(&s, None).expect("must succeed"); + assert_eq!(expected_hash, h); + + // base64_nopad with out-of-band-algo + let s = make_sri_string(&algo, to_base64_nopad(digest)); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + } + + // parse plain base16. should succeed with algo out-of-band, but fail without. + { + let s = to_base16(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + } + + // parse plain nixbase32. should succeed with algo out-of-band, but fail without. + { + let s = to_nixbase32(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + } + + // parse plain base64. should succeed with algo out-of-band, but fail without. + { + let s = to_base64(digest); + nixhash::from_str(&s, None).expect_err("must fail"); + let h = nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed"); + assert_eq!(expected_hash, h); + } + + // parse Nix hash strings + { + // base16. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(&algo, to_base16(digest)); + assert_eq!( + expected_hash, + nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + // nixbase32. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(&algo, to_nixbase32(digest)); + assert_eq!( + expected_hash, + nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + // base64. should succeed with both algo out-of-band and in-band. + { + let s = make_nixhash(&algo, to_base64(digest)); + assert_eq!( + expected_hash, + nixhash::from_str(&s, None).expect("must succeed") + ); + assert_eq!( + expected_hash, + nixhash::from_str(&s, Some(&algo.to_string())).expect("must succeed") + ); + } + } + } + + /// Test parsing an SRI hash via the [nixhash::from_sri_str] method. + #[test] + fn from_sri_str() { + let nix_hash = nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank/v1unTk=") + .expect("must succeed"); + + assert_eq!(HashAlgo::Sha256, nix_hash.algo); + assert_eq!( + vec![ + 0xa5, 0xce, 0x9c, 0x15, 0x5e, 0xd0, 0x93, 0x97, 0x61, 0x46, 0x46, 0xc9, 0x71, 0x7f, + 0xc7, 0xcd, 0x94, 0xb1, 0x02, 0x3d, 0x7b, 0x76, 0xb6, 0x18, 0xd4, 0x09, 0xe4, 0xfe, + 0xfd, 0x6e, 0x9d, 0x39 + ], + nix_hash.digest + ) + } + + /// Ensure we detect truncated base64 digests, where the digest size + /// doesn't match what's expected from that hash function. + #[test] + fn from_sri_str_truncated() { + nixhash::from_sri_str("sha256-pc6cFV7Qk5dhRkbJcX/HzZSxAj17drYY1Ank") + .expect_err("must fail"); + } + + /// Ensure we fail on SRI hashes that Nix doesn't support. + #[test] + fn from_sri_str_unsupported() { + nixhash::from_sri_str( + "sha384-o4UVSl89mIB0sFUK+3jQbG+C9Zc9dRlV/Xd3KAvXEbhqxu0J5OAdg6b6VHKHwQ7U", + ) + .expect_err("must fail"); + } + + /// Ensure we reject invalid base64 encoding + #[test] + fn from_sri_str_invalid_base64() { + nixhash::from_sri_str("sha256-invalid=base64").expect_err("must fail"); + } + + /// Ensure we reject SRI strings with multiple hashes, as Nix doesn't support that. + #[test] + fn from_sri_str_unsupported_multiple() { + nixhash::from_sri_str("sha256-ngth6szLtC1IJIYyz3lhftzL8SkrJkqPyPve+dGqa1Y= sha512-q0DQvjVB8HdLungV0T0QsDJS6W6V99u07pmjtDHCFmL9aXGgIBYOOYSKpfMFub4PeHJ7KweJ458STSHpK4857w==").expect_err("must fail"); + } + + /// Nix also accepts SRI strings with missing padding, but only in case the + /// string is expressed as SRI, so it still needs to have a `sha256-` prefix. + /// + /// This both seems to work if it is passed with and without specifying the + /// hash algo out-of-band (hash = "sha256-…" or sha256 = "sha256-…") + /// + /// Passing the same broken base64 string, but not as SRI, while passing + /// the hash algo out-of-band does not work. + #[test] + fn sha256_broken_padding() { + let broken_base64 = "fgIr3TyFGDAXP5+qoAaiMKDg/a1MlT6Fv/S/DaA24S8"; + // if padded with a trailing '=' + let expected_digest = vec![ + 0x7e, 0x02, 0x2b, 0xdd, 0x3c, 0x85, 0x18, 0x30, 0x17, 0x3f, 0x9f, 0xaa, 0xa0, 0x06, + 0xa2, 0x30, 0xa0, 0xe0, 0xfd, 0xad, 0x4c, 0x95, 0x3e, 0x85, 0xbf, 0xf4, 0xbf, 0x0d, + 0xa0, 0x36, 0xe1, 0x2f, + ]; + + // passing hash algo out of band should succeed + let nix_hash = nixhash::from_str(&format!("sha256-{}", &broken_base64), Some("sha256")) + .expect("must succeed"); + assert_eq!(&expected_digest, &nix_hash.digest); + + // not passing hash algo out of band should succeed + let nix_hash = + nixhash::from_str(&format!("sha256-{}", &broken_base64), None).expect("must succeed"); + assert_eq!(&expected_digest, &nix_hash.digest); + + // not passing SRI, but hash algo out of band should fail + nixhash::from_str(broken_base64, Some("sha256")).expect_err("must fail"); + } +} diff --git a/tvix/nix-compat/src/nixhash/with_mode.rs b/tvix/nix-compat/src/nixhash/with_mode.rs new file mode 100644 index 000000000000..caf14331426a --- /dev/null +++ b/tvix/nix-compat/src/nixhash/with_mode.rs @@ -0,0 +1,189 @@ +use crate::nixbase32; +use crate::nixhash::{self, HashAlgo, NixHash}; +use serde::de::Unexpected; +use serde::ser::SerializeMap; +use serde::{Deserialize, Deserializer, Serialize, Serializer}; +use serde_json::{Map, Value}; + +use super::algos::SUPPORTED_ALGOS; + +pub enum NixHashMode { + Flat, + Recursive, +} + +impl NixHashMode { + pub fn prefix(self) -> &'static str { + match self { + Self::Flat => "", + Self::Recursive => "r:", + } + } +} + +/// A Nix Hash can either be flat or recursive. +#[derive(Clone, Debug, Eq, PartialEq)] +pub enum NixHashWithMode { + Flat(NixHash), + Recursive(NixHash), +} + +impl NixHashWithMode { + pub fn mode(&self) -> NixHashMode { + match self { + Self::Flat(_) => NixHashMode::Flat, + Self::Recursive(_) => NixHashMode::Recursive, + } + } + + pub fn digest(&self) -> &NixHash { + match self { + Self::Flat(ref h) => h, + Self::Recursive(ref h) => h, + } + } + + /// Formats a [NixHashWithMode] in the Nix default hash format, + /// which is the algo, followed by a colon, then the lower hex encoded digest. + /// In case the hash itself is recursive, a `r:` is added as prefix + pub fn to_nix_hash_string(&self) -> String { + String::from(self.mode().prefix()) + &self.digest().to_nix_hash_string() + } + + /// This takes a serde_json::Map and turns it into this structure. This is necessary to do such + /// shenigans because we have external consumers, like the Derivation parser, who would like to + /// know whether we have a invalid or a missing NixHashWithMode structure in another structure, + /// e.g. Output. + /// This means we have this combinatorial situation: + /// - no hash, no hashAlgo: no NixHashWithMode so we return Ok(None). + /// - present hash, missing hashAlgo: invalid, we will return missing_field + /// - missing hash, present hashAlgo: same + /// - present hash, present hashAlgo: either we return ourselves or a type/value validation + /// error. + /// This function is for internal consumption regarding those needs until we have a better + /// solution. Now this is said, let's explain how this works. + /// + /// We want to map the serde data model into a NixHashWithMode. + /// + /// The serde data model has a `hash` field (containing a digest in nixbase32), + /// and a `hashAlgo` field, containing the stringified hash algo. + /// In case the hash is recursive, hashAlgo also has a `r:` prefix. + /// + /// This is to match how `nix show-derivation` command shows them in JSON + /// representation. + pub(crate) fn from_map<'de, D>(map: &Map<String, Value>) -> Result<Option<Self>, D::Error> + where + D: Deserializer<'de>, + { + // If we don't have hash neither hashAlgo, let's just return None. + if !map.contains_key("hash") && !map.contains_key("hashAlgo") { + return Ok(None); + } + + let digest: Vec<u8> = { + if let Some(v) = map.get("hash") { + if let Some(s) = v.as_str() { + data_encoding::HEXLOWER + .decode(s.as_bytes()) + .map_err(|e| serde::de::Error::custom(e.to_string()))? + } else { + return Err(serde::de::Error::invalid_type( + Unexpected::Other(&v.to_string()), + &"a string", + )); + } + } else { + return Err(serde::de::Error::missing_field( + "couldn't extract `hash` key but `hashAlgo` key present", + )); + } + }; + + if let Some(v) = map.get("hashAlgo") { + if let Some(s) = v.as_str() { + match s.strip_prefix("r:") { + Some(rest) => Ok(Some(Self::Recursive( + ( + HashAlgo::try_from(rest).map_err(|e| { + serde::de::Error::invalid_value( + Unexpected::Other(&e.to_string()), + &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(), + ) + })?, + digest, + ) + .try_into() + .map_err(|e: nixhash::Error| { + serde::de::Error::invalid_value( + Unexpected::Other(&e.to_string()), + &"a digest with right length", + ) + })?, + ))), + None => Ok(Some(Self::Flat( + ( + HashAlgo::try_from(s).map_err(|e| { + serde::de::Error::invalid_value( + Unexpected::Other(&e.to_string()), + &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(), + ) + })?, + digest, + ) + .try_into() + .map_err(|e: nixhash::Error| { + serde::de::Error::invalid_value( + Unexpected::Other(&e.to_string()), + &"a digest with right length", + ) + })?, + ))), + } + } else { + Err(serde::de::Error::invalid_type( + Unexpected::Other(&v.to_string()), + &"a string", + )) + } + } else { + Err(serde::de::Error::missing_field( + "couldn't extract `hashAlgo` key, but `hash` key present", + )) + } + } +} + +impl Serialize for NixHashWithMode { + /// map a NixHashWithMode into the serde data model. + fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> + where + S: Serializer, + { + let mut map = serializer.serialize_map(Some(2))?; + match self { + NixHashWithMode::Flat(h) => { + map.serialize_entry("hash", &nixbase32::encode(&h.digest))?; + map.serialize_entry("hashAlgo", &h.algo.to_string())?; + } + NixHashWithMode::Recursive(h) => { + map.serialize_entry("hash", &nixbase32::encode(&h.digest))?; + map.serialize_entry("hashAlgo", &format!("r:{}", &h.algo.to_string()))?; + } + }; + map.end() + } +} + +impl<'de> Deserialize<'de> for NixHashWithMode { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: Deserializer<'de>, + { + let value = Self::from_map::<D>(&Map::deserialize(deserializer)?)?; + + match value { + None => Err(serde::de::Error::custom("couldn't parse as map")), + Some(v) => Ok(v), + } + } +} diff --git a/tvix/nix-compat/src/store_path/mod.rs b/tvix/nix-compat/src/store_path/mod.rs new file mode 100644 index 000000000000..d3317b67f62e --- /dev/null +++ b/tvix/nix-compat/src/store_path/mod.rs @@ -0,0 +1,328 @@ +use crate::nixbase32::{self, Nixbase32DecodeError}; +use data_encoding::BASE64; +use std::{fmt, path::PathBuf, str::FromStr}; +use thiserror; + +#[cfg(target_family = "unix")] +use std::os::unix::ffi::OsStringExt; + +mod utils; + +pub use utils::*; + +pub const DIGEST_SIZE: usize = 20; +// lazy_static doesn't allow us to call NIXBASE32.encode_len(), so we ran it +// manually and have an assert in the tests. +pub const ENCODED_DIGEST_SIZE: usize = 32; + +// The store dir prefix, without trailing slash. +// That's usually where the Nix store is mounted at. +pub const STORE_DIR: &str = "/nix/store"; +pub const STORE_DIR_WITH_SLASH: &str = "/nix/store/"; + +/// Errors that can occur when parsing a literal store path +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum Error { + #[error("Dash is missing between hash and name")] + MissingDash(), + #[error("Hash encoding is invalid: {0}")] + InvalidHashEncoding(Nixbase32DecodeError), + #[error("Invalid length")] + InvalidLength(), + #[error( + "Invalid name: \"{}\", character at position {} is invalid", + std::str::from_utf8(&.0).unwrap_or(&BASE64.encode(.0)), + .1, + )] + InvalidName(Vec<u8>, usize), + #[error("Tried to parse an absolute path which was missing the store dir prefix.")] + MissingStoreDir(), +} + +/// Represents a path in the Nix store (a direct child of [STORE_DIR]). +/// +/// It consists of a digest (20 bytes), and a name, which is a string. +/// The name may only contain ASCII alphanumeric, or one of the following +/// characters: `-`, `_`, `.`, `+`, `?`, `=`. +/// The name is usually used to describe the pname and version of a package. +/// Derivation paths can also be represented as store paths, their names just +/// end with the `.drv` prefix. +/// +/// A [StorePath] does not encode any additional subpath "inside" the store +/// path. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct StorePath { + pub digest: [u8; DIGEST_SIZE], + pub name: String, +} + +impl PartialOrd for StorePath { + fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> { + self.digest.partial_cmp(&other.digest) + } +} + +impl Ord for StorePath { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.digest.cmp(&other.digest) + } +} + +impl FromStr for StorePath { + type Err = Error; + + /// Construct a [StorePath] by passing the `$digest-$name` string + /// that comes after [STORE_DIR_WITH_SLASH]. + fn from_str(s: &str) -> Result<Self, Self::Err> { + Self::from_bytes(s.as_bytes()) + } +} + +impl StorePath { + /// Construct a [StorePath] by passing the `$digest-$name` string + /// that comes after [STORE_DIR_WITH_SLASH]. + pub fn from_bytes(s: &[u8]) -> Result<StorePath, Error> { + // the whole string needs to be at least: + // + // - 32 characters (encoded hash) + // - 1 dash + // - 1 character for the name + if s.len() < ENCODED_DIGEST_SIZE + 2 { + Err(Error::InvalidLength())? + } + + let digest = match nixbase32::decode(&s[..ENCODED_DIGEST_SIZE]) { + Ok(decoded) => decoded, + Err(decoder_error) => return Err(Error::InvalidHashEncoding(decoder_error)), + }; + + if s[ENCODED_DIGEST_SIZE] != b'-' { + return Err(Error::MissingDash()); + } + + Ok(StorePath { + name: validate_name(&s[ENCODED_DIGEST_SIZE + 1..])?, + digest: digest.try_into().expect("size is known"), + }) + } + + /// Construct a [StorePath] from an absolute store path string. + /// This is equivalent to calling [StorePath::from_bytes], but stripping the + /// [STORE_DIR_WITH_SLASH] prefix before. + pub fn from_absolute_path(s: &[u8]) -> Result<StorePath, Error> { + match s.strip_prefix(STORE_DIR_WITH_SLASH.as_bytes()) { + Some(s_stripped) => Self::from_bytes(s_stripped), + None => Err(Error::MissingStoreDir()), + } + } + + /// Decompose a string into a [StorePath] and a [PathBuf] containing the + /// rest of the path, or an error. + #[cfg(target_family = "unix")] + pub fn from_absolute_path_full(s: &str) -> Result<(StorePath, PathBuf), Error> { + // strip [STORE_DIR_WITH_SLASH] from s + match s.strip_prefix(STORE_DIR_WITH_SLASH) { + None => Err(Error::MissingStoreDir()), + Some(rest) => { + // put rest in a PathBuf + let mut p = PathBuf::new(); + p.push(rest); + + let mut it = p.components(); + + // The first component of the rest must be parse-able as a [StorePath] + if let Some(first_component) = it.next() { + // convert first component to StorePath + let first_component_bytes = first_component.as_os_str().to_owned().into_vec(); + let store_path = StorePath::from_bytes(&first_component_bytes)?; + // collect rest + let rest_buf: PathBuf = it.collect(); + Ok((store_path, rest_buf)) + } else { + Err(Error::InvalidLength()) // Well, or missing "/"? + } + } + } + } + + /// Converts the [StorePath] to an absolute store path string. + /// That is just the string representation, prefixed with the store prefix + /// ([STORE_DIR_WITH_SLASH]), + pub fn to_absolute_path(&self) -> String { + format!("{}{}", STORE_DIR_WITH_SLASH, self) + } +} + +/// Checks a given &[u8] to match the restrictions for [StorePath::name], and +/// returns the name as string if successful. +pub(crate) fn validate_name(s: &[u8]) -> Result<String, Error> { + // Empty names are not allowed. + if s.is_empty() { + return Err(Error::InvalidLength()); + } + + for (i, c) in s.iter().enumerate() { + if c.is_ascii_alphanumeric() + || (*c == b'.' && i != 0) // can't start with a dot + || *c == b'-' + || *c == b'_' + || *c == b'+' + || *c == b'?' + || *c == b'=' + { + continue; + } + + return Err(Error::InvalidName(s.to_vec(), i)); + } + + Ok(String::from_utf8(s.to_vec()).unwrap()) +} + +impl fmt::Display for StorePath { + /// The string representation of a store path starts with a digest (20 + /// bytes), [crate::nixbase32]-encoded, followed by a `-`, + /// and ends with the name. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}-{}", nixbase32::encode(&self.digest), self.name) + } +} + +#[cfg(test)] +mod tests { + use std::path::PathBuf; + + use crate::nixbase32; + use crate::store_path::{DIGEST_SIZE, ENCODED_DIGEST_SIZE}; + use test_case::test_case; + + use super::{Error, StorePath}; + + #[test] + fn encoded_digest_size() { + assert_eq!(ENCODED_DIGEST_SIZE, nixbase32::encode_len(DIGEST_SIZE)); + } + + #[test] + fn happy_path() { + let example_nix_path_str = + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; + let nixpath = StorePath::from_bytes(example_nix_path_str.as_bytes()) + .expect("Error parsing example string"); + + let expected_digest: [u8; DIGEST_SIZE] = [ + 0x8a, 0x12, 0x32, 0x15, 0x22, 0xfd, 0x91, 0xef, 0xbd, 0x60, 0xeb, 0xb2, 0x48, 0x1a, + 0xf8, 0x85, 0x80, 0xf6, 0x16, 0x00, + ]; + + assert_eq!("net-tools-1.60_p20170221182432", nixpath.name); + assert_eq!(nixpath.digest, expected_digest); + + assert_eq!(example_nix_path_str, nixpath.to_string()) + } + + /// This is the store path rejected when `nix-store --add`'ing an + /// empty `.gitignore` file. + /// + /// Nix 2.4 accidentally dropped this behaviour, but this is considered a bug. + /// See https://github.com/NixOS/nix/pull/9095. + #[test] + fn starts_with_dot() { + StorePath::from_bytes(b"fli4bwscgna7lpm7v5xgnjxrxh0yc7ra-.gitignore") + .expect_err("must fail"); + } + + #[test] + fn invalid_hash_length() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yy-net-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn invalid_encoding_hash() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn more_than_just_the_bare_nix_store_path() { + StorePath::from_bytes( + b"00bgd045z0d4icpbc2yyz4gx48aku4la-net-tools-1.60_p20170221182432/bin/arp", + ) + .expect_err("must fail"); + } + + #[test] + fn no_dash_between_hash_and_name() { + StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44lanet-tools-1.60_p20170221182432") + .expect_err("must fail"); + } + + #[test] + fn absolute_path() { + let example_nix_path_str = + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432"; + let nixpath_expected = + StorePath::from_bytes(example_nix_path_str.as_bytes()).expect("must parse"); + + let nixpath_actual = StorePath::from_absolute_path( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432".as_bytes(), + ) + .expect("must parse"); + + assert_eq!(nixpath_expected, nixpath_actual); + + assert_eq!( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + nixpath_actual.to_absolute_path(), + ); + } + + #[test] + fn absolute_path_missing_prefix() { + assert_eq!( + Error::MissingStoreDir(), + StorePath::from_absolute_path(b"foobar-123").expect_err("must fail") + ); + } + + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::new()) + ; "without prefix")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::new()) + ; "without prefix, but trailing slash")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::from("bin/arp")) + ; "with prefix")] + #[test_case( + "/nix/store/00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432/bin/arp/", + (StorePath::from_bytes(b"00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432").unwrap(), PathBuf::from("bin/arp/")) + ; "with prefix and trailing slash")] + fn from_absolute_path_full(s: &str, expected: (StorePath, PathBuf)) { + let actual = StorePath::from_absolute_path_full(s).expect("must succeed"); + assert_eq!(expected, actual); + } + + #[test] + fn from_absolute_path_errors() { + assert_eq!( + Error::InvalidLength(), + StorePath::from_absolute_path_full("/nix/store/").expect_err("must fail") + ); + assert_eq!( + Error::InvalidLength(), + StorePath::from_absolute_path_full("/nix/store/foo").expect_err("must fail") + ); + assert_eq!( + Error::MissingStoreDir(), + StorePath::from_absolute_path_full( + "00bgd045z0d4icpbc2yyz4gx48ak44la-net-tools-1.60_p20170221182432" + ) + .expect_err("must fail") + ); + } +} diff --git a/tvix/nix-compat/src/store_path/utils.rs b/tvix/nix-compat/src/store_path/utils.rs new file mode 100644 index 000000000000..ddece96e63f7 --- /dev/null +++ b/tvix/nix-compat/src/store_path/utils.rs @@ -0,0 +1,312 @@ +use super::{Error, STORE_DIR}; +use crate::nixbase32; +use crate::nixhash::{HashAlgo, NixHash, NixHashWithMode}; +use crate::store_path::StorePath; +use sha2::{Digest, Sha256}; +use thiserror; + +/// Errors that can occur when creating a content-addressed store path. +/// +/// This wraps the main [crate::store_path::Error].. +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +pub enum BuildStorePathError { + #[error("Invalid Store Path: {0}")] + InvalidStorePath(Error), + /// This error occurs when we have references outside the SHA-256 + + /// Recursive case. The restriction comes from upstream Nix. It may be + /// lifted at some point but there isn't a pressing need to anticipate that. + #[error("References were not supported as much as requested")] + InvalidReference(), +} + +/// compress_hash takes an arbitrarily long sequence of bytes (usually +/// a hash digest), and returns a sequence of bytes of length +/// OUTPUT_SIZE. +/// +/// It's calculated by rotating through the bytes in the output buffer +/// (zero- initialized), and XOR'ing with each byte of the passed +/// input. It consumes 1 byte at a time, and XOR's it with the current +/// value in the output buffer. +/// +/// This mimics equivalent functionality in C++ Nix. +pub fn compress_hash<const OUTPUT_SIZE: usize>(input: &[u8]) -> [u8; OUTPUT_SIZE] { + let mut output = [0; OUTPUT_SIZE]; + + for (ii, ch) in input.iter().enumerate() { + output[ii % OUTPUT_SIZE] ^= ch; + } + + output +} + +/// This builds a store path, by calculating the text_hash_string of either a +/// derivation or a literal text file that may contain references. +pub fn build_text_path<S: AsRef<str>, I: IntoIterator<Item = S>, C: AsRef<[u8]>>( + name: &str, + content: C, + references: I, +) -> Result<StorePath, Error> { + build_store_path_from_fingerprint_parts( + &make_type("text", references, false), + // the nix_hash_string representation of the sha256 digest of some contents + &{ + let content_digest = { + let hasher = Sha256::new_with_prefix(content); + hasher.finalize() + }; + + // We populate the struct directly, as we know the sha256 digest has the + // right size. + NixHash { + algo: crate::nixhash::HashAlgo::Sha256, + digest: content_digest.to_vec(), + } + }, + name, + ) +} + +/// This builds a more "regular" content-addressed store path +pub fn build_regular_ca_path<S: AsRef<str>, I: IntoIterator<Item = S>>( + name: &str, + hash_with_mode: &NixHashWithMode, + references: I, + self_reference: bool, +) -> Result<StorePath, BuildStorePathError> { + match &hash_with_mode { + NixHashWithMode::Recursive( + ref hash @ NixHash { + algo: HashAlgo::Sha256, + .. + }, + ) => build_store_path_from_fingerprint_parts( + &make_type("source", references, self_reference), + hash, + name, + ) + .map_err(BuildStorePathError::InvalidStorePath), + _ => { + if references.into_iter().next().is_some() { + return Err(BuildStorePathError::InvalidReference()); + } + if self_reference { + return Err(BuildStorePathError::InvalidReference()); + } + build_store_path_from_fingerprint_parts( + "output:out", + &{ + let content_digest = { + let mut hasher = Sha256::new_with_prefix("fixed:out:"); + hasher.update(hash_with_mode.mode().prefix()); + hasher.update(hash_with_mode.digest().algo.to_string()); + hasher.update(":"); + hasher.update( + &data_encoding::HEXLOWER.encode(&hash_with_mode.digest().digest), + ); + hasher.update(":"); + hasher.finalize() + }; + + // We don't use [NixHash::from_algo_and_digest], as we know [Sha256] has + // the right digest size. + NixHash { + algo: crate::nixhash::HashAlgo::Sha256, + digest: content_digest.to_vec(), + } + }, + name, + ) + .map_err(BuildStorePathError::InvalidStorePath) + } + } +} + +/// For given NAR sha256 digest and name, return the new [StorePath] this would have. +pub fn build_nar_based_store_path(nar_sha256_digest: &[u8; 32], name: &str) -> StorePath { + // We populate the struct directly, as we know the sha256 digest has the + // right size. + let nar_hash_with_mode = NixHashWithMode::Recursive(NixHash { + algo: HashAlgo::Sha256, + digest: nar_sha256_digest.to_vec(), + }); + + build_regular_ca_path(name, &nar_hash_with_mode, Vec::<String>::new(), false).unwrap() +} + +/// This builds an input-addressed store path +/// +/// Input-addresed store paths are always derivation outputs, the "input" in question is the +/// derivation and its closure. +pub fn build_output_path( + drv_hash: &NixHash, + output_name: &str, + output_path_name: &str, +) -> Result<StorePath, Error> { + build_store_path_from_fingerprint_parts( + &(String::from("output:") + output_name), + drv_hash, + output_path_name, + ) +} + +/// This builds a store path from fingerprint parts. +/// Usually, that function is used from [build_text_path] and +/// passed a "text hash string" (starting with "text:" as fingerprint), +/// but other fingerprints starting with "output:" are also used in Derivation +/// output path calculation. +/// +/// The fingerprint is hashed with sha256, its digest is compressed to 20 bytes, +/// and nixbase32-encoded (32 characters). +fn build_store_path_from_fingerprint_parts( + ty: &str, + hash: &NixHash, + name: &str, +) -> Result<StorePath, Error> { + let fingerprint = + String::from(ty) + ":" + &hash.to_nix_hash_string() + ":" + STORE_DIR + ":" + name; + let digest = { + let hasher = Sha256::new_with_prefix(fingerprint); + hasher.finalize() + }; + let compressed = compress_hash::<20>(&digest); + super::validate_name(name.as_bytes())?; + Ok(StorePath { + digest: compressed, + name: name.to_string(), + }) +} + +/// This contains the Nix logic to create "text hash strings", which are used +/// in `builtins.toFile`, as well as in Derivation Path calculation. +/// +/// A text hash is calculated by concatenating the following fields, separated by a `:`: +/// +/// - text +/// - references, individually joined by `:` +/// - the nix_hash_string representation of the sha256 digest of some contents +/// - the value of `storeDir` +/// - the name +fn make_type<S: AsRef<str>, I: IntoIterator<Item = S>>( + ty: &str, + references: I, + self_ref: bool, +) -> String { + let mut s = String::from(ty); + + for reference in references { + s.push(':'); + s.push_str(reference.as_ref()); + } + + if self_ref { + s.push_str(":self"); + } + + s +} + +/// Nix placeholders (i.e. values returned by `builtins.placeholder`) +/// are used to populate outputs with paths that must be +/// string-replaced with the actual placeholders later, at runtime. +/// +/// The actual placeholder is basically just a SHA256 hash encoded in +/// cppnix format. +pub fn hash_placeholder(name: &str) -> String { + let digest = { + let mut hasher = Sha256::new(); + hasher.update(format!("nix-output:{}", name)); + hasher.finalize() + }; + + format!("/{}", nixbase32::encode(&digest)) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::nixhash::{NixHash, NixHashWithMode}; + + #[test] + fn build_text_path_with_zero_references() { + // This hash should match `builtins.toFile`, e.g.: + // + // nix-repl> builtins.toFile "foo" "bar" + // "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo" + + let store_path = build_text_path("foo", "bar", Vec::<String>::new()) + .expect("build_store_path() should succeed"); + + assert_eq!( + store_path.to_absolute_path().as_str(), + "/nix/store/vxjiwkjkn7x4079qvh1jkl5pn05j2aw0-foo" + ); + } + + #[test] + fn build_text_path_with_non_zero_references() { + // This hash should match: + // + // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}" + // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + + let inner = build_text_path("foo", "bar", Vec::<String>::new()) + .expect("path_with_references() should succeed"); + let inner_path = inner.to_absolute_path(); + + let outer = build_text_path("baz", &inner_path, vec![inner_path.as_str()]) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + ); + } + + #[test] + fn build_sha1_path() { + let outer = build_regular_ca_path( + "bar", + &NixHashWithMode::Recursive(NixHash { + algo: HashAlgo::Sha1, + digest: data_encoding::HEXLOWER + .decode(b"0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33") + .expect("hex should decode"), + }), + Vec::<String>::new(), + false, + ) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar" + ); + } + + #[test] + fn build_store_path_with_non_zero_references() { + // This hash should match: + // + // nix-repl> builtins.toFile "baz" "${builtins.toFile "foo" "bar"}" + // "/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz" + // + // $ nix store make-content-addressed /nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz + // rewrote '/nix/store/5xd714cbfnkz02h2vbsj4fm03x3f15nf-baz' to '/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz' + let outer = build_regular_ca_path( + "baz", + &NixHashWithMode::Recursive(NixHash { + algo: HashAlgo::Sha256, + digest: nixbase32::decode(b"1xqkzcb3909fp07qngljr4wcdnrh1gdam1m2n29i6hhrxlmkgkv1") + .expect("hex should decode"), + }), + vec!["/nix/store/dxwkwjzdaq7ka55pkk252gh32bgpmql4-foo"], + false, + ) + .expect("path_with_references() should succeed"); + + assert_eq!( + outer.to_absolute_path().as_str(), + "/nix/store/s89y431zzhmdn3k8r96rvakryddkpv2v-baz" + ); + } +} |