diff options
Diffstat (limited to 'tvix/nix-compat/src/derivation')
29 files changed, 2076 insertions, 0 deletions
diff --git a/tvix/nix-compat/src/derivation/errors.rs b/tvix/nix-compat/src/derivation/errors.rs new file mode 100644 index 000000000000..7dcc3a5d0042 --- /dev/null +++ b/tvix/nix-compat/src/derivation/errors.rs @@ -0,0 +1,58 @@ +//! Contains [DerivationError], exported as [crate::derivation::DerivationError] +use crate::store_path; +use thiserror::Error; + +use super::CAHash; + +/// Errors that can occur during the validation of Derivation structs. +#[derive(Debug, Error, PartialEq)] +pub enum DerivationError { + // outputs + #[error("no outputs defined")] + NoOutputs(), + #[error("invalid output name: {0}")] + InvalidOutputName(String), + #[error("encountered fixed-output derivation, but more than 1 output in total")] + MoreThanOneOutputButFixed(), + #[error("invalid output name for fixed-output derivation: {0}")] + InvalidOutputNameForFixed(String), + #[error("unable to validate output {0}: {1}")] + InvalidOutput(String, OutputError), + #[error("unable to validate output {0}: {1}")] + InvalidOutputDerivationPath(String, store_path::BuildStorePathError), + // input derivation + #[error("unable to parse input derivation path {0}: {1}")] + InvalidInputDerivationPath(String, store_path::Error), + #[error("input derivation {0} doesn't end with .drv")] + InvalidInputDerivationPrefix(String), + #[error("input derivation {0} output names are empty")] + EmptyInputDerivationOutputNames(String), + #[error("input derivation {0} output name {1} is invalid")] + InvalidInputDerivationOutputName(String, String), + + // input sources + #[error("unable to parse input sources path {0}: {1}")] + InvalidInputSourcesPath(String, store_path::Error), + + // platform + #[error("invalid platform field: {0}")] + InvalidPlatform(String), + + // builder + #[error("invalid builder field: {0}")] + InvalidBuilder(String), + + // environment + #[error("invalid environment key {0}")] + InvalidEnvironmentKey(String), +} + +/// Errors that can occur during the validation of a specific +// 
[crate::derivation::Output] of a [crate::derivation::Derivation]. +#[derive(Debug, Error, PartialEq)] +pub enum OutputError { + #[error("Invalid output path {0}: {1}")] + InvalidOutputPath(String, store_path::Error), + #[error("Invalid CAHash: {:?}", .0)] + InvalidCAHash(CAHash), +} diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs new file mode 100644 index 000000000000..2f90c54d8f76 --- /dev/null +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -0,0 +1,289 @@ +use crate::store_path::{ + self, build_ca_path, build_output_path, build_text_path, StorePath, StorePathRef, +}; +use bstr::BString; +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::collections::{BTreeMap, BTreeSet}; +use std::io; + +mod errors; +mod output; +mod parse_error; +mod parser; +mod validate; +mod write; + +#[cfg(test)] +mod tests; + +// Public API of the crate. +pub use crate::nixhash::{CAHash, NixHash}; +pub use errors::{DerivationError, OutputError}; +pub use output::Output; + +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +pub struct Derivation { + #[serde(rename = "args")] + pub arguments: Vec<String>, + + pub builder: String, + + #[serde(rename = "env")] + pub environment: BTreeMap<String, BString>, + + #[serde(rename = "inputDrvs")] + pub input_derivations: BTreeMap<String, BTreeSet<String>>, + + #[serde(rename = "inputSrcs")] + pub input_sources: BTreeSet<String>, + + pub outputs: BTreeMap<String, Output>, + + pub system: String, +} + +impl Derivation { + /// write the Derivation to the given [std::io::Write], in ATerm format. + /// + /// The only errors returned are those when writing to the passed writer. 
+ pub fn serialize(&self, writer: &mut impl std::io::Write) -> Result<(), io::Error> { + use write::*; + + writer.write_all(write::DERIVATION_PREFIX.as_bytes())?; + write_char(writer, write::PAREN_OPEN)?; + + write_outputs(writer, &self.outputs)?; + write_char(writer, COMMA)?; + + write_input_derivations(writer, &self.input_derivations)?; + write_char(writer, COMMA)?; + + write_input_sources(writer, &self.input_sources)?; + write_char(writer, COMMA)?; + + write_system(writer, &self.system)?; + write_char(writer, COMMA)?; + + write_builder(writer, &self.builder)?; + write_char(writer, COMMA)?; + + write_arguments(writer, &self.arguments)?; + write_char(writer, COMMA)?; + + write_environment(writer, &self.environment)?; + + write_char(writer, PAREN_CLOSE)?; + + Ok(()) + } + + /// return the ATerm serialization. + pub fn to_aterm_bytes(&self) -> Vec<u8> { + let mut buffer: Vec<u8> = Vec::new(); + + // invoke serialize and write to the buffer. + // Note we only propagate errors writing to the writer in serialize, + // which won't panic for the string we write to. + self.serialize(&mut buffer).unwrap(); + + buffer + } + + /// Parse a Derivation in ATerm serialization, and validate it passes our + /// set of validations. + pub fn from_aterm_bytes(b: &[u8]) -> Result<Derivation, parser::Error<&[u8]>> { + parser::parse(b) + } + + /// Returns the drv path of a [Derivation] struct. + /// + /// The drv path is calculated by invoking [build_text_path], using + /// the `name` with a `.drv` suffix as name, all [Derivation::input_sources] and + /// keys of [Derivation::input_derivations] as references, and the ATerm string of + /// the [Derivation] as content. 
+ pub fn calculate_derivation_path(&self, name: &str) -> Result<StorePath, DerivationError> { + // append .drv to the name + let name = &format!("{}.drv", name); + + // collect the list of paths from input_sources and input_derivations + // into a (sorted, guaranteed by BTreeSet) list of references + let references: BTreeSet<String> = { + let mut inputs = self.input_sources.clone(); + let input_derivation_keys: Vec<String> = + self.input_derivations.keys().cloned().collect(); + inputs.extend(input_derivation_keys); + inputs + }; + + build_text_path(name, self.to_aterm_bytes(), references) + .map(|s| s.to_owned()) + .map_err(|_e| DerivationError::InvalidOutputName(name.to_string())) + } + + /// Returns the FOD digest, if the derivation is fixed-output, or None if + /// it's not. + /// TODO: this is kinda the string from [build_ca_path] with a + /// [CAHash::Flat], what's fed to `build_store_path_from_fingerprint_parts` + /// (except the out_output.path being an empty string) + fn fod_digest(&self) -> Option<[u8; 32]> { + if self.outputs.len() != 1 { + return None; + } + + let out_output = self.outputs.get("out")?; + let ca_hash = &out_output.ca_hash.as_ref()?; + + Some( + Sha256::new_with_prefix(format!( + "fixed:out:{}{}:{}", + ca_kind_prefix(ca_hash), + ca_hash.digest().to_nix_hex_string(), + out_output.path + )) + .finalize() + .into(), + ) + } + + /// Calculates the hash of a derivation modulo fixed-output subderivations. + /// + /// This is called `hashDerivationModulo` in nixcpp. + /// + /// It returns a [NixHash], created by calculating the sha256 digest of + /// the derivation ATerm representation, except that: + /// - any input derivation paths have been replaced "by the result of a + /// recursive call to this function" and that + /// - for fixed-output derivations the special + /// `fixed:out:${algo}:${digest}:${fodPath}` string is hashed instead of + /// the A-Term. 
+ /// + /// If the derivation is not a fixed derivation, it's up to the caller of + /// this function to provide a lookup function to lookup these calculation + /// results of parent derivations at `fn_get_hash_derivation_modulo` (by + /// drv path). + pub fn derivation_or_fod_hash<F>(&self, fn_get_derivation_or_fod_hash: F) -> NixHash + where + F: Fn(&StorePathRef) -> NixHash, + { + // Fixed-output derivations return a fixed hash. + // Non-Fixed-output derivations return a hash of the ATerm notation, but with all + // input_derivation paths replaced by a recursive call to this function. + // We use fn_get_derivation_or_fod_hash here, so callers can precompute this. + let digest = self.fod_digest().unwrap_or({ + // This is a new map from derivation_or_fod_hash.digest (as lowerhex) + // to list of output names + let mut replaced_input_derivations: BTreeMap<String, BTreeSet<String>> = + BTreeMap::new(); + + // For each input_derivation, look up the + // derivation_or_fod_hash, and replace the derivation path with its HEXLOWER + // digest. + // This is not the [NixHash::to_nix_hash_string] format (which would include a sha256: prefix). 
+ for (drv_path_str, output_names) in &self.input_derivations { + // parse drv_path to StorePathRef + let drv_path = StorePathRef::from_absolute_path(drv_path_str.as_bytes()) + .expect("invalid input derivation path"); + replaced_input_derivations.insert( + data_encoding::HEXLOWER + .encode(fn_get_derivation_or_fod_hash(&drv_path).digest_as_bytes()), + output_names.clone(), + ); + } + + // construct a new derivation struct with these replaced input derivation strings + let replaced_derivation = Derivation { + input_derivations: replaced_input_derivations, + ..self.clone() + }; + + // write the ATerm of that to the hash function + let mut hasher = Sha256::new(); + hasher.update(replaced_derivation.to_aterm_bytes()); + + hasher.finalize().into() + }); + + NixHash::Sha256(digest) + } + + /// This calculates all output paths of a Derivation and updates the struct. + /// It requires the struct to be initially without output paths. + /// This means, self.outputs[$outputName].path needs to be an empty string, + /// and self.environment[$outputName] needs to be an empty string. + /// + /// Output path calculation requires knowledge of the + /// derivation_or_fod_hash [NixHash], which (in case of non-fixed-output + /// derivations) also requires knowledge of other hash_derivation_modulo + /// [NixHash]es. + /// + /// We solve this by asking the caller of this function to provide the + /// hash_derivation_modulo of the current Derivation. + /// + /// On completion, self.environment[$outputName] and + /// self.outputs[$outputName].path are set to the calculated output path for all + /// outputs. + pub fn calculate_output_paths( + &mut self, + name: &str, + derivation_or_fod_hash: &NixHash, + ) -> Result<(), DerivationError> { + // The fingerprint and hash differs per output + for (output_name, output) in self.outputs.iter_mut() { + // Assert that outputs are not yet populated, to avoid using this function wrongly. 
+ // We don't also go over self.environment, but it's a sufficient + // footgun prevention mechanism. + assert!(output.path.is_empty()); + + let path_name = output_path_name(name, output_name); + + // For fixed output derivation we use the per-output info, otherwise we use the + // derivation hash. + let abs_store_path = if let Some(ref hwm) = output.ca_hash { + build_ca_path(&path_name, hwm, Vec::<String>::new(), false).map_err(|e| { + DerivationError::InvalidOutputDerivationPath(output_name.to_string(), e) + })? + } else { + build_output_path(derivation_or_fod_hash, output_name, &path_name).map_err(|e| { + DerivationError::InvalidOutputDerivationPath( + output_name.to_string(), + store_path::BuildStorePathError::InvalidStorePath(e), + ) + })? + }; + + output.path = abs_store_path.to_absolute_path(); + self.environment.insert( + output_name.to_string(), + abs_store_path.to_absolute_path().into(), + ); + } + + Ok(()) + } +} + +/// Calculate the name part of the store path of a derivation [Output]. +/// +/// It's the name, and (if it's the non-out output), the output name +/// after a `-`. +fn output_path_name(derivation_name: &str, output_name: &str) -> String { + let mut output_path_name = derivation_name.to_string(); + if output_name != "out" { + output_path_name.push('-'); + output_path_name.push_str(output_name); + } + output_path_name +} + +/// For a [CAHash], return the "prefix" used for NAR purposes. +/// For [CAHash::Flat], this is an empty string, for [CAHash::Nar], it's "r:". +/// Panics for other [CAHash] kinds, as they're not valid in a derivation +/// context. 
+fn ca_kind_prefix(ca_hash: &CAHash) -> &'static str { + match ca_hash { + CAHash::Flat(_) => "", + CAHash::Nar(_) => "r:", + _ => panic!("invalid ca hash in derivation context: {:?}", ca_hash), + } +} diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs new file mode 100644 index 000000000000..b7fa1ac379b0 --- /dev/null +++ b/tvix/nix-compat/src/derivation/output.rs @@ -0,0 +1,146 @@ +use crate::derivation::OutputError; +use crate::nixhash::CAHash; +use crate::store_path::StorePathRef; +use serde::{Deserialize, Serialize}; +use serde_json::Map; + +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)] +pub struct Output { + pub path: String, + + #[serde(flatten)] + pub ca_hash: Option<CAHash>, // we can only represent a subset here. +} + +impl<'de> Deserialize<'de> for Output { + fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> + where + D: serde::Deserializer<'de>, + { + let fields = Map::deserialize(deserializer)?; + Ok(Self { + path: fields + .get("path") + .ok_or(serde::de::Error::missing_field( + "`path` is missing but required for outputs", + ))? + .as_str() + .ok_or(serde::de::Error::invalid_type( + serde::de::Unexpected::Other("certainly not a string"), + &"a string", + ))? + .to_owned(), + ca_hash: CAHash::from_map::<D>(&fields)?, + }) + } +} + +impl Output { + pub fn is_fixed(&self) -> bool { + self.ca_hash.is_some() + } + + pub fn validate(&self, validate_output_paths: bool) -> Result<(), OutputError> { + if let Some(fixed_output_hash) = &self.ca_hash { + match fixed_output_hash { + CAHash::Flat(_) | CAHash::Nar(_) => { + // all hashes allowed for Flat, and Nar. 
+ } + _ => return Err(OutputError::InvalidCAHash(fixed_output_hash.clone())), + } + } + + if validate_output_paths { + if let Err(e) = StorePathRef::from_absolute_path(self.path.as_bytes()) { + return Err(OutputError::InvalidOutputPath(self.path.to_string(), e)); + } + } + Ok(()) + } +} + +/// This ensures that a potentially valid input addressed +/// output is deserialized as a non-fixed output. +#[test] +fn deserialize_valid_input_addressed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablabla" + }"#; + let output: Output = serde_json::from_str(json_bytes).expect("must parse"); + + assert!(!output.is_fixed()); +} + +/// This ensures that a potentially valid fixed output +/// output deserializes fine as a fixed output. +#[test] +fn deserialize_valid_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256" + }"#; + let output: Output = serde_json::from_str(json_bytes).expect("must parse"); + + assert!(output.is_fixed()); +} + +/// This ensures that parsing an input with the invalid hash encoding +/// will result in a parsing failure. +#[test] +fn deserialize_with_error_invalid_hash_encoding_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "IAMNOTVALIDNIXBASE32", + "hashAlgo": "r:sha256" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the wrong hash algo +/// will result in a parsing failure. 
+#[test] +fn deserialize_with_error_invalid_hash_algo_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha1024" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the missing hash algo but present hash will result in a +/// parsing failure. +#[test] +fn deserialize_with_error_missing_hash_algo_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the missing hash but present hash algo will result in a +/// parsing failure. +#[test] +fn deserialize_with_error_missing_hash_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hashAlgo": "r:sha1024" + }"#; + let output: Result<Output, _> = serde_json::from_str(json_bytes); + + assert!(output.is_err()); +} diff --git a/tvix/nix-compat/src/derivation/parse_error.rs b/tvix/nix-compat/src/derivation/parse_error.rs new file mode 100644 index 000000000000..26df13f5772b --- /dev/null +++ b/tvix/nix-compat/src/derivation/parse_error.rs @@ -0,0 +1,81 @@ +//! This contains error and result types that can happen while parsing +//! Derivations from ATerm. 
+use nom::IResult; + +use crate::nixhash; + +pub type NomResult<I, O> = IResult<I, O, NomError<I>>; + +#[derive(Debug, thiserror::Error, PartialEq)] +pub enum ErrorKind { + /// duplicate key in map + #[error("duplicate map key: {0}")] + DuplicateMapKey(String), + + /// Input derivation has two outputs with the same name + #[error("duplicate output name {1} for input derivation {0}")] + DuplicateInputDerivationOutputName(String, String), + + #[error("duplicate input source: {0}")] + DuplicateInputSource(String), + + #[error("nix hash error: {0}")] + NixHashError(nixhash::Error), + + #[error("nom error: {0:?}")] + Nom(nom::error::ErrorKind), +} + +/// Our own error type to pass along parser-related errors. +#[derive(Debug, PartialEq)] +pub struct NomError<I> { + /// position of the error in the input data + pub input: I, + /// error code + pub code: ErrorKind, +} + +impl<I, E> nom::error::FromExternalError<I, E> for NomError<I> { + fn from_external_error(input: I, kind: nom::error::ErrorKind, _e: E) -> Self { + Self { + input, + code: ErrorKind::Nom(kind), + } + } +} + +impl<I> nom::error::ParseError<I> for NomError<I> { + fn from_error_kind(input: I, kind: nom::error::ErrorKind) -> Self { + Self { + input, + code: ErrorKind::Nom(kind), + } + } + + // FUTUREWORK: implement, so we have support for backtracking through the + // parse tree? + fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self { + other + } +} + +/// This wraps a [nom::error::Error] into our error. +impl<I> From<nom::error::Error<I>> for NomError<I> { + fn from(value: nom::error::Error<I>) -> Self { + Self { + input: value.input, + code: ErrorKind::Nom(value.code), + } + } +} + +/// This essentially implements +/// `From<nom::Err<nom::error::Error<I>>>` for `nom::Err<NomError<I>>`, +/// which we can't because `nom::Err<_>` is a foreign type. 
+pub(crate) fn into_nomerror<I>(e: nom::Err<nom::error::Error<I>>) -> nom::Err<NomError<I>> { + match e { + nom::Err::Incomplete(n) => nom::Err::Incomplete(n), + nom::Err::Error(e) => nom::Err::Error(e.into()), + nom::Err::Failure(e) => nom::Err::Failure(e.into()), + } +} diff --git a/tvix/nix-compat/src/derivation/parser.rs b/tvix/nix-compat/src/derivation/parser.rs new file mode 100644 index 000000000000..7ffa6fd46eb6 --- /dev/null +++ b/tvix/nix-compat/src/derivation/parser.rs @@ -0,0 +1,510 @@ +//! This module constructs a [Derivation] by parsing its [ATerm][] +//! serialization. +//! +//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html + +use bstr::BString; +use nom::bytes::complete::tag; +use nom::character::complete::char as nomchar; +use nom::combinator::{all_consuming, map_res}; +use nom::multi::{separated_list0, separated_list1}; +use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; +use std::collections::{btree_map, BTreeMap, BTreeSet}; +use thiserror; + +use crate::derivation::parse_error::{into_nomerror, ErrorKind, NomError, NomResult}; +use crate::derivation::{write, CAHash, Derivation, Output}; +use crate::{aterm, nixhash}; + +#[derive(Debug, thiserror::Error)] +pub enum Error<I> { + #[error("parsing error: {0}")] + Parser(NomError<I>), + #[error("premature EOF")] + Incomplete, + #[error("validation error: {0}")] + Validation(super::DerivationError), +} + +pub(crate) fn parse(i: &[u8]) -> Result<Derivation, Error<&[u8]>> { + match all_consuming(parse_derivation)(i) { + Ok((rest, derivation)) => { + // this shouldn't happen, as all_consuming shouldn't return. 
+ debug_assert!(rest.is_empty()); + + // invoke validate + derivation.validate(true).map_err(Error::Validation)?; + + Ok(derivation) + } + Err(nom::Err::Incomplete(_)) => Err(Error::Incomplete), + Err(nom::Err::Error(e) | nom::Err::Failure(e)) => Err(Error::Parser(e)), + } +} + +/// Consume a string containing the algo, and optionally an `r:` +/// prefix, and a digest (bytes), return a [CAHash::Nar] or [CAHash::Flat]. +fn from_algo_and_mode_and_digest<B: AsRef<[u8]>>( + algo_and_mode: &str, + digest: B, +) -> crate::nixhash::Result<CAHash> { + Ok(match algo_and_mode.strip_prefix("r:") { + Some(algo) => nixhash::CAHash::Nar(nixhash::from_algo_and_digest( + algo.try_into()?, + digest.as_ref(), + )?), + None => nixhash::CAHash::Flat(nixhash::from_algo_and_digest( + algo_and_mode.try_into()?, + digest.as_ref(), + )?), + }) +} + +/// Parse one output in ATerm. This is 4 string fields inside parens: +/// output name, output path, algo (and mode), digest. +/// Returns the output name and [Output] struct. +fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> { + delimited( + nomchar('('), + map_res( + |i| { + tuple(( + terminated(aterm::parse_string_field, nomchar(',')), + terminated(aterm::parse_string_field, nomchar(',')), + terminated(aterm::parse_string_field, nomchar(',')), + aterm::parse_bstr_field, + ))(i) + .map_err(into_nomerror) + }, + |(output_name, output_path, algo_and_mode, encoded_digest)| { + // convert these 4 fields into an [Output]. 
+ let ca_hash_res = { + if algo_and_mode.is_empty() && encoded_digest.is_empty() { + None + } else { + match data_encoding::HEXLOWER.decode(&encoded_digest) { + Ok(digest) => { + Some(from_algo_and_mode_and_digest(&algo_and_mode, digest)) + } + Err(e) => Some(Err(nixhash::Error::InvalidBase64Encoding(e))), + } + } + } + .transpose(); + + match ca_hash_res { + Ok(hash_with_mode) => Ok(( + output_name, + Output { + path: output_path, + ca_hash: hash_with_mode, + }, + )), + Err(e) => Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::NixHashError(e), + })), + } + }, + ), + nomchar(')'), + )(i) +} + +/// Parse multiple outputs in ATerm. This is a list of things accepted by +/// parse_output, and takes care of turning the (String, Output) returned from +/// it to a BTreeMap. +/// We don't use parse_kv here, as it's dealing with 2-tuples, and these are +/// 4-tuples. +fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, Output>> { + let res = delimited( + nomchar('['), + separated_list1(tag(","), parse_output), + nomchar(']'), + )(i); + + match res { + Ok((rst, outputs_lst)) => { + let mut outputs: BTreeMap<String, Output> = BTreeMap::default(); + for (output_name, output) in outputs_lst.into_iter() { + if outputs.contains_key(&output_name) { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateMapKey(output_name), + })); + } + outputs.insert(output_name, output); + } + Ok((rst, outputs)) + } + // pass regular parse errors along + Err(e) => Err(e), + } +} + +fn parse_input_derivations(i: &[u8]) -> NomResult<&[u8], BTreeMap<String, BTreeSet<String>>> { + let (i, input_derivations_list) = parse_kv::<Vec<String>, _>(aterm::parse_str_list)(i)?; + + // This is a map of drv paths to a list of output names. 
+ let mut input_derivations: BTreeMap<String, BTreeSet<String>> = BTreeMap::new(); + + for (input_derivation, output_names) in input_derivations_list { + let mut new_output_names = BTreeSet::new(); + for output_name in output_names.into_iter() { + if new_output_names.contains(&output_name) { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateInputDerivationOutputName( + input_derivation.to_string(), + output_name.to_string(), + ), + })); + } else { + new_output_names.insert(output_name); + } + } + input_derivations.insert(input_derivation, new_output_names); + } + + Ok((i, input_derivations)) +} + +fn parse_input_sources(i: &[u8]) -> NomResult<&[u8], BTreeSet<String>> { + let (i, input_sources_lst) = aterm::parse_str_list(i).map_err(into_nomerror)?; + + let mut input_sources: BTreeSet<_> = BTreeSet::new(); + for input_source in input_sources_lst.into_iter() { + if input_sources.contains(&input_source) { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateInputSource(input_source), + })); + } else { + input_sources.insert(input_source); + } + } + + Ok((i, input_sources)) +} + +pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> { + use nom::Parser; + preceded( + tag(write::DERIVATION_PREFIX), + delimited( + // inside parens + nomchar('('), + // tuple requires all errors to be of the same type, so we need to be a + // bit verbose here wrapping generic IResult into [NomATermResult]. 
+ tuple(( + // parse outputs + terminated(parse_outputs, nomchar(',')), + // // parse input derivations + terminated(parse_input_derivations, nomchar(',')), + // // parse input sources + terminated(parse_input_sources, nomchar(',')), + // // parse system + |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror), + // // parse builder + |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror), + // // parse arguments + |i| terminated(aterm::parse_str_list, nomchar(','))(i).map_err(into_nomerror), + // parse environment + parse_kv::<BString, _>(aterm::parse_bstr_field), + )), + nomchar(')'), + ) + .map( + |( + outputs, + input_derivations, + input_sources, + system, + builder, + arguments, + environment, + )| { + Derivation { + arguments, + builder, + environment, + input_derivations, + input_sources, + outputs, + system, + } + }, + ), + )(i) +} + +/// Parse a list of key/value pairs into a BTreeMap. +/// The parser for the values can be passed in. +/// In terms of ATerm, this is just a 2-tuple, +/// but we have the additional restriction that the first element needs to be +/// unique across all tuples. +pub(crate) fn parse_kv<'a, V, VF>( + vf: VF, +) -> impl FnMut(&'a [u8]) -> NomResult<&'a [u8], BTreeMap<String, V>> + 'static +where + VF: FnMut(&'a [u8]) -> nom::IResult<&'a [u8], V, nom::error::Error<&'a [u8]>> + Clone + 'static, +{ + move |i| + // inside brackets + delimited( + nomchar('['), + |ii| { + let res = separated_list0( + nomchar(','), + // inside parens + delimited( + nomchar('('), + separated_pair( + aterm::parse_string_field, + nomchar(','), + vf.clone(), + ), + nomchar(')'), + ), + )(ii).map_err(into_nomerror); + + match res { + Ok((rest, pairs)) => { + let mut kvs: BTreeMap<String, V> = BTreeMap::new(); + for (k, v) in pairs.into_iter() { + // collect the 2-tuple to a BTreeMap, + // and fail if the key was already seen before. 
+ match kvs.entry(k) { + btree_map::Entry::Vacant(e) => { e.insert(v); }, + btree_map::Entry::Occupied(e) => { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateMapKey(e.key().clone()), + })); + } + } + } + Ok((rest, kvs)) + } + Err(e) => Err(e), + } + }, + nomchar(']'), + )(i) +} + +#[cfg(test)] +mod tests { + use std::collections::{BTreeMap, BTreeSet}; + + use crate::derivation::{ + parse_error::ErrorKind, parser::from_algo_and_mode_and_digest, CAHash, NixHash, Output, + }; + use bstr::{BString, ByteSlice}; + use hex_literal::hex; + use lazy_static::lazy_static; + use test_case::test_case; + const DIGEST_SHA256: [u8; 32] = + hex!("a5ce9c155ed09397614646c9717fc7cd94b1023d7b76b618d409e4fefd6e9d39"); + + lazy_static! { + pub static ref NIXHASH_SHA256: NixHash = NixHash::Sha256(DIGEST_SHA256); + static ref EXP_MULTI_OUTPUTS: BTreeMap<String, Output> = { + let mut b = BTreeMap::new(); + b.insert( + "lib".to_string(), + Output { + path: "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib" + .to_string(), + ca_hash: None, + }, + ); + b.insert( + "out".to_string(), + Output { + path: "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string(), + ca_hash: None, + }, + ); + b + }; + static ref EXP_AB_MAP: BTreeMap<String, BString> = { + let mut b = BTreeMap::new(); + b.insert("a".to_string(), b"1".as_bstr().to_owned()); + b.insert("b".to_string(), b"2".as_bstr().to_owned()); + b + }; + static ref EXP_INPUT_DERIVATIONS_SIMPLE: BTreeMap<String, BTreeSet<String>> = { + let mut b = BTreeMap::new(); + b.insert( + "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv".to_string(), + { + let mut output_names = BTreeSet::new(); + output_names.insert("out".to_string()); + output_names + }, + ); + b.insert( + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv".to_string(), + { + let mut output_names = BTreeSet::new(); + output_names.insert("out".to_string()); + output_names.insert("lib".to_string()); + 
output_names + }, + ); + b + }; + static ref EXP_INPUT_DERIVATIONS_SIMPLE_ATERM: String = { + format!( + "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"lib\"])]", + "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv", + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv" + ) + }; + static ref EXP_INPUT_SOURCES_SIMPLE: BTreeSet<String> = { + let mut b = BTreeSet::new(); + b.insert("/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string()); + b.insert("/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib".to_string()); + b + }; + } + + /// Ensure parsing KVs works + #[test_case(b"[]", &BTreeMap::new(), b""; "empty")] + #[test_case(b"[(\"a\",\"1\"),(\"b\",\"2\")]", &EXP_AB_MAP, b""; "simple")] + fn parse_kv(input: &'static [u8], expected: &BTreeMap<String, BString>, exp_rest: &[u8]) { + let (rest, parsed) = super::parse_kv::<BString, _>(crate::aterm::parse_bstr_field)(input) + .expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(*expected, parsed); + } + + /// Ensures the kv parser complains about duplicate map keys + #[test] + fn parse_kv_fail_dup_keys() { + let input: &'static [u8] = b"[(\"a\",\"1\"),(\"a\",\"2\")]"; + let e = super::parse_kv::<BString, _>(crate::aterm::parse_bstr_field)(input) + .expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!(ErrorKind::DuplicateMapKey("a".to_string()), e.code); + } + _ => panic!("unexpected error"), + } + } + + /// Ensure parsing input derivations works. 
+ #[test_case(b"[]", &BTreeMap::new(); "empty")] + #[test_case(EXP_INPUT_DERIVATIONS_SIMPLE_ATERM.as_bytes(), &EXP_INPUT_DERIVATIONS_SIMPLE; "simple")] + fn parse_input_derivations( + input: &'static [u8], + expected: &BTreeMap<String, BTreeSet<String>>, + ) { + let (rest, parsed) = super::parse_input_derivations(input).expect("must parse"); + + assert_eq!(expected, &parsed, "parsed mismatch"); + assert!(rest.is_empty(), "rest must be empty"); + } + + /// Ensures the input derivation parser complains about duplicate output names + #[test] + fn parse_input_derivations_fail_dup_output_names() { + let input_str = format!( + "[(\"{0}\",[\"out\"]),(\"{1}\",[\"out\",\"out\"])]", + "/nix/store/8bjm87p310sb7r2r0sg4xrynlvg86j8k-hello-2.12.1.tar.gz.drv", + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv" + ); + let e = super::parse_input_derivations(input_str.as_bytes()).expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!( + ErrorKind::DuplicateInputDerivationOutputName( + "/nix/store/p3jc8aw45dza6h52v81j7lk69khckmcj-bash-5.2-p15.drv".to_string(), + "out".to_string() + ), + e.code + ); + } + _ => panic!("unexpected error"), + } + } + + /// Ensure parsing input sources works + #[test_case(b"[]", &BTreeSet::new(); "empty")] + #[test_case(b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out\",\"/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib\"]", &EXP_INPUT_SOURCES_SIMPLE; "simple")] + fn parse_input_sources(input: &'static [u8], expected: &BTreeSet<String>) { + let (rest, parsed) = super::parse_input_sources(input).expect("must parse"); + + assert_eq!(expected, &parsed, "parsed mismatch"); + assert!(rest.is_empty(), "rest must be empty"); + } + + /// Ensures the input sources parser complains about duplicate input sources + #[test] + fn parse_input_sources_fail_dup_keys() { + let input: &'static [u8] = b"[\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\",\"/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo\"]"; + 
let e = super::parse_input_sources(input).expect_err("must fail"); + + match e { + nom::Err::Failure(e) => { + assert_eq!( + ErrorKind::DuplicateInputSource( + "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-foo".to_string() + ), + e.code + ); + } + _ => panic!("unexpected error"), + } + } + + #[test_case( + br#"("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")"#, + ("out".to_string(), Output { + path: "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo".to_string(), + ca_hash: None + }); "simple" + )] + #[test_case( + br#"("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")"#, + ("out".to_string(), Output { + path: "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar".to_string(), + ca_hash: Some(from_algo_and_mode_and_digest("r:sha256", + data_encoding::HEXLOWER.decode(b"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba").unwrap() ).unwrap()), + }); "fod" + )] + fn parse_output(input: &[u8], expected: (String, Output)) { + let (rest, parsed) = super::parse_output(input).expect("must parse"); + assert!(rest.is_empty()); + assert_eq!(expected, parsed); + } + + #[test_case( + br#"[("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")]"#, + &EXP_MULTI_OUTPUTS; + "multi-out" + )] + fn parse_outputs(input: &[u8], expected: &BTreeMap<String, Output>) { + let (rest, parsed) = super::parse_outputs(input).expect("must parse"); + assert!(rest.is_empty()); + assert_eq!(*expected, parsed); + } + + #[test_case("sha256", &DIGEST_SHA256, CAHash::Flat(NIXHASH_SHA256.clone()); "sha256 flat")] + #[test_case("r:sha256", &DIGEST_SHA256, CAHash::Nar(NIXHASH_SHA256.clone()); "sha256 recursive")] + fn test_from_algo_and_mode_and_digest(algo_and_mode: &str, digest: &[u8], expected: CAHash) { + assert_eq!( + expected, + from_algo_and_mode_and_digest(algo_and_mode, digest).unwrap() + ); + } + 
+ #[test] + fn from_algo_and_mode_and_digest_failure() { + assert!(from_algo_and_mode_and_digest("r:sha256", []).is_err()); + assert!(from_algo_and_mode_and_digest("ha256", DIGEST_SHA256).is_err()); + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv new file mode 100644 index 000000000000..072561a29e3a --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")],[("/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("builder",":"),("name","foo"),("name","bar"),("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv new file mode 100644 index 000000000000..a4fea3c5f486 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")],[],[],":",":",[],[("builder",":"),("name","bar"),("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("outputHash","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"),("outputHashAlgo","sha256"),("outputHashMode","recursive"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json new file mode 100644 index 000000000000..c8bbc4cbb5be --- /dev/null 
+++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "name": "bar", + "out": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar", + "outputHash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "outputHashAlgo": "sha256", + "outputHashMode": "recursive", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256", + "path": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv new file mode 100644 index 000000000000..f0d9230a5a52 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json","","")],[],[],":",":",[],[("builder",":"),("json","{\"hello\":\"moto\\n\"}"),("name","nested-json"),("out","/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json new file mode 100644 index 000000000000..9cb0b43b4c09 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/292w8yzv5nn7nhdpxcs8b7vby2p27s09-nested-json.drv.json @@ -0,0 +1,19 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "json": "{\"hello\":\"moto\\n\"}", + "name": "nested-json", + "out": 
"/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/pzr7lsd3q9pqsnb42r9b23jc5sh8irvn-nested-json" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv new file mode 100644 index 000000000000..a2cf9d31f92e --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")],[("/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("builder",":"),("name","foo"),("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json new file mode 100644 index 000000000000..957a85ccab82 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "bar": "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar", + "builder": ":", + "name": "foo", + "out": "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo", + "system": ":" + }, + "inputDrvs": { + "/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv": [ + "out" + ] + }, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv 
b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv new file mode 100644 index 000000000000..bbe88c02c739 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode","","")],[],[],":",":",[],[("builder",":"),("letters","räksmörgÃ¥s\nrødgrød med fløde\nLübeck\n肥猪\nã“ã‚“ã«ã¡ã¯ / 今日ã¯\n🌮\n"),("name","unicode"),("out","/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json new file mode 100644 index 000000000000..f8f33c1bba17 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv.json @@ -0,0 +1,19 @@ +{ + "outputs": { + "out": { + "path": "/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "letters": "räksmörgÃ¥s\nrødgrød med fløde\nLübeck\n肥猪\nã“ã‚“ã«ã¡ã¯ / 今日ã¯\n🌮\n", + "name": "unicode", + "out": "/nix/store/vgvdj6nf7s8kvfbl2skbpwz9kc7xjazc-unicode", + "system": ":" + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv new file mode 100644 index 000000000000..4b9338c0b953 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv @@ -0,0 +1 @@ 
+Derive([("out","/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs","","")],[],[],":",":",[],[("__json","{\"builder\":\":\",\"name\":\"structured-attrs\",\"system\":\":\"}"),("out","/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json new file mode 100644 index 000000000000..74e3d7df55c5 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv.json @@ -0,0 +1,16 @@ +{ + "args": [], + "builder": ":", + "env": { + "__json": "{\"builder\":\":\",\"name\":\"structured-attrs\",\"system\":\":\"}", + "out": "/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/6a39dl014j57bqka7qx25k0vb20vkqm6-structured-attrs" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv new file mode 100644 index 000000000000..1699c2a75e48 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo","","")],[("/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"),("builder",":"),("name","foo"),("out","/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json 
b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json new file mode 100644 index 000000000000..831d27956d86 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "bar": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar", + "builder": ":", + "name": "foo", + "out": "/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo", + "system": ":" + }, + "inputDrvs": { + "/nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv": [ + "out" + ] + }, + "inputSrcs": [], + "outputs": { + "out": { + "path": "/nix/store/fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv new file mode 100644 index 000000000000..523612238c76 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv @@ -0,0 +1 @@ +Derive([("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")],[],[],":",":",[],[("builder",":"),("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib"),("name","has-multi-out"),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out"),("outputs","out lib"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json new file mode 100644 index 000000000000..0bd7a2991cc7 --- /dev/null +++ 
b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "lib": "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib", + "name": "has-multi-out", + "out": "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out", + "outputs": "out lib", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "lib": { + "path": "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib" + }, + "out": { + "path": "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv new file mode 100644 index 000000000000..6a7a35c58c3f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252","","")],[],[],":",":",[],[("builder",":"),("chars","ÅÄÖ"),("name","cp1252"),("out","/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json new file mode 100644 index 000000000000..9d6ba8b7977f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv": { + "outputs": { + "out": { + "path": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + 
"env": { + "builder": ":", + "chars": "ÅÄÖ", + "name": "cp1252", + "out": "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv new file mode 100644 index 000000000000..559e93ed0ed6 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar","r:sha1","0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33")],[],[],":",":",[],[("builder",":"),("name","bar"),("out","/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar"),("outputHash","0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33"),("outputHashAlgo","sha1"),("outputHashMode","recursive"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json new file mode 100644 index 000000000000..e297d271592f --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv.json @@ -0,0 +1,23 @@ +{ + "args": [], + "builder": ":", + "env": { + "builder": ":", + "name": "bar", + "out": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar", + "outputHash": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "outputHashAlgo": "sha1", + "outputHashMode": "recursive", + "system": ":" + }, + "inputDrvs": {}, + "inputSrcs": [], + "outputs": { + "out": { + "hash": "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33", + "hashAlgo": "r:sha1", + "path": "/nix/store/mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar" + } + }, + "system": ":" +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv 
b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv new file mode 100644 index 000000000000..b19fd8eb2ce4 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1","","")],[],[],":",":",[],[("builder",":"),("chars","ÅÄÖ"),("name","latin1"),("out","/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json new file mode 100644 index 000000000000..ffd5c08da830 --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/ok/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv.json @@ -0,0 +1,21 @@ +{ + "/nix/store/x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv": { + "outputs": { + "out": { + "path": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1" + } + }, + "inputSrcs": [], + "inputDrvs": {}, + "system": ":", + "builder": ":", + "args": [], + "env": { + "builder": ":", + "chars": "ÅÄÖ", + "name": "latin1", + "out": "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1", + "system": ":" + } + } +} diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs new file mode 100644 index 000000000000..168e11d46f1e --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -0,0 +1,416 @@ +use super::parse_error::ErrorKind; +use crate::derivation::output::Output; +use crate::derivation::parse_error::NomError; +use crate::derivation::parser::Error; +use crate::derivation::Derivation; +use crate::store_path::StorePath; +use bstr::{BStr, BString}; +use std::collections::BTreeSet; +use std::fs::File; +use std::io::Read; +use std::path::Path; +use std::str::FromStr; +use 
test_case::test_case; +use test_generator::test_resources; + +const RESOURCES_PATHS: &str = "src/derivation/tests/derivation_tests"; + +fn read_file(path: &str) -> BString { + let path = Path::new(path); + let mut file = File::open(path).unwrap(); + let mut file_contents = Vec::new(); + + file.read_to_end(&mut file_contents).unwrap(); + + file_contents.into() +} + +#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn check_serialization(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + let mut serialized_derivation = Vec::new(); + derivation.serialize(&mut serialized_derivation).unwrap(); + + let expected = read_file(path_to_drv_file); + + assert_eq!(expected, BStr::new(&serialized_derivation)); +} + +#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn validate(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + derivation + .validate(true) + .expect("derivation failed to validate") +} + +#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn check_to_aterm_bytes(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + 
let expected = read_file(path_to_drv_file); + + assert_eq!(expected, BStr::new(&derivation.to_aterm_bytes())); +} + +/// Reads in derivations in ATerm representation, parses with that parser, +/// then compares the structs with the ones obtained by parsing the JSON +/// representations. +#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn from_aterm_bytes(path_to_drv_file: &str) { + // Read in ATerm representation. + let aterm_bytes = read_file(path_to_drv_file); + let parsed_drv = Derivation::from_aterm_bytes(&aterm_bytes).expect("must succeed"); + + // For where we're able to load JSON fixtures, parse them and compare the structs. + // For where we're not, compare the bytes manually. + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + assert_eq!( + &[0xc5, 0xc4, 0xd6][..], + parsed_drv.environment.get("chars").unwrap(), + "expected bytes to match", + ); + } else { + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let fixture_derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + assert_eq!(fixture_derivation, parsed_drv); + } + + // Finally, write the ATerm serialization to another buffer, ensuring it's + // stable (and we compare all fields we couldn't compare in the non-utf8 + // derivations) + + assert_eq!( + &aterm_bytes, + &parsed_drv.to_aterm_bytes(), + "expected serialized ATerm to match initial input" + ); +} + +#[test] +fn from_aterm_bytes_duplicate_map_key() { + let buf: Vec<u8> = read_file(&format!("{}/{}", RESOURCES_PATHS, "duplicate.drv")).into(); + + let err = Derivation::from_aterm_bytes(&buf).expect_err("must fail"); + + match err { + Error::Parser(NomError { input: _, code }) => { + assert_eq!(code, ErrorKind::DuplicateMapKey("name".to_string())); + } + _ => { + panic!("unexpected error"); + } + } +} + +/// Read in a derivation in ATerm, but add some garbage at the end. 
+/// Ensure the parser detects and fails in this case. +#[test] +fn from_aterm_bytes_trailer() { + let mut buf: Vec<u8> = read_file(&format!( + "{}/ok/{}", + RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" + )) + .into(); + + buf.push(0x00); + + Derivation::from_aterm_bytes(&buf).expect_err("must fail"); +} + +#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] +#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")] +#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")] +#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")] +#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")] +#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")] +#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] +fn derivation_path(name: &str, expected_path: &str) { + let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, expected_path)); + let derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + assert_eq!( + derivation.calculate_derivation_path(name).unwrap(), + StorePath::from_str(expected_path).unwrap() + ); +} + +/// This trims all output paths from a Derivation struct, +/// by setting outputs[$outputName].path and environment[$outputName] to the empty string. 
+fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { + let mut trimmed_env = derivation.environment.clone(); + let mut trimmed_outputs = derivation.outputs.clone(); + + for (output_name, output) in &derivation.outputs { + trimmed_env.insert(output_name.clone(), "".into()); + assert!(trimmed_outputs.contains_key(output_name)); + trimmed_outputs.insert( + output_name.to_string(), + Output { + path: "".to_string(), + ..output.clone() + }, + ); + } + + // replace environment and outputs with the trimmed variants + Derivation { + environment: trimmed_env, + outputs: trimmed_outputs, + ..derivation.clone() + } +} + +#[test_case("0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv", "sha256:724f3e3634fce4cbbbd3483287b8798588e80280660b9a63fd13a1bc90485b33"; "fixed_sha256")] +#[test_case("ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv", "sha256:c79aebd0ce3269393d4a1fde2cbd1d975d879b40f0bf40a48f550edc107fd5df";"fixed-sha1")] +fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { + // read in the fixture + let json_bytes = read_file(&format!("{}/ok/{}.json", RESOURCES_PATHS, drv_path)); + let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); + + let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); + + assert_eq!(expected_nix_hash_string, actual.to_nix_hex_string()); +} + +/// This reads a Derivation (in A-Term), trims out all fields containing +/// calculated output paths, then triggers the output path calculation and +/// compares the struct to match what was originally read in. 
+#[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] +#[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")] +#[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")] +#[test_case("foo", "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv"; "simple-sha1")] +#[test_case("has-multi-out", "h32dahq0bx5rp1krcdx3a53asj21jvhk-has-multi-out.drv"; "multiple-outputs")] +#[test_case("structured-attrs", "9lj1lkjm2ag622mh4h9rpy6j607an8g2-structured-attrs.drv"; "structured-attrs")] +#[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] +#[test_case("cp1252", "m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv"; "cp1252")] +#[test_case("latin1", "x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv"; "latin1")] +fn output_paths(name: &str, drv_path_str: &str) { + // read in the derivation + let expected_derivation = Derivation::from_aterm_bytes( + read_file(&format!("{}/ok/{}", RESOURCES_PATHS, drv_path_str)).as_ref(), + ) + .expect("must succeed"); + + // create a version with trimmed output paths, simulating we constructed + // the struct. + let mut derivation = derivation_with_trimmed_output_paths(&expected_derivation); + + // calculate the derivation_or_fod_hash of derivation + // We don't expect the lookup function to be called for most derivations. 
+ let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|parent_drv_path| { + // 4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv may lookup /nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv + // ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv may lookup /nix/store/ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv + if name == "foo" + && ((drv_path_str == "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv" + && parent_drv_path.to_string() == "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv") + || (drv_path_str == "ch49594n9avinrf8ip0aslidkc4lxkqv-foo.drv" + && parent_drv_path.to_string() == "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv")) + { + // do the lookup, by reading in the fixture of the requested + // drv_name, and calculating its drv replacement (on the non-stripped version) + // In a real-world scenario you would have already done this during construction. + + let json_bytes = read_file(&format!( + "{}/ok/{}.json", + RESOURCES_PATHS, + Path::new(&parent_drv_path.to_string()) + .file_name() + .unwrap() + .to_string_lossy() + )); + + let drv: Derivation = serde_json::from_slice(&json_bytes).expect("must deserialize"); + + // calculate derivation_or_fod_hash for each parent. + // This may not trigger subsequent requests, as both parents are FOD. 
+ drv.derivation_or_fod_hash(|_| panic!("must not lookup")) + } else { + // we only expect this to be called in the "foo" testcase, for the "bar derivations" + panic!("may only be called for foo testcase on bar derivations"); + } + }); + + derivation + .calculate_output_paths(name, &calculated_derivation_or_fod_hash) + .unwrap(); + + // The derivation should now look like it was before + assert_eq!(expected_derivation, derivation); +} + +/// Exercises the output path calculation functions like a constructing client +/// (an implementation of builtins.derivation) would do: +/// +/// ```nix +/// rec { +/// bar = builtins.derivation { +/// name = "bar"; +/// builder = ":"; +/// system = ":"; +/// outputHash = "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"; +/// outputHashAlgo = "sha256"; +/// outputHashMode = "recursive"; +/// }; +/// +/// foo = builtins.derivation { +/// name = "foo"; +/// builder = ":"; +/// system = ":"; +/// inherit bar; +/// }; +/// } +/// ``` +/// It first assembles the bar derivation, does the output path calculation on +/// it, then continues with the foo derivation. +/// +/// The code ensures the resulting Derivations match our fixtures. 
+#[test]
+fn output_path_construction() {
+    // create the bar derivation
+    let mut bar_drv = Derivation {
+        builder: ":".to_string(),
+        system: ":".to_string(),
+        ..Default::default()
+    };
+
+    // assemble bar env
+    let bar_env = &mut bar_drv.environment;
+    bar_env.insert("builder".to_string(), ":".into());
+    bar_env.insert("name".to_string(), "bar".into());
+    bar_env.insert("out".to_string(), "".into()); // will be calculated
+    bar_env.insert(
+        "outputHash".to_string(),
+        "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba".into(),
+    );
+    bar_env.insert("outputHashAlgo".to_string(), "sha256".into());
+    bar_env.insert("outputHashMode".to_string(), "recursive".into());
+    bar_env.insert("system".to_string(), ":".into());
+
+    // assemble bar outputs
+    // bar is fixed-output: its single "out" output carries a NAR sha256 CAHash
+    // matching the outputHash env value above.
+    bar_drv.outputs.insert(
+        "out".to_string(),
+        Output {
+            path: "".to_string(), // will be calculated
+            ca_hash: Some(crate::nixhash::CAHash::Nar(
+                crate::nixhash::from_algo_and_digest(
+                    crate::nixhash::HashAlgo::Sha256,
+                    &data_encoding::HEXLOWER
+                        .decode(
+                            "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba"
+                                .as_bytes(),
+                        )
+                        .unwrap(),
+                )
+                .unwrap(),
+            )),
+        },
+    );
+
+    // calculate bar output paths
+    // bar is fixed-output, so the drv-path lookup closure must never be called.
+    let bar_calc_result = bar_drv.calculate_output_paths(
+        "bar",
+        &bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup")),
+    );
+    assert!(bar_calc_result.is_ok());
+
+    // ensure it matches our bar fixture
+    let bar_json_bytes = read_file(&format!(
+        "{}/ok/{}.json",
+        RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"
+    ));
+    let bar_drv_expected: Derivation =
+        serde_json::from_slice(&bar_json_bytes).expect("must deserialize");
+    assert_eq!(bar_drv_expected, bar_drv);
+
+    // now construct foo, which requires bar_drv
+    // Note how we refer to the output path, drv name and replacement_str (with calculated output paths) of bar.
+    let bar_output_path = &bar_drv.outputs.get("out").expect("must exist").path;
+    let bar_drv_derivation_or_fod_hash =
+        bar_drv.derivation_or_fod_hash(|_| panic!("is FOD, should not lookup"));
+
+    let bar_drv_path = bar_drv
+        .calculate_derivation_path("bar")
+        .expect("must succeed");
+
+    // create foo derivation
+    let mut foo_drv = Derivation {
+        builder: ":".to_string(),
+        system: ":".to_string(),
+        ..Default::default()
+    };
+
+    // assemble foo env
+    let foo_env = &mut foo_drv.environment;
+    foo_env.insert("bar".to_string(), bar_output_path.to_owned().into());
+    foo_env.insert("builder".to_string(), ":".into());
+    foo_env.insert("name".to_string(), "foo".into());
+    foo_env.insert("out".to_string(), "".into()); // will be calculated
+    foo_env.insert("system".to_string(), ":".into());
+
+    // assemble foo outputs
+    foo_drv.outputs.insert(
+        "out".to_string(),
+        Output {
+            path: "".to_string(), // will be calculated
+            ca_hash: None,
+        },
+    );
+
+    // assemble foo input_derivations
+    // bar is referenced by its absolute .drv store path.
+    foo_drv.input_derivations.insert(
+        bar_drv_path.to_absolute_path(),
+        BTreeSet::from(["out".to_string()]),
+    );
+
+    // calculate foo output paths
+    // foo is not fixed-output, so the closure is queried; it may only ever be
+    // asked about bar's drv path, and answers with bar's precomputed hash.
+    let foo_calc_result = foo_drv.calculate_output_paths(
+        "foo",
+        &foo_drv.derivation_or_fod_hash(|drv_path| {
+            if drv_path.to_string() != "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" {
+                panic!("lookup called with unexpected drv_path: {}", drv_path);
+            }
+            bar_drv_derivation_or_fod_hash.clone()
+        }),
+    );
+    assert!(foo_calc_result.is_ok());
+
+    // ensure it matches our foo fixture
+    let foo_json_bytes = read_file(&format!(
+        "{}/ok/{}.json",
+        RESOURCES_PATHS, "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv",
+    ));
+    let foo_drv_expected: Derivation =
+        serde_json::from_slice(&foo_json_bytes).expect("must deserialize");
+    assert_eq!(foo_drv_expected, foo_drv);
+
+    assert_eq!(
+        StorePath::from_str("4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv").expect("must succeed"),
+        foo_drv
+            .calculate_derivation_path("foo")
+            .expect("must succeed")
+    );
+}
diff
--git a/tvix/nix-compat/src/derivation/validate.rs b/tvix/nix-compat/src/derivation/validate.rs
new file mode 100644
index 000000000000..d711f5ce1de2
--- /dev/null
+++ b/tvix/nix-compat/src/derivation/validate.rs
@@ -0,0 +1,158 @@
+use crate::derivation::{Derivation, DerivationError};
+use crate::store_path::{self, StorePathRef};
+
+impl Derivation {
+    /// validate ensures a Derivation struct is properly populated,
+    /// and returns a [DerivationError] if not.
+    ///
+    /// if `validate_output_paths` is set to false, the output paths are
+    /// excluded from validation.
+    ///
+    /// This is helpful to validate struct population before invoking
+    /// [Derivation::calculate_output_paths].
+    pub fn validate(&self, validate_output_paths: bool) -> Result<(), DerivationError> {
+        // Ensure there is at least one output.
+        if self.outputs.is_empty() {
+            return Err(DerivationError::NoOutputs());
+        }
+
+        // Validate all outputs
+        for (output_name, output) in &self.outputs {
+            // empty output names are invalid.
+            //
+            // `drv` is an invalid output name too, as this would cause
+            // a `builtins.derivation` call to return an attrset with a
+            // `drvPath` key (which already exists) and has a different
+            // meaning.
+            //
+            // Other output names that don't match the name restrictions from
+            // [StorePathRef] will fail the [StorePathRef::validate_name] check.
+            if output_name.is_empty()
+                || output_name == "drv"
+                || store_path::validate_name(output_name.as_bytes()).is_err()
+            {
+                return Err(DerivationError::InvalidOutputName(output_name.to_string()));
+            }
+
+            // A fixed-output derivation must have exactly one output, and it
+            // must be named "out".
+            if output.is_fixed() {
+                if self.outputs.len() != 1 {
+                    return Err(DerivationError::MoreThanOneOutputButFixed());
+                }
+                if output_name != "out" {
+                    return Err(DerivationError::InvalidOutputNameForFixed(
+                        output_name.to_string(),
+                    ));
+                }
+            }
+
+            if let Err(e) = output.validate(validate_output_paths) {
+                return Err(DerivationError::InvalidOutput(output_name.to_string(), e));
+            }
+        }
+
+        // Validate all input_derivations
+        for (input_derivation_path, output_names) in &self.input_derivations {
+            // Validate input_derivation_path
+            if let Err(e) = StorePathRef::from_absolute_path(input_derivation_path.as_bytes()) {
+                return Err(DerivationError::InvalidInputDerivationPath(
+                    input_derivation_path.to_string(),
+                    e,
+                ));
+            }
+
+            if !input_derivation_path.ends_with(".drv") {
+                return Err(DerivationError::InvalidInputDerivationPrefix(
+                    input_derivation_path.to_string(),
+                ));
+            }
+
+            // Every referenced input derivation must request at least one output.
+            if output_names.is_empty() {
+                return Err(DerivationError::EmptyInputDerivationOutputNames(
+                    input_derivation_path.to_string(),
+                ));
+            }
+
+            for output_name in output_names.iter() {
+                // empty output names are invalid.
+                //
+                // `drv` is an invalid output name too, as this would cause
+                // a `builtins.derivation` call to return an attrset with a
+                // `drvPath` key (which already exists) and has a different
+                // meaning.
+                //
+                // Other output names that don't match the name restrictions from
+                // [StorePath] will fail the [StorePathRef::validate_name] check.
+                if output_name.is_empty()
+                    || output_name == "drv"
+                    || store_path::validate_name(output_name.as_bytes()).is_err()
+                {
+                    return Err(DerivationError::InvalidInputDerivationOutputName(
+                        input_derivation_path.to_string(),
+                        output_name.to_string(),
+                    ));
+                }
+            }
+        }
+
+        // Validate all input_sources
+        for input_source in self.input_sources.iter() {
+            if let Err(e) = StorePathRef::from_absolute_path(input_source.as_bytes()) {
+                return Err(DerivationError::InvalidInputSourcesPath(
+                    input_source.to_string(),
+                    e,
+                ));
+            }
+        }
+
+        // validate platform
+        if self.system.is_empty() {
+            return Err(DerivationError::InvalidPlatform(self.system.to_string()));
+        }
+
+        // validate builder
+        if self.builder.is_empty() {
+            return Err(DerivationError::InvalidBuilder(self.builder.to_string()));
+        }
+
+        // validate env, none of the keys may be empty.
+        // We skip the `name` validation seen in go-nix.
+        for k in self.environment.keys() {
+            if k.is_empty() {
+                return Err(DerivationError::InvalidEnvironmentKey(k.to_string()));
+            }
+        }
+
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::collections::BTreeMap;
+
+    use crate::derivation::{CAHash, Derivation, Output};
+
+    /// Regression test: produce a Derivation that's almost valid, except its
+    /// fixed-output output has the wrong hash specified.
+    #[test]
+    fn output_validate() {
+        let mut outputs = BTreeMap::new();
+        outputs.insert(
+            "out".to_string(),
+            Output {
+                path: "".to_string(),
+                ca_hash: Some(CAHash::Text([0; 32])), // This is disallowed
+            },
+        );
+
+        let drv = Derivation {
+            arguments: vec![],
+            builder: "/bin/sh".to_string(),
+            outputs,
+            system: "x86_64-linux".to_string(),
+            ..Default::default()
+        };
+
+        drv.validate(false).expect_err("must fail");
+    }
+}
diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs
new file mode 100644
index 000000000000..0981dfccae22
--- /dev/null
+++ b/tvix/nix-compat/src/derivation/write.rs
@@ -0,0 +1,196 @@
+//!
 This module implements the serialisation of derivations into the
+//! [ATerm][] format used by C++ Nix.
+//!
+//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html
+
+use crate::aterm::escape_bytes;
+use crate::derivation::{ca_kind_prefix, output::Output};
+use bstr::BString;
+use std::{
+    collections::{BTreeMap, BTreeSet},
+    io,
+    io::Error,
+    io::Write,
+};
+
+// ATerm literals used throughout the serialisation.
+pub const DERIVATION_PREFIX: &str = "Derive";
+pub const PAREN_OPEN: char = '(';
+pub const PAREN_CLOSE: char = ')';
+pub const BRACKET_OPEN: char = '[';
+pub const BRACKET_CLOSE: char = ']';
+pub const COMMA: char = ',';
+pub const QUOTE: char = '"';
+
+// Writes a character to the writer.
+pub(crate) fn write_char(writer: &mut impl Write, c: char) -> io::Result<()> {
+    let mut buf = [0; 4];
+    let b = c.encode_utf8(&mut buf).as_bytes();
+    writer.write_all(b)
+}
+
+// Write a string `s` as a quoted field to the writer.
+// If `escape` is false, escaping is skipped entirely; that is only safe
+// when `s` is known to contain no characters that need escaping.
+pub(crate) fn write_field<S: AsRef<[u8]>>(
+    writer: &mut impl Write,
+    s: S,
+    escape: bool,
+) -> io::Result<()> {
+    write_char(writer, QUOTE)?;
+
+    if !escape {
+        writer.write_all(s.as_ref())?;
+    } else {
+        writer.write_all(&escape_bytes(s.as_ref()))?;
+    }
+
+    write_char(writer, QUOTE)?;
+
+    Ok(())
+}
+
+// Writes the elements as a comma-separated sequence of quoted, escaped
+// fields. Surrounding brackets are the caller's responsibility.
+fn write_array_elements<S: AsRef<[u8]>>(
+    writer: &mut impl Write,
+    elements: &[S],
+) -> Result<(), io::Error> {
+    for (index, element) in elements.iter().enumerate() {
+        if index > 0 {
+            write_char(writer, COMMA)?;
+        }
+
+        write_field(writer, element, true)?;
+    }
+
+    Ok(())
+}
+
+// Writes the outputs as an ATerm list of
+// (name, path, mode-and-algo, hex digest) 4-tuples.
+// For outputs without a ca_hash, the last two fields are empty strings.
+pub fn write_outputs(
+    writer: &mut impl Write,
+    outputs: &BTreeMap<String, Output>,
+) -> Result<(), io::Error> {
+    write_char(writer, BRACKET_OPEN)?;
+    for (ii, (output_name, output)) in outputs.iter().enumerate() {
+        if ii > 0 {
+            write_char(writer, COMMA)?;
+        }
+
+        write_char(writer, PAREN_OPEN)?;
+
+        let mut elements: Vec<&str> = vec![output_name, &output.path];
+
+        let (mode_and_algo, digest) = match &output.ca_hash {
+            Some(ca_hash) => (
+                format!("{}{}", ca_kind_prefix(ca_hash), ca_hash.digest().algo()),
+                data_encoding::HEXLOWER.encode(ca_hash.digest().digest_as_bytes()),
+            ),
+            None => ("".to_string(), "".to_string()),
+        };
+
+        elements.push(&mode_and_algo);
+        elements.push(&digest);
+
+        write_array_elements(writer, &elements)?;
+
+        write_char(writer, PAREN_CLOSE)?;
+    }
+    write_char(writer, BRACKET_CLOSE)?;
+
+    Ok(())
+}
+
+// Writes the input derivations as an ATerm list of
+// (drv path, [output names]) pairs.
+pub fn write_input_derivations(
+    writer: &mut impl Write,
+    input_derivations: &BTreeMap<String, BTreeSet<String>>,
+) -> Result<(), io::Error> {
+    write_char(writer, BRACKET_OPEN)?;
+
+    for (ii, (input_derivation, output_names)) in input_derivations.iter().enumerate() {
+        if ii > 0 {
+            write_char(writer, COMMA)?;
+        }
+
+        write_char(writer, PAREN_OPEN)?;
+        // escape=false: the derivation path is written verbatim.
+        write_field(writer, input_derivation.as_str(), false)?;
+        write_char(writer, COMMA)?;
+
+        write_char(writer, BRACKET_OPEN)?;
+        write_array_elements(
+            writer,
+            &output_names
+                .iter()
+                .map(String::as_bytes)
+                .collect::<Vec<_>>(),
+        )?;
+        write_char(writer, BRACKET_CLOSE)?;
+
+        write_char(writer, PAREN_CLOSE)?;
+    }
+
+    write_char(writer, BRACKET_CLOSE)?;
+
+    Ok(())
+}
+
+// Writes the input sources as a bracketed list of quoted, escaped paths.
pub fn write_input_sources(
+    writer: &mut impl Write,
+    input_sources: &BTreeSet<String>,
+) -> Result<(), io::Error> {
+    write_char(writer, BRACKET_OPEN)?;
+    // NOTE(review): `String::from` clones every element here; mapping with
+    // `String::as_bytes` (as in write_input_derivations) would avoid the copies.
+    write_array_elements(
+        writer,
+        &input_sources.iter().map(String::from).collect::<Vec<_>>(),
+    )?;
+    write_char(writer, BRACKET_CLOSE)?;
+
+    Ok(())
+}
+
+// Writes the system/platform string as a quoted, escaped field.
+pub fn write_system(writer: &mut impl Write, platform: &str) -> Result<(), Error> {
+    write_field(writer, platform, true)?;
+    Ok(())
+}
+
+// Writes the builder string as a quoted, escaped field.
+pub fn write_builder(writer: &mut impl Write, builder: &str) -> Result<(), Error> {
+    write_field(writer, builder, true)?;
+    Ok(())
+}
+
+// Writes the builder arguments as a bracketed list of quoted, escaped fields.
+pub fn write_arguments(writer: &mut impl Write, arguments: &[String]) -> Result<(), io::Error> {
+    write_char(writer, BRACKET_OPEN)?;
+    write_array_elements(
+        writer,
+        &arguments
+            .iter()
+            .map(|s| s.as_bytes().to_vec().into())
+            .collect::<Vec<BString>>(),
+    )?;
+    write_char(writer, BRACKET_CLOSE)?;
+
+    Ok(())
+}
+
+// Writes the environment as a bracketed list of (key, value) pairs.
+// Keys are written verbatim (escape=false); values are escaped.
+pub fn write_environment<E, K, V>(writer: &mut impl Write, environment: E) -> Result<(), io::Error>
+where
+    E: IntoIterator<Item = (K, V)>,
+    K: AsRef<[u8]>,
+    V: AsRef<[u8]>,
+{
+    write_char(writer, BRACKET_OPEN)?;
+
+    for (i, (k, v)) in environment.into_iter().enumerate() {
+        if i > 0 {
+            write_char(writer, COMMA)?;
+        }
+
+        write_char(writer, PAREN_OPEN)?;
+        write_field(writer, k, false)?;
+        write_char(writer, COMMA)?;
+        write_field(writer, v, true)?;
+        write_char(writer, PAREN_CLOSE)?;
+    }
+
+    write_char(writer, BRACKET_CLOSE)?;
+
+    Ok(())
+}
 |