From 2410f2292f53a17242ed54b0af2d7b04ec3173f6 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Mon, 31 Jul 2023 15:46:39 +0200 Subject: feat(nix-compat/{aterm,derivation}): init parser This provides a nom-based parser for Nix derivations in ATerm format, which can be reached via `Derivation::from_aterm_bytes`. Some of the lower-level ATerm primitives are moved into a (new) aterm module, and some more higher-level ones that construct derivation- specific types. Also, move the escape_bytes function into there, this is a generic ATerm thing. Change-Id: I2b03b8a1461c7ea2fcb8640c2fc3d1fa3ea719fb Reviewed-on: https://cl.tvl.fyi/c/depot/+/9730 Autosubmit: flokli Reviewed-by: raitobezarius Tested-by: BuildkiteCI --- tvix/nix-compat/Cargo.toml | 2 + tvix/nix-compat/src/aterm/escape.rs | 27 ++ tvix/nix-compat/src/aterm/mod.rs | 7 + tvix/nix-compat/src/aterm/parser.rs | 123 ++++++++ tvix/nix-compat/src/derivation/escape.rs | 27 -- tvix/nix-compat/src/derivation/mod.rs | 9 +- tvix/nix-compat/src/derivation/parse_error.rs | 73 +++++ tvix/nix-compat/src/derivation/parser.rs | 327 +++++++++++++++++++++ .../tests/derivation_tests/duplicate.drv | 1 + tvix/nix-compat/src/derivation/tests/mod.rs | 70 +++++ tvix/nix-compat/src/derivation/write.rs | 2 +- tvix/nix-compat/src/lib.rs | 1 + tvix/nix-compat/src/nixhash/mod.rs | 2 +- 13 files changed, 641 insertions(+), 30 deletions(-) create mode 100644 tvix/nix-compat/src/aterm/escape.rs create mode 100644 tvix/nix-compat/src/aterm/mod.rs create mode 100644 tvix/nix-compat/src/aterm/parser.rs delete mode 100644 tvix/nix-compat/src/derivation/escape.rs create mode 100644 tvix/nix-compat/src/derivation/parse_error.rs create mode 100644 tvix/nix-compat/src/derivation/parser.rs create mode 100644 tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv (limited to 'tvix/nix-compat') diff --git a/tvix/nix-compat/Cargo.toml b/tvix/nix-compat/Cargo.toml index df7cc2d7b56d..0fff23fc4421 100644 --- a/tvix/nix-compat/Cargo.toml +++ 
b/tvix/nix-compat/Cargo.toml @@ -12,6 +12,8 @@ async = ["futures-util"] bstr = { version = "1.6.0", features = ["alloc", "unicode", "serde"] } data-encoding = "2.3.3" futures-util = { version = "0.3.28", features = ["io"], optional = true } +glob = "0.3.0" +nom = "7.1.3" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" sha2 = "0.10.6" diff --git a/tvix/nix-compat/src/aterm/escape.rs b/tvix/nix-compat/src/aterm/escape.rs new file mode 100644 index 000000000000..06b550bbf02d --- /dev/null +++ b/tvix/nix-compat/src/aterm/escape.rs @@ -0,0 +1,27 @@ +use bstr::ByteSlice; + +/// Escapes a byte sequence. Does not add surrounding quotes. +pub fn escape_bytes>(s: P) -> Vec { + let mut s: Vec = s.as_ref().to_vec(); + + s = s.replace(b"\\", b"\\\\"); + s = s.replace(b"\n", b"\\n"); + s = s.replace(b"\r", b"\\r"); + s = s.replace(b"\t", b"\\t"); + s = s.replace(b"\"", b"\\\""); + + s +} + +#[cfg(test)] +mod tests { + use super::escape_bytes; + use test_case::test_case; + + #[test_case(b"", b""; "empty")] + #[test_case(b"\"", b"\\\""; "doublequote")] + #[test_case(b":", b":"; "colon")] + fn escape(input: &[u8], expected: &[u8]) { + assert_eq!(expected, escape_bytes(input)) + } +} diff --git a/tvix/nix-compat/src/aterm/mod.rs b/tvix/nix-compat/src/aterm/mod.rs new file mode 100644 index 000000000000..8806b6caf2e5 --- /dev/null +++ b/tvix/nix-compat/src/aterm/mod.rs @@ -0,0 +1,7 @@ +mod escape; +mod parser; + +pub(crate) use escape::escape_bytes; +pub(crate) use parser::parse_bstr_field; +pub(crate) use parser::parse_str_list; +pub(crate) use parser::parse_string_field; diff --git a/tvix/nix-compat/src/aterm/parser.rs b/tvix/nix-compat/src/aterm/parser.rs new file mode 100644 index 000000000000..883eeb60b984 --- /dev/null +++ b/tvix/nix-compat/src/aterm/parser.rs @@ -0,0 +1,123 @@ +//! This module implements parsing code for some basic building blocks +//! of the [ATerm][] format, which is used by C++ Nix to serialize Derivations. +//! +//! 
[ATerm]: http://program-transformation.org/Tools/ATermFormat.html +use bstr::BString; +use nom::branch::alt; +use nom::bytes::complete::{escaped_transform, is_not, tag}; +use nom::character::complete::char as nomchar; +use nom::combinator::{map, value}; +use nom::multi::separated_list0; +use nom::sequence::delimited; +use nom::IResult; + +/// Parse a bstr and undo any escaping. +fn parse_escaped_bstr(i: &[u8]) -> IResult<&[u8], BString> { + escaped_transform( + is_not("\"\\"), + '\\', + alt(( + value("\\".as_bytes(), nomchar('\\')), + value("\n".as_bytes(), nomchar('n')), + value("\t".as_bytes(), nomchar('t')), + value("\r".as_bytes(), nomchar('r')), + value("\"".as_bytes(), nomchar('\"')), + )), + )(i) + .map(|(i, v)| (i, BString::new(v))) +} + +/// Parse a field in double quotes, undo any escaping, and return the unquoted +/// and decoded Vec. +pub(crate) fn parse_bstr_field(i: &[u8]) -> IResult<&[u8], BString> { + // inside double quotes… + delimited( + nomchar('\"'), + // There is + alt(( + // …either is a bstr after unescaping + parse_escaped_bstr, + // …or an empty string. + map(tag(b""), |_| BString::default()), + )), + nomchar('\"'), + )(i) +} + +/// Parse a field in double quotes, undo any escaping, and return the unquoted +/// and decoded string, if it's a valid string. Or fail parsing if the bytes are +/// no valid UTF-8. +pub(crate) fn parse_string_field(i: &[u8]) -> IResult<&[u8], String> { + // inside double quotes… + delimited( + nomchar('\"'), + // There is + alt(( + // either is a String after unescaping + nom::combinator::map_opt(parse_escaped_bstr, |escaped_bstr| { + String::from_utf8(escaped_bstr.into()).ok() + }), + // or an empty string. 
+ map(tag(b""), |_| String::new()), + )), + nomchar('\"'), + )(i) +} + +/// Parse a list of string fields (enclosed in brackets) +pub(crate) fn parse_str_list(i: &[u8]) -> IResult<&[u8], Vec> { + // inside brackets + delimited( + nomchar('['), + separated_list0(nomchar(','), parse_string_field), + nomchar(']'), + )(i) +} + +#[cfg(test)] +mod tests { + use test_case::test_case; + + #[test_case(br#""""#, b"", b""; "empty")] + #[test_case(br#""Hello World""#, b"Hello World", b""; "hello world")] + #[test_case(br#""\"""#, br#"""#, b""; "doublequote")] + #[test_case(br#"":""#, b":", b""; "colon")] + #[test_case(br#""\""Rest"#, br#"""#, b"Rest"; "doublequote rest")] + fn parse_bstr_field(input: &[u8], expected: &[u8], exp_rest: &[u8]) { + let (rest, parsed) = super::parse_bstr_field(input).expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(expected, parsed); + } + + #[test_case(br#""""#, "", b""; "empty")] + #[test_case(br#""Hello World""#, "Hello World", b""; "hello world")] + #[test_case(br#""\"""#, r#"""#, b""; "doublequote")] + #[test_case(br#"":""#, ":", b""; "colon")] + #[test_case(br#""\""Rest"#, r#"""#, b"Rest"; "doublequote rest")] + fn parse_string_field(input: &[u8], expected: &str, exp_rest: &[u8]) { + let (rest, parsed) = super::parse_string_field(input).expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(expected, &parsed); + } + + #[test] + fn parse_string_field_invalid_encoding_fail() { + let mut input: Vec = Vec::new(); + input.push(b'"'); + input.push(0xc5); + input.push(0xc4); + input.push(0xd6); + input.push(b'"'); + + super::parse_string_field(&input).expect_err("must fail"); + } + + #[test_case(br#"["foo"]"#, vec!["foo".to_string()], b""; "single foo")] + #[test_case(b"[]", vec![], b""; "empty list")] + #[test_case(b"[]blub", vec![], b"blub"; "empty list with rest")] + fn parse_list(input: &[u8], expected: Vec, exp_rest: &[u8]) { + let (rest, parsed) = 
super::parse_str_list(input).expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(expected, parsed); + } +} diff --git a/tvix/nix-compat/src/derivation/escape.rs b/tvix/nix-compat/src/derivation/escape.rs deleted file mode 100644 index 06b550bbf02d..000000000000 --- a/tvix/nix-compat/src/derivation/escape.rs +++ /dev/null @@ -1,27 +0,0 @@ -use bstr::ByteSlice; - -/// Escapes a byte sequence. Does not add surrounding quotes. -pub fn escape_bytes>(s: P) -> Vec { - let mut s: Vec = s.as_ref().to_vec(); - - s = s.replace(b"\\", b"\\\\"); - s = s.replace(b"\n", b"\\n"); - s = s.replace(b"\r", b"\\r"); - s = s.replace(b"\t", b"\\t"); - s = s.replace(b"\"", b"\\\""); - - s -} - -#[cfg(test)] -mod tests { - use super::escape_bytes; - use test_case::test_case; - - #[test_case(b"", b""; "empty")] - #[test_case(b"\"", b"\\\""; "doublequote")] - #[test_case(b":", b":"; "colon")] - fn escape(input: &[u8], expected: &[u8]) { - assert_eq!(expected, escape_bytes(input)) - } -} diff --git a/tvix/nix-compat/src/derivation/mod.rs b/tvix/nix-compat/src/derivation/mod.rs index a06f8eb160ae..d7f894c89a8f 100644 --- a/tvix/nix-compat/src/derivation/mod.rs +++ b/tvix/nix-compat/src/derivation/mod.rs @@ -8,8 +8,9 @@ use std::collections::{BTreeMap, BTreeSet}; use std::io; mod errors; -mod escape; mod output; +mod parse_error; +mod parser; mod validate; mod write; @@ -89,6 +90,12 @@ impl Derivation { buffer } + /// Parse a Derivation in ATerm serialization, and validate it passes our + /// set of validations. + pub fn from_aterm_bytes(b: &[u8]) -> Result> { + parser::parse(b) + } + /// Returns the drv path of a [Derivation] struct. /// /// The drv path is calculated by invoking [build_text_path], using diff --git a/tvix/nix-compat/src/derivation/parse_error.rs b/tvix/nix-compat/src/derivation/parse_error.rs new file mode 100644 index 000000000000..a064d4faba7b --- /dev/null +++ b/tvix/nix-compat/src/derivation/parse_error.rs @@ -0,0 +1,73 @@ +//! 
This contains error and result types that can happen while parsing +//! Derivations from ATerm. +use nom::IResult; + +use crate::nixhash; + +pub type NomResult = IResult>; + +#[derive(Debug, PartialEq)] +pub enum ErrorKind { + // duplicate key in map + DuplicateMapKey(String), + + // Digest parsing error + NixHashError(nixhash::Error), + + // error kind wrapped from native nom errors + Nom(nom::error::ErrorKind), +} + +/// Our own error type to pass along parser-related errors. +#[derive(Debug, PartialEq)] +pub struct NomError { + /// position of the error in the input data + pub input: I, + /// error code + pub code: ErrorKind, +} + +impl nom::error::FromExternalError for NomError { + fn from_external_error(input: I, kind: nom::error::ErrorKind, _e: E) -> Self { + Self { + input, + code: ErrorKind::Nom(kind), + } + } +} + +impl nom::error::ParseError for NomError { + fn from_error_kind(input: I, kind: nom::error::ErrorKind) -> Self { + Self { + input, + code: ErrorKind::Nom(kind), + } + } + + // FUTUREWORK: implement, so we have support for backtracking through the + // parse tree? + fn append(_input: I, _kind: nom::error::ErrorKind, other: Self) -> Self { + other + } +} + +/// This wraps a [nom::error::Error] into our error. +impl From> for NomError { + fn from(value: nom::error::Error) -> Self { + Self { + input: value.input, + code: ErrorKind::Nom(value.code), + } + } +} + +/// This essentially implements +/// From>> for nom::Err>, +/// which we can't because nom::Err<_> is a foreign type. +pub(crate) fn into_nomerror(e: nom::Err>) -> nom::Err> { + match e { + nom::Err::Incomplete(n) => nom::Err::Incomplete(n), + nom::Err::Error(e) => nom::Err::Error(e.into()), + nom::Err::Failure(e) => nom::Err::Failure(e.into()), + } +} diff --git a/tvix/nix-compat/src/derivation/parser.rs b/tvix/nix-compat/src/derivation/parser.rs new file mode 100644 index 000000000000..dfcd327e4f0f --- /dev/null +++ b/tvix/nix-compat/src/derivation/parser.rs @@ -0,0 +1,327 @@ +//! 
This module constructs a [Derivation] by parsing its [ATerm][] +//! serialization. +//! +//! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html + +use bstr::BString; +use nom::bytes::complete::tag; +use nom::character::complete::char as nomchar; +use nom::combinator::{all_consuming, map_res}; +use nom::multi::{separated_list0, separated_list1}; +use nom::sequence::{delimited, preceded, separated_pair, terminated, tuple}; +use std::collections::{BTreeMap, BTreeSet}; +use thiserror; + +use super::parse_error::{into_nomerror, ErrorKind, NomError, NomResult}; +use super::{write, Derivation, NixHashWithMode, Output}; +use crate::{aterm, nixhash}; + +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("parsing error: {0}")] + ParseError(NomError), + #[error("premature EOF")] + Incomplete, + #[error("validation error: {0}")] + ValidationError(super::DerivationError), +} + +pub(crate) fn parse(i: &[u8]) -> Result> { + match all_consuming(parse_derivation)(i) { + Ok((rest, derivation)) => { + // this shouldn't happen, as all_consuming fails if any input is left unconsumed. + debug_assert!(rest.is_empty()); + + // invoke validate + derivation.validate(true).map_err(Error::ValidationError)?; + + Ok(derivation) + } + Err(nom::Err::Incomplete(_)) => Err(Error::Incomplete), + Err(nom::Err::Error(e) | nom::Err::Failure(e)) => Err(Error::ParseError(e)), + } +} + +/// Parse one output in ATerm. This is 4 string fields inside parens: +/// output name, output path, algo (and mode), digest. +/// Returns the output name and [Output] struct. 
+fn parse_output(i: &[u8]) -> NomResult<&[u8], (String, Output)> { + delimited( + nomchar('('), + map_res( + |i| { + tuple(( + terminated(aterm::parse_string_field, nomchar(',')), + terminated(aterm::parse_string_field, nomchar(',')), + terminated(aterm::parse_string_field, nomchar(',')), + aterm::parse_bstr_field, + ))(i) + .map_err(into_nomerror) + }, + |(output_name, output_path, algo_and_mode, encoded_digest)| { + // convert these 4 fields into an [Output]. + let hash_with_mode_res = { + if algo_and_mode.is_empty() && encoded_digest.is_empty() { + None + } else { + match data_encoding::HEXLOWER.decode(&encoded_digest) { + Ok(digest) => Some(NixHashWithMode::from_algo_mode_hash( + &algo_and_mode, + &digest, + )), + Err(e) => Some(Err(nixhash::Error::InvalidBase64Encoding(e))), + } + } + } + .transpose(); + + match hash_with_mode_res { + Ok(hash_with_mode) => Ok(( + output_name, + Output { + path: output_path, + hash_with_mode, + }, + )), + Err(e) => Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::NixHashError(e), + })), + } + }, + ), + nomchar(')'), + )(i) +} + +/// Parse multiple outputs in ATerm. This is a list of things accepted by +/// parse_output, and takes care of turning the (String, Output) returned from +/// it to a BTreeMap. +/// We don't use parse_kv here, as it's dealing with 2-tuples, and these are +/// 4-tuples. 
+fn parse_outputs(i: &[u8]) -> NomResult<&[u8], BTreeMap> { + let res = delimited( + nomchar('['), + separated_list1(tag(","), parse_output), + nomchar(']'), + )(i); + + match res { + Ok((rst, outputs_lst)) => { + let mut outputs: BTreeMap = BTreeMap::default(); + for (output_name, output) in outputs_lst.into_iter() { + if outputs.contains_key(&output_name) { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateMapKey(output_name), + })); + } + outputs.insert(output_name, output); + } + Ok((rst, outputs)) + } + // pass regular parse errors along + Err(e) => Err(e), + } +} + +fn parse_input_derivations(i: &[u8]) -> NomResult<&[u8], BTreeMap>> { + parse_kv::, _>(aterm::parse_str_list)(i) +} + +pub fn parse_derivation(i: &[u8]) -> NomResult<&[u8], Derivation> { + use nom::Parser; + preceded( + tag(write::DERIVATION_PREFIX), + delimited( + // inside parens + nomchar('('), + // tuple requires all errors to be of the same type, so we need to be a + // bit verbose here wrapping generic IResult into [NomResult]. 
+ tuple(( + // parse outputs + terminated(parse_outputs, nomchar(',')), + // // parse input derivations + terminated(parse_input_derivations, nomchar(',')), + // // parse input sources + |i| terminated(aterm::parse_str_list, nomchar(','))(i).map_err(into_nomerror), + // // parse system + |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror), + // // parse builder + |i| terminated(aterm::parse_string_field, nomchar(','))(i).map_err(into_nomerror), + // // parse arguments + |i| terminated(aterm::parse_str_list, nomchar(','))(i).map_err(into_nomerror), + // parse environment + parse_kv::(aterm::parse_bstr_field), + )), + nomchar(')'), + ) + .map( + |( + outputs, + input_derivations, + input_sources, + system, + builder, + arguments, + environment, + )| { + // All values in input_derivations need to be converted from + // Vec to BTreeSet + let mut input_derivations_new: BTreeMap<_, BTreeSet<_>> = BTreeMap::new(); + for (k, v) in input_derivations.into_iter() { + let values_new: BTreeSet<_> = BTreeSet::from_iter(v.into_iter()); + input_derivations_new.insert(k, values_new); + // TODO: actually check they're not duplicate in the parser side! + } + + // Input sources need to be converted from Vec<_> to BTreeSet<_> + let input_sources_new: BTreeSet<_> = BTreeSet::from_iter(input_sources); + + Derivation { + arguments, + builder, + environment, + input_derivations: input_derivations_new, + input_sources: input_sources_new, + outputs, + system, + } + }, + ), + )(i) +} + +/// Parse a list of key/value pairs into a BTreeMap. +/// The parser for the values can be passed in. +/// In terms of ATerm, this is just a 2-tuple, +/// but we have the additional restriction that the first element needs to be +/// unique across all tuples. 
+pub(crate) fn parse_kv<'a, V, VF>( + vf: VF, +) -> impl FnMut(&'a [u8]) -> NomResult<&'a [u8], BTreeMap> + 'static +where + VF: FnMut(&'a [u8]) -> nom::IResult<&'a [u8], V, nom::error::Error<&'a [u8]>> + Clone + 'static, +{ + move |i| + // inside brackets + delimited( + nomchar('['), + |ii| { + let res = separated_list0( + nomchar(','), + // inside parens + delimited( + nomchar('('), + separated_pair( + aterm::parse_string_field, + nomchar(','), + vf.clone(), + ), + nomchar(')'), + ), + )(ii).map_err(into_nomerror); + + match res { + Ok((rest, pairs)) => { + let mut kvs: BTreeMap = BTreeMap::new(); + for (k, v) in pairs.into_iter() { + // collect the 2-tuple to a BTreeMap, + // and fail if the key was already seen before. + if kvs.insert(k.clone(), v).is_some() { + return Err(nom::Err::Failure(NomError { + input: i, + code: ErrorKind::DuplicateMapKey(k), + })); + } + } + Ok((rest, kvs)) + } + Err(e) => Err(e), + } + }, + nomchar(']'), + )(i) +} + +#[cfg(test)] +mod tests { + use std::collections::BTreeMap; + + use crate::derivation::Output; + use bstr::{BString, ByteSlice}; + use lazy_static::lazy_static; + use test_case::test_case; + + lazy_static! 
{ + static ref EXP_MULTI_OUTPUTS: BTreeMap = { + let mut b = BTreeMap::new(); + b.insert( + "lib".to_string(), + Output { + path: "/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib" + .to_string(), + hash_with_mode: None, + }, + ); + b.insert( + "out".to_string(), + Output { + path: "/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out".to_string(), + hash_with_mode: None, + }, + ); + b + }; + static ref EXP_AB_MAP: BTreeMap = { + let mut b = BTreeMap::new(); + b.insert("a".to_string(), b"1".as_bstr().to_owned()); + b.insert("b".to_string(), b"2".as_bstr().to_owned()); + b + }; + } + + #[test_case(b"[(\"a\",\"1\"),(\"b\",\"2\")]", &EXP_AB_MAP, b""; "simple")] + fn parse_kv(input: &'static [u8], expected: &BTreeMap, exp_rest: &[u8]) { + let (rest, parsed) = super::parse_kv::(crate::aterm::parse_bstr_field)(input) + .expect("must parse"); + assert_eq!(exp_rest, rest, "expected remainder"); + assert_eq!(*expected, parsed); + } + + #[test_case( + br#"("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")"#, + ("out".to_string(), Output { + path: "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo".to_string(), + hash_with_mode: None + }); "simple" + )] + #[test_case( + br#"("out","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar","r:sha256","08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba")"#, + ("out".to_string(), Output { + path: "/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar".to_string(), + hash_with_mode: Some(crate::derivation::NixHashWithMode::Recursive( + crate::nixhash::from_algo_and_digest ( + crate::nixhash::HashAlgo::Sha256, + &data_encoding::HEXLOWER.decode(b"08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba").unwrap() + ).unwrap() + )), + }); "fod" + )] + fn parse_output(input: &[u8], expected: (String, Output)) { + let (rest, parsed) = super::parse_output(input).expect("must parse"); + assert!(rest.is_empty()); + assert_eq!(expected, parsed); + } + + #[test_case( + 
br#"[("lib","/nix/store/2vixb94v0hy2xc6p7mbnxxcyc095yyia-has-multi-out-lib","",""),("out","/nix/store/55lwldka5nyxa08wnvlizyqw02ihy8ic-has-multi-out","","")]"#, + &EXP_MULTI_OUTPUTS; + "multi-out" + )] + fn parse_outputs(input: &[u8], expected: &BTreeMap) { + let (rest, parsed) = super::parse_outputs(input).expect("must parse"); + assert!(rest.is_empty()); + assert_eq!(*expected, parsed); + } +} diff --git a/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv b/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv new file mode 100644 index 000000000000..072561a29e3a --- /dev/null +++ b/tvix/nix-compat/src/derivation/tests/derivation_tests/duplicate.drv @@ -0,0 +1 @@ +Derive([("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo","","")],[("/nix/store/0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv",["out"])],[],":",":",[],[("bar","/nix/store/4q0pg5zpfmznxscq3avycvf9xdvx50n3-bar"),("builder",":"),("name","foo"),("name","bar"),("out","/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"),("system",":")]) \ No newline at end of file diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index 2ae80b620d7a..9f0e548471d4 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -1,4 +1,7 @@ +use super::parse_error::ErrorKind; use crate::derivation::output::Output; +use crate::derivation::parse_error::NomError; +use crate::derivation::parser::Error::ParseError; use crate::derivation::Derivation; use crate::store_path::StorePath; use bstr::{BStr, BString}; @@ -70,6 +73,73 @@ fn check_to_aterm_bytes(path_to_drv_file: &str) { assert_eq!(expected, BStr::new(&derivation.to_aterm_bytes())); } +/// Reads in derivations in ATerm representation, parses with that parser, +/// then compares the structs with the ones obtained by parsing the JSON +/// representations. 
+#[test_resources("src/derivation/tests/derivation_tests/ok/*.drv")] +fn from_aterm_bytes(path_to_drv_file: &str) { + // Read in ATerm representation. + let aterm_bytes = read_file(path_to_drv_file); + let parsed_drv = Derivation::from_aterm_bytes(&aterm_bytes).expect("must succeed"); + + // For where we're able to load JSON fixtures, parse them and compare the structs. + // For where we're not, compare the bytes manually. + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + assert_eq!( + &[0xc5, 0xc4, 0xd6][..], + parsed_drv.environment.get("chars").unwrap(), + "expected bytes to match", + ); + } else { + let json_bytes = read_file(&format!("{}.json", path_to_drv_file)); + let fixture_derivation: Derivation = + serde_json::from_slice(&json_bytes).expect("JSON was not well-formatted"); + + assert_eq!(fixture_derivation, parsed_drv); + } + + // Finally, write the ATerm serialization to another buffer, ensuring it's + // stable (and we compare all fields we couldn't compare in the non-utf8 + // derivations) + + assert_eq!( + &aterm_bytes, + &parsed_drv.to_aterm_bytes(), + "expected serialized ATerm to match initial input" + ); +} + +#[test] +fn from_aterm_bytes_duplicate_map_key() { + let buf: Vec = read_file(&format!("{}/{}", RESOURCES_PATHS, "duplicate.drv")).into(); + + let err = Derivation::from_aterm_bytes(&buf).expect_err("must fail"); + + match err { + ParseError(NomError { input: _, code }) => { + assert_eq!(code, ErrorKind::DuplicateMapKey("name".to_string())); + } + _ => { + panic!("unexpected error"); + } + } +} + +/// Read in a derivation in ATerm, but add some garbage at the end. +/// Ensure the parser detects and fails in this case. 
+#[test] +fn from_aterm_bytes_trailer() { + let mut buf: Vec = read_file(&format!( + "{}/ok/{}", + RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" + )) + .into(); + + buf.push(0x00); + + Derivation::from_aterm_bytes(&buf).expect_err("must fail"); +} + #[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] #[test_case("foo", "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv"; "simple-sha256")] #[test_case("bar", "ss2p4wmxijn652haqyd7dckxwl4c7hxx-bar.drv"; "fixed-sha1")] diff --git a/tvix/nix-compat/src/derivation/write.rs b/tvix/nix-compat/src/derivation/write.rs index 0655cb8cded2..7ebbbffa4b55 100644 --- a/tvix/nix-compat/src/derivation/write.rs +++ b/tvix/nix-compat/src/derivation/write.rs @@ -3,7 +3,7 @@ //! //! [ATerm]: http://program-transformation.org/Tools/ATermFormat.html -use crate::derivation::escape::escape_bytes; +use crate::aterm::escape_bytes; use crate::derivation::output::Output; use bstr::BString; use std::{ diff --git a/tvix/nix-compat/src/lib.rs b/tvix/nix-compat/src/lib.rs index 37d79f38fb37..6ec60f3cc8d3 100644 --- a/tvix/nix-compat/src/lib.rs +++ b/tvix/nix-compat/src/lib.rs @@ -1,3 +1,4 @@ +pub(crate) mod aterm; pub mod derivation; pub mod nar; pub mod nixbase32; diff --git a/tvix/nix-compat/src/nixhash/mod.rs b/tvix/nix-compat/src/nixhash/mod.rs index 9586473d1b9a..5b12f466e3fb 100644 --- a/tvix/nix-compat/src/nixhash/mod.rs +++ b/tvix/nix-compat/src/nixhash/mod.rs @@ -81,7 +81,7 @@ pub fn from_algo_and_digest(algo: HashAlgo, digest: &[u8]) -> Result { } /// Errors related to NixHash construction. -#[derive(Debug, thiserror::Error)] +#[derive(Debug, Eq, PartialEq, thiserror::Error)] pub enum Error { #[error("invalid hash algo: {0}")] InvalidAlgo(String), -- cgit 1.4.1