From 79531c3dab1c24ff3171c0aa067004c8e6c92e3f Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 29 Jul 2023 21:14:44 +0200 Subject: refactor(tvix/nix-compat): support non-unicode Derivations Derivations can have non-unicode strings in their env values, so the ATerm representations are not necessarily String anymore, but Vec. Change-Id: Ic23839471eb7f68d9c3c30667c878830946b6607 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8990 Tested-by: BuildkiteCI Reviewed-by: raitobezarius Autosubmit: flokli --- tvix/nix-compat/src/derivation/tests/mod.rs | 159 ++++++++++++++++++++++------ 1 file changed, 128 insertions(+), 31 deletions(-) (limited to 'tvix/nix-compat/src/derivation/tests/mod.rs') diff --git a/tvix/nix-compat/src/derivation/tests/mod.rs b/tvix/nix-compat/src/derivation/tests/mod.rs index f4070b2496..8fbaa52e23 100644 --- a/tvix/nix-compat/src/derivation/tests/mod.rs +++ b/tvix/nix-compat/src/derivation/tests/mod.rs @@ -2,7 +2,8 @@ use crate::derivation::output::Output; use crate::derivation::Derivation; use crate::nixhash::NixHash; use crate::store_path::StorePath; -use std::collections::BTreeSet; +use bstr::{BStr, BString}; +use std::collections::{BTreeMap, BTreeSet}; use std::fs::File; use std::io::Read; use std::path::Path; @@ -12,33 +13,43 @@ use test_generator::test_resources; const RESOURCES_PATHS: &str = "src/derivation/tests/derivation_tests"; -fn read_file(path: &str) -> String { +fn read_file(path: &str) -> BString { let path = Path::new(path); let mut file = File::open(path).unwrap(); - let mut data = String::new(); + let mut data = Vec::new(); - file.read_to_string(&mut data).unwrap(); + file.read_to_end(&mut data).unwrap(); - data + data.into() } #[test_resources("src/derivation/tests/derivation_tests/*.drv")] fn check_serizaliation(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } let data = read_file(&format!("{}.json", path_to_drv_file)); - let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted"); + let derivation: Derivation = + serde_json::from_slice(&data).expect("JSON was not well-formatted"); - let mut serialized_derivation = String::new(); + let mut serialized_derivation = Vec::new(); derivation.serialize(&mut serialized_derivation).unwrap(); let expected = read_file(path_to_drv_file); - assert_eq!(expected, serialized_derivation); + assert_eq!(expected, BStr::new(&serialized_derivation)); } #[test_resources("src/derivation/tests/derivation_tests/*.drv")] fn validate(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } let data = read_file(&format!("{}.json", path_to_drv_file)); - let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted"); + let derivation: Derivation = + serde_json::from_slice(&data).expect("JSON was not well-formatted"); derivation .validate(true) @@ -46,13 +57,18 @@ fn validate(path_to_drv_file: &str) { } #[test_resources("src/derivation/tests/derivation_tests/*.drv")] -fn check_to_aterm_string(path_to_drv_file: &str) { +fn check_to_aterm_bytes(path_to_drv_file: &str) { + // skip JSON files known to fail parsing + if path_to_drv_file.ends_with("cp1252.drv") || path_to_drv_file.ends_with("latin1.drv") { + return; + } let data = read_file(&format!("{}.json", path_to_drv_file)); - let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted"); + let derivation: Derivation = + serde_json::from_slice(&data).expect("JSON was not well-formatted"); let expected = read_file(path_to_drv_file); - assert_eq!(expected, derivation.to_aterm_string()); + assert_eq!(expected, BStr::new(&derivation.to_aterm_bytes())); } #[test_case("bar","0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv"; "fixed_sha256")] @@ -64,7 +80,8 @@ fn check_to_aterm_string(path_to_drv_file: &str) { #[test_case("unicode", "52a9id8hx688hvlnz4d1n25ml1jdykz0-unicode.drv"; "unicode")] fn derivation_path(name: &str, expected_path: &str) { let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, expected_path)); - let derivation: Derivation = serde_json::from_str(&data).expect("JSON was not well-formatted"); + let derivation: Derivation = + serde_json::from_slice(&data).expect("JSON was not well-formatted"); assert_eq!( derivation.calculate_derivation_path(name).unwrap(), @@ -79,7 +96,7 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { let mut trimmed_outputs = derivation.outputs.clone(); for (output_name, output) in &derivation.outputs { - trimmed_env.insert(output_name.clone(), "".to_string()); + trimmed_env.insert(output_name.clone(), "".into()); assert!(trimmed_outputs.contains_key(output_name)); trimmed_outputs.insert( output_name.to_string(), @@ -103,7 +120,7 @@ fn derivation_with_trimmed_output_paths(derivation: &Derivation) -> Derivation { fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { // read in the fixture let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); - let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); + let drv: Derivation = serde_json::from_slice(&data).expect("must deserialize"); let actual = drv.derivation_or_fod_hash(|_| panic!("must not be called")); @@ -120,7 +137,7 @@ fn derivation_or_fod_hash(drv_path: &str, expected_nix_hash_string: &str) { fn output_paths(name: &str, drv_path: &str) { // read in the fixture let data = read_file(&format!("{}/{}.json", RESOURCES_PATHS, drv_path)); - let expected_derivation: Derivation = serde_json::from_str(&data).expect("must deserialize"); + let expected_derivation: Derivation = serde_json::from_slice(&data).expect("must deserialize"); let mut derivation = derivation_with_trimmed_output_paths(&expected_derivation); @@ -148,7 +165,7 @@ fn output_paths(name: &str, drv_path: &str) { .to_string_lossy() )); - let drv: Derivation = serde_json::from_str(&data).expect("must deserialize"); + let drv: Derivation = serde_json::from_slice(&data).expect("must deserialize"); // calculate derivation_or_fod_hash for each parent. // This may not trigger subsequent requests, as both parents are FOD. @@ -204,16 +221,16 @@ fn output_path_construction() { // assemble bar env let bar_env = &mut bar_drv.environment; - bar_env.insert("builder".to_string(), ":".to_string()); - bar_env.insert("name".to_string(), "bar".to_string()); - bar_env.insert("out".to_string(), "".to_string()); // will be calculated + bar_env.insert("builder".to_string(), ":".into()); + bar_env.insert("name".to_string(), "bar".into()); + bar_env.insert("out".to_string(), "".into()); // will be calculated bar_env.insert( "outputHash".to_string(), - "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba".to_string(), + "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba".into(), ); - bar_env.insert("outputHashAlgo".to_string(), "sha256".to_string()); - bar_env.insert("outputHashMode".to_string(), "recursive".to_string()); - bar_env.insert("system".to_string(), ":".to_string()); + bar_env.insert("outputHashAlgo".to_string(), "sha256".into()); + bar_env.insert("outputHashMode".to_string(), "recursive".into()); + bar_env.insert("system".to_string(), ":".into()); // assemble bar outputs bar_drv.outputs.insert( @@ -244,7 +261,7 @@ fn output_path_construction() { "{}/{}.json", RESOURCES_PATHS, "0hm2f1psjpcwg8fijsmr4wwxrx59s092-bar.drv" )); - let bar_drv_expected: Derivation = serde_json::from_str(&bar_data).expect("must deserialize"); + let bar_drv_expected: Derivation = serde_json::from_slice(&bar_data).expect("must deserialize"); assert_eq!(bar_drv_expected, bar_drv); // now construct foo, which requires bar_drv @@ -266,11 +283,11 @@ fn output_path_construction() { // assemble foo env let foo_env = &mut foo_drv.environment; - foo_env.insert("bar".to_string(), bar_output_path.to_string()); - foo_env.insert("builder".to_string(), ":".to_string()); - foo_env.insert("name".to_string(), "foo".to_string()); - foo_env.insert("out".to_string(), "".to_string()); // will be calculated - foo_env.insert("system".to_string(), ":".to_string()); + foo_env.insert("bar".to_string(), bar_output_path.to_owned().into()); + foo_env.insert("builder".to_string(), ":".into()); + foo_env.insert("name".to_string(), "foo".into()); + foo_env.insert("out".to_string(), "".into()); // will be calculated + foo_env.insert("system".to_string(), ":".into()); // asssemble foo outputs foo_drv.outputs.insert( @@ -304,7 +321,7 @@ fn output_path_construction() { "{}/{}.json", RESOURCES_PATHS, "4wvvbi4jwn0prsdxb7vs673qa5h9gr7x-foo.drv", )); - let foo_drv_expected: Derivation = serde_json::from_str(&foo_data).expect("must deserialize"); + let foo_drv_expected: Derivation = serde_json::from_slice(&foo_data).expect("must deserialize"); assert_eq!(foo_drv_expected, foo_drv); assert_eq!( @@ -314,3 +331,83 @@ fn output_path_construction() { .expect("must succeed") ); } + +/// This constructs a Derivation using cp1252 encoding and ensures the +/// calculated derivation path matches the one Nix does calculate, as +/// well as the ATerm serialization. +/// We can't add this as a test_case to `output_paths`, as the JSON parser +/// refuses to parse our JSONs. +/// It looks like more recent versions of Nix also seem to not produce these +/// JSON files anymore, however, it still happily produces the .drv files in +/// the store. +#[test_case( + "cp1252", + vec![0xc5, 0xc4, 0xd6], + "/nix/store/drr2mjp9fp9vvzsf5f9p0a80j33dxy7m-cp1252", + "m1vfixn8iprlf0v9abmlrz7mjw1xj8kp-cp1252.drv"; + "cp1252" +)] +#[test_case( + "latin1", + vec![0xc5, 0xc4, 0xd6], + "/nix/store/x1f6jfq9qgb6i8jrmpifkn9c64fg4hcm-latin1", + "x6p0hg79i3wg0kkv7699935f7rrj9jf3-latin1.drv"; + "latin1" +)] +fn non_unicode(name: &str, chars: Vec, exp_output_path: &str, exp_derivation_path: &str) { + // construct the Derivation + let mut outputs: BTreeMap = BTreeMap::new(); + outputs.insert( + "out".to_string(), + Output { + path: exp_output_path.to_string(), + ..Default::default() + }, + ); + + let mut environment: BTreeMap = BTreeMap::new(); + environment.insert("builder".to_string(), ":".into()); + environment.insert("chars".to_string(), chars.into()); + environment.insert("name".to_string(), name.into()); + environment.insert("out".to_string(), exp_output_path.into()); + environment.insert("system".to_string(), ":".into()); + let derivation: Derivation = Derivation { + builder: ":".to_string(), + environment, + outputs, + system: ":".to_string(), + ..Default::default() + }; + + // check the derivation_path matches what Nix calculated. + let actual_drv_path = derivation.calculate_derivation_path(name).unwrap(); + assert_eq!(exp_derivation_path.to_string(), actual_drv_path.to_string()); + + // Now wipe the output path info, and ensure we calculate the same output + // path. + { + let mut derivation = derivation_with_trimmed_output_paths(&derivation); + let calculated_derivation_or_fod_hash = derivation.derivation_or_fod_hash(|_| { + panic!("No parents expected"); + }); + derivation + .calculate_output_paths(name, &calculated_derivation_or_fod_hash) + .unwrap(); + + assert_eq!( + exp_output_path.to_string(), + derivation.outputs.get("out").unwrap().path, + "expected calculated output path to match" + ); + } + + // Construct the ATerm representation and compare with our fixture. + { + let data = read_file(&format!("{}/{}", RESOURCES_PATHS, exp_derivation_path)); + assert_eq!( + data, + BStr::new(&derivation.to_aterm_bytes()), + "expected ATerm serialization to match", + ); + } +} -- cgit 1.4.1