From d504b440c2ac420b6618b5caa5b08684575cf2ba Mon Sep 17 00:00:00 2001 From: Ryan Lahfa Date: Sat, 19 Aug 2023 18:36:27 +0200 Subject: feat(tvix/nix-compat): don't swallow hash validation errors Previously, Output deserialization would silence validation errors and provide `None` for `hash_with_mode` as soon as a validation error would happen inside of the `NixHashWithMode` deserialization, e.g. invalid hash length would not provide an validation error but a silent `None` value. This is problematic, we workaround a serde limitation here by writing our own Deserializer. As you can see, we write some boilerplate code unfortunately, as, for example: - `#[serde(fail_if_unparsed_as="Option::is_none")]` is not a thing, otherwise, we could have been able to just bubble up errors in case of "not fully parsed" (and not missing) values. - `From<&serde_json::Value> for serde::de::Unexpected` is not a thing, otherwise, we could just map invalid type errors and reuse the existing types instead of doing extremely bizarre things with `serde::de::Unexpected::Other`, note: this is a not problem for expected, we know what we expect, we don't know what we received in practice. I decided to write a `NixHashWithMode::from_map` which will eat a map deserialized via `serde_json`, so our serde magic is not totally "data model" agnostic. I wanted to go for data model agnosticity and enable maximal performance, e.g. building the structure as the map values are streamed in the Visitor, this is needlessly painful because `Output` and `NixHashWithMode` are in different files and this really makes sense only if we write the full implementation in one file, indeed, otherwise, you end up duplicating the work or having strange coupling. So, for now, we will allocate a full map of the fields inside the `Output`, i.e. if any "unknown field" is in that map, we will deserialize it for no reason. Doing it properly, as I repeat it in the code and to flokli at C3Camp 2023, requires to patch serde upstream IMHO. Change-Id: I46fe6ccb8c390c48d6934fd3e3f02a0dfe59557b Reviewed-on: https://cl.tvl.fyi/c/depot/+/9107 Tested-by: BuildkiteCI Reviewed-by: flokli --- tvix/nix-compat/src/derivation/output.rs | 112 ++++++++++++++++++++++++++++++- 1 file changed, 111 insertions(+), 1 deletion(-) (limited to 'tvix/nix-compat/src/derivation') diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs index 37161cbe98b5..ece414a5091c 100644 --- a/tvix/nix-compat/src/derivation/output.rs +++ b/tvix/nix-compat/src/derivation/output.rs @@ -2,8 +2,9 @@ use crate::derivation::OutputError; use crate::nixhash::{HashAlgo, NixHashWithMode}; use crate::store_path::StorePath; use serde::{Deserialize, Serialize}; +use serde_json::Map; -#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)] pub struct Output { pub path: String, @@ -11,6 +12,29 @@ pub struct Output { pub hash_with_mode: Option, } +impl<'de> Deserialize<'de> for Output { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + let fields = Map::deserialize(deserializer)?; + Ok(Self { + path: fields + .get("path") + .ok_or(serde::de::Error::missing_field( + &"`path` is missing but required for outputs", + ))? + .as_str() + .ok_or(serde::de::Error::invalid_type( + serde::de::Unexpected::Other(&"certainly not a string"), + &"a string", + ))? + .to_owned(), + hash_with_mode: NixHashWithMode::from_map::(&fields)?, + }) + } +} + impl Output { pub fn is_fixed(&self) -> bool { self.hash_with_mode.is_some() @@ -34,3 +58,89 @@ impl Output { Ok(()) } } + +/// This ensures that a potentially valid input addressed +/// output is deserialized as a non-fixed output. +#[test] +fn deserialize_valid_input_addressed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablabla" + }"#; + let output: Output = serde_json::from_str(&json_bytes).expect("must parse"); + + assert!(!output.is_fixed()); +} + +/// This ensures that a potentially valid fixed output +/// output deserializes fine as a fixed output. +#[test] +fn deserialize_valid_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha256" + }"#; + let output: Output = serde_json::from_str(&json_bytes).expect("must parse"); + + assert!(output.is_fixed()); +} + +/// This ensures that parsing an input with the invalid hash encoding +/// will result in a parsing failure. +#[test] +fn deserialize_with_error_invalid_hash_encoding_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "IAMNOTVALIDNIXBASE32", + "hashAlgo": "r:sha256" + }"#; + let output: Result = serde_json::from_str(&json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the wrong hash algo +/// will result in a parsing failure. +#[test] +fn deserialize_with_error_invalid_hash_algo_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + "hashAlgo": "r:sha1024" + }"#; + let output: Result = serde_json::from_str(&json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the missing hash algo but present hash will result in a +/// parsing failure. +#[test] +fn deserialize_with_error_missing_hash_algo_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba", + }"#; + let output: Result = serde_json::from_str(&json_bytes); + + assert!(output.is_err()); +} + +/// This ensures that parsing an input with the missing hash but present hash algo will result in a +/// parsing failure. +#[test] +fn deserialize_with_error_missing_hash_fixed_output() { + let json_bytes = r#" + { + "path": "/nix/store/blablablabla", + "hashAlgo": "r:sha1024" + }"#; + let output: Result = serde_json::from_str(&json_bytes); + + assert!(output.is_err()); +} -- cgit 1.4.1