about summary refs log tree commit diff
diff options
context:
space:
mode:
authorRyan Lahfa <tvl@lahfa.xyz>2023-08-19T16·36+0200
committerraitobezarius <tvl@lahfa.xyz>2023-08-20T19·34+0000
commitd504b440c2ac420b6618b5caa5b08684575cf2ba (patch)
treed40014f01b2010f26312610dfbaba6cbe5e4fdb2
parentbb48d8c61bed61e3c9318e214c838df74b60ee8e (diff)
feat(tvix/nix-compat): don't swallow hash validation errors r/6494
Previously, Output deserialization would silence validation errors and
provide `None` for `hash_with_mode` as soon as a validation error
happened inside the `NixHashWithMode` deserialization, e.g. an invalid
hash length would not produce a validation error but a silent `None`
value.

This is problematic, so we work around a serde limitation here by writing
our own `Deserialize` implementation.

Unfortunately, as you can see, this requires some boilerplate code
because, for example:

- `#[serde(fail_if_unparsed_as="Option::is_none")]` is not a thing;
  otherwise, we could simply bubble up errors for values that are
  present but "not fully parsed" (as opposed to missing).

- `From<&serde_json::Value> for serde::de::Unexpected` is not a thing;
  otherwise, we could just map invalid type errors and reuse the
  existing types instead of doing extremely bizarre things with
  `serde::de::Unexpected::Other`. Note: this is not a problem for the
  expected value; we know what we expect, we just don't know what we
  received in practice.

I decided to write a `NixHashWithMode::from_map` which consumes a map
deserialized via `serde_json`, so our serde magic is not fully "data
model" agnostic.

I wanted to stay data-model agnostic and enable maximal performance,
e.g. by building the structure as the map values are streamed in the
Visitor. This is needlessly painful because `Output` and
`NixHashWithMode` are in different files, and it only really makes sense
if we write the full implementation in one file; otherwise, you end up
duplicating the work or introducing strange coupling.

So, for now, we allocate a full map of the fields of the `Output`,
i.e. if any "unknown field" is present in the input, we deserialize it
into that map for no reason.

Doing it properly, as I repeat in the code and said to flokli at C3Camp
2023, requires patching serde upstream IMHO.

Change-Id: I46fe6ccb8c390c48d6934fd3e3f02a0dfe59557b
Reviewed-on: https://cl.tvl.fyi/c/depot/+/9107
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
-rw-r--r--tvix/nix-compat/src/derivation/output.rs112
-rw-r--r--tvix/nix-compat/src/nixhash/algos.rs5
-rw-r--r--tvix/nix-compat/src/nixhash/with_mode.rs149
3 files changed, 208 insertions, 58 deletions
diff --git a/tvix/nix-compat/src/derivation/output.rs b/tvix/nix-compat/src/derivation/output.rs
index 37161cbe98b5..ece414a5091c 100644
--- a/tvix/nix-compat/src/derivation/output.rs
+++ b/tvix/nix-compat/src/derivation/output.rs
@@ -2,8 +2,9 @@ use crate::derivation::OutputError;
 use crate::nixhash::{HashAlgo, NixHashWithMode};
 use crate::store_path::StorePath;
 use serde::{Deserialize, Serialize};
+use serde_json::Map;
 
-#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize)]
+#[derive(Clone, Debug, Default, Eq, PartialEq, Serialize)]
 pub struct Output {
     pub path: String,
 
@@ -11,6 +12,29 @@ pub struct Output {
     pub hash_with_mode: Option<NixHashWithMode>,
 }
 
+impl<'de> Deserialize<'de> for Output {
+    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        let fields = Map::deserialize(deserializer)?;
+        Ok(Self {
+            path: fields
+                .get("path")
+                .ok_or(serde::de::Error::missing_field(
+                    &"`path` is missing but required for outputs",
+                ))?
+                .as_str()
+                .ok_or(serde::de::Error::invalid_type(
+                    serde::de::Unexpected::Other(&"certainly not a string"),
+                    &"a string",
+                ))?
+                .to_owned(),
+            hash_with_mode: NixHashWithMode::from_map::<D>(&fields)?,
+        })
+    }
+}
+
 impl Output {
     pub fn is_fixed(&self) -> bool {
         self.hash_with_mode.is_some()
@@ -34,3 +58,89 @@ impl Output {
         Ok(())
     }
 }
+
+/// This ensures that a potentially valid input addressed
+/// output is deserialized as a non-fixed output.
+#[test]
+fn deserialize_valid_input_addressed_output() {
+    let json_bytes = r#"
+    {
+      "path": "/nix/store/blablabla"
+    }"#;
+    let output: Output = serde_json::from_str(&json_bytes).expect("must parse");
+
+    assert!(!output.is_fixed());
+}
+
+/// This ensures that a potentially valid fixed
+/// output deserializes fine as a fixed output.
+#[test]
+fn deserialize_valid_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba",
+        "hashAlgo": "r:sha256"
+    }"#;
+    let output: Output = serde_json::from_str(&json_bytes).expect("must parse");
+
+    assert!(output.is_fixed());
+}
+
+/// This ensures that parsing an input with the invalid hash encoding
+/// will result in a parsing failure.
+#[test]
+fn deserialize_with_error_invalid_hash_encoding_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hash": "IAMNOTVALIDNIXBASE32",
+        "hashAlgo": "r:sha256"
+    }"#;
+    let output: Result<Output, _> = serde_json::from_str(&json_bytes);
+
+    assert!(output.is_err());
+}
+
+/// This ensures that parsing an input with the wrong hash algo
+/// will result in a parsing failure.
+#[test]
+fn deserialize_with_error_invalid_hash_algo_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba",
+        "hashAlgo": "r:sha1024"
+    }"#;
+    let output: Result<Output, _> = serde_json::from_str(&json_bytes);
+
+    assert!(output.is_err());
+}
+
+/// This ensures that parsing an input with the missing hash algo but present hash will result in a
+/// parsing failure.
+#[test]
+fn deserialize_with_error_missing_hash_algo_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hash": "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba",
+    }"#;
+    let output: Result<Output, _> = serde_json::from_str(&json_bytes);
+
+    assert!(output.is_err());
+}
+
+/// This ensures that parsing an input with the missing hash but present hash algo will result in a
+/// parsing failure.
+#[test]
+fn deserialize_with_error_missing_hash_fixed_output() {
+    let json_bytes = r#"
+    {
+        "path": "/nix/store/blablablabla",
+        "hashAlgo": "r:sha1024"
+    }"#;
+    let output: Result<Output, _> = serde_json::from_str(&json_bytes);
+
+    assert!(output.is_err());
+}
diff --git a/tvix/nix-compat/src/nixhash/algos.rs b/tvix/nix-compat/src/nixhash/algos.rs
index d6b0bf47bdb7..45cc242dc330 100644
--- a/tvix/nix-compat/src/nixhash/algos.rs
+++ b/tvix/nix-compat/src/nixhash/algos.rs
@@ -24,6 +24,11 @@ impl Display for HashAlgo {
     }
 }
 
+/// TODO(Raito): this could be automated via macros, I suppose.
+/// But this may be more expensive than just doing it by hand
+/// and ensuring that is kept in sync.
+pub const SUPPORTED_ALGOS: [&str; 4] = ["md5", "sha1", "sha256", "sha512"];
+
 impl TryFrom<&str> for HashAlgo {
     type Error = Error;
 
diff --git a/tvix/nix-compat/src/nixhash/with_mode.rs b/tvix/nix-compat/src/nixhash/with_mode.rs
index 1908f27b4759..344322046614 100644
--- a/tvix/nix-compat/src/nixhash/with_mode.rs
+++ b/tvix/nix-compat/src/nixhash/with_mode.rs
@@ -1,7 +1,11 @@
 use crate::nixbase32;
 use crate::nixhash::{HashAlgo, NixHash};
+use serde::de::Unexpected;
 use serde::ser::SerializeMap;
 use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use serde_json::{Map, Value};
+
+use super::algos::SUPPORTED_ALGOS;
 
 pub enum NixHashMode {
     Flat,
@@ -45,6 +49,90 @@ impl NixHashWithMode {
     pub fn to_nix_hash_string(&self) -> String {
         String::from(self.mode().prefix()) + &self.digest().to_nix_hash_string()
     }
+
+    /// This takes a serde_json::Map and turns it into this structure. Such shenanigans are
+    /// necessary because we have external consumers, like the Derivation parser, who would like to
+    /// know whether we have an invalid or a missing NixHashWithMode structure in another structure,
+    /// e.g. Output.
+    /// This means we have this combinatorial situation:
+    /// - no hash, no hashAlgo: no NixHashWithMode so we return Ok(None).
+    /// - present hash, missing hashAlgo: invalid, we will return missing_field
+    /// - missing hash, present hashAlgo: same
+    /// - present hash, present hashAlgo: either we return ourselves or a type/value validation
+    /// error.
+    /// This function is for internal consumption regarding those needs until we have a better
+    /// solution. Now this is said, let's explain how this works.
+    ///
+    /// We want to map the serde data model into a NixHashWithMode.
+    ///
+    /// The serde data model has a `hash` field (containing a digest in nixbase32),
+    /// and a `hashAlgo` field, containing the stringified hash algo.
+    /// In case the hash is recursive, hashAlgo also has a `r:` prefix.
+    ///
+    /// This is to match how `nix show-derivation` command shows them in JSON
+    /// representation.
+    pub(crate) fn from_map<'de, D>(map: &Map<String, Value>) -> Result<Option<Self>, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        // If we have neither hash nor hashAlgo, let's just return None.
+        if !map.contains_key("hash") && !map.contains_key("hashAlgo") {
+            return Ok(None);
+        }
+
+        let digest: Vec<u8> = {
+            if let Some(v) = map.get("hash") {
+                if let Some(s) = v.as_str() {
+                    data_encoding::HEXLOWER
+                        .decode(s.as_bytes())
+                        .map_err(|e| serde::de::Error::custom(e.to_string()))?
+                } else {
+                    return Err(serde::de::Error::invalid_type(
+                        Unexpected::Other(&v.to_string()),
+                        &"a string",
+                    ));
+                }
+            } else {
+                return Err(serde::de::Error::missing_field(
+                    "couldn't extract `hash` key but `hashAlgo` key present",
+                ));
+            }
+        };
+
+        if let Some(v) = map.get("hashAlgo") {
+            if let Some(s) = v.as_str() {
+                match s.strip_prefix("r:") {
+                    Some(rest) => Ok(Some(Self::Recursive(NixHash::new(
+                        HashAlgo::try_from(rest).map_err(|e| {
+                            serde::de::Error::invalid_value(
+                                Unexpected::Other(&e.to_string()),
+                                &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(),
+                            )
+                        })?,
+                        digest,
+                    )))),
+                    None => Ok(Some(Self::Flat(NixHash::new(
+                        HashAlgo::try_from(s).map_err(|e| {
+                            serde::de::Error::invalid_value(
+                                Unexpected::Other(&e.to_string()),
+                                &format!("one of {}", SUPPORTED_ALGOS.join(",")).as_str(),
+                            )
+                        })?,
+                        digest,
+                    )))),
+                }
+            } else {
+                Err(serde::de::Error::invalid_type(
+                    Unexpected::Other(&v.to_string()),
+                    &"a string",
+                ))
+            }
+        } else {
+            Err(serde::de::Error::missing_field(
+                "couldn't extract `hashAlgo` key, but `hash` key present",
+            ))
+        }
+    }
 }
 
 impl Serialize for NixHashWithMode {
@@ -69,68 +157,15 @@ impl Serialize for NixHashWithMode {
 }
 
 impl<'de> Deserialize<'de> for NixHashWithMode {
-    /// map the serde data model into a NixHashWithMode.
-    ///
-    /// The serde data model has a `hash` field (containing a digest in nixbase32),
-    /// and a `hashAlgo` field, containing the stringified hash algo.
-    /// In case the hash is recursive, hashAlgo also has a `r:` prefix.
-    ///
-    /// This is to match how `nix show-derivation` command shows them in JSON
-    /// representation.
     fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
     where
         D: Deserializer<'de>,
     {
-        // TODO: don't use serde_json here?
-        // TODO: serde seems to simply set `hash_with_mode` to None if hash
-        // and hashAlgo fail, but that should be a proper deserialization error
-        // that should be propagated to the user!
+        let value = Self::from_map::<D>(&Map::deserialize(deserializer)?)?;
 
-        let json = serde_json::Value::deserialize(deserializer)?;
-        match json.as_object() {
-            None => Err(serde::de::Error::custom("couldn't parse as map"))?,
-            Some(map) => {
-                let digest: Vec<u8> = {
-                    if let Some(v) = map.get("hash") {
-                        if let Some(s) = v.as_str() {
-                            data_encoding::HEXLOWER
-                                .decode(s.as_bytes())
-                                .map_err(|e| serde::de::Error::custom(e.to_string()))?
-                        } else {
-                            return Err(serde::de::Error::custom(
-                                "couldn't parse 'hash' as string",
-                            ));
-                        }
-                    } else {
-                        return Err(serde::de::Error::custom("couldn't extract 'hash' key"));
-                    }
-                };
-
-                if let Some(v) = map.get("hashAlgo") {
-                    if let Some(s) = v.as_str() {
-                        match s.strip_prefix("r:") {
-                            Some(rest) => Ok(NixHashWithMode::Recursive(NixHash::new(
-                                HashAlgo::try_from(rest).map_err(|e| {
-                                    serde::de::Error::custom(format!("unable to parse algo: {}", e))
-                                })?,
-                                digest,
-                            ))),
-                            None => Ok(NixHashWithMode::Flat(NixHash::new(
-                                HashAlgo::try_from(s).map_err(|e| {
-                                    serde::de::Error::custom(format!("unable to parse algo: {}", e))
-                                })?,
-                                digest,
-                            ))),
-                        }
-                    } else {
-                        Err(serde::de::Error::custom(
-                            "couldn't parse 'hashAlgo' as string",
-                        ))
-                    }
-                } else {
-                    Err(serde::de::Error::custom("couldn't extract 'hashAlgo' key"))
-                }
-            }
+        match value {
+            None => Err(serde::de::Error::custom("couldn't parse as map")),
+            Some(v) => Ok(v),
         }
     }
 }