about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2022-12-27T17·10+0100
committerflokli <flokli@flokli.de>2022-12-28T10·58+0000
commit7163d3ad37105a97fdc7afdf4ad0da7579494fab (patch)
treee3ff096674fed257b4680be9300f4d2318aaed94
parent0c7e545fd061e9c2429eda8a89ba9b9d1a1c5212 (diff)
feat(tvix/store): implement Directory::validate() r/5520
Change-Id: I4c6ae79d705b8e19a3e2ed54812366e88935d7a6
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7650
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
-rw-r--r--tvix/Cargo.lock14
-rw-r--r--tvix/Cargo.nix20
-rw-r--r--tvix/store/Cargo.toml2
-rw-r--r--tvix/store/src/proto.rs328
4 files changed, 351 insertions, 13 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index 7ca7b330cb..04af420aea 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -34,9 +34,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
 
 [[package]]
 name = "anyhow"
-version = "1.0.66"
+version = "1.0.68"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6"
+checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61"
 
 [[package]]
 name = "arrayref"
@@ -1775,18 +1775,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
 
 [[package]]
 name = "thiserror"
-version = "1.0.37"
+version = "1.0.38"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e"
+checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.37"
+version = "1.0.38"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb"
+checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f"
 dependencies = [
  "proc-macro2 1.0.47",
  "quote 1.0.21",
@@ -2066,10 +2066,12 @@ version = "0.0.0"
 name = "tvix-store"
 version = "0.1.0"
 dependencies = [
+ "anyhow",
  "blake3",
  "lazy_static",
  "prost",
  "prost-build",
+ "thiserror",
  "tonic",
  "tonic-build",
 ]
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 8076dd2c4a..8e55db79c4 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -200,9 +200,9 @@ rec {
       };
       "anyhow" = rec {
         crateName = "anyhow";
-        version = "1.0.66";
+        version = "1.0.68";
         edition = "2018";
-        sha256 = "1xj3ahmwjlbiqsajhkaa0q6hqwb4l3l5rkfxa7jk1498r3fn2qi1";
+        sha256 = "0qdb5x89jpngjrl40fpp8047xlydm38n8bz8iaml3lcds64zkcic";
         authors = [
           "David Tolnay <dtolnay@gmail.com>"
         ];
@@ -5028,9 +5028,9 @@ rec {
       };
       "thiserror" = rec {
         crateName = "thiserror";
-        version = "1.0.37";
+        version = "1.0.38";
         edition = "2018";
-        sha256 = "0gky83x4i87gd87w3fknnp920wvk9yycp7dgkf5h3jg364vb7phh";
+        sha256 = "1l7yh18iqcr2jnl6qjx3ywvhny98cvda3biwc334ap3xm65d373a";
         authors = [
           "David Tolnay <dtolnay@gmail.com>"
         ];
@@ -5044,9 +5044,9 @@ rec {
       };
       "thiserror-impl" = rec {
         crateName = "thiserror-impl";
-        version = "1.0.37";
+        version = "1.0.38";
         edition = "2018";
-        sha256 = "1fydmpksd14x1mkc24zas01qjssz8q43sbn2ywl6n527dda1fbcq";
+        sha256 = "0vzkcjqkzzgrwwby92xvnbp11a8d70b1gkybm0zx1r458spjgcqz";
         procMacro = true;
         authors = [
           "David Tolnay <dtolnay@gmail.com>"
@@ -6163,6 +6163,10 @@ rec {
           else ./store;
         dependencies = [
           {
+            name = "anyhow";
+            packageId = "anyhow";
+          }
+          {
             name = "blake3";
             packageId = "blake3";
             features = [ "rayon" "std" ];
@@ -6176,6 +6180,10 @@ rec {
             packageId = "prost";
           }
           {
+            name = "thiserror";
+            packageId = "thiserror";
+          }
+          {
             name = "tonic";
             packageId = "tonic";
           }
diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml
index 66eac52063..cf57723d21 100644
--- a/tvix/store/Cargo.toml
+++ b/tvix/store/Cargo.toml
@@ -4,9 +4,11 @@ version = "0.1.0"
 edition = "2021"
 
 [dependencies]
+anyhow = "1.0.68"
 blake3 = { version = "1.3.1", features = ["rayon", "std"] }
 lazy_static = "1.4.0"
 prost = "0.11.2"
+thiserror = "1.0.38"
 tonic = "0.8.2"
 
 [build-dependencies]
diff --git a/tvix/store/src/proto.rs b/tvix/store/src/proto.rs
index ccc3d5a3bf..33b6bc44b3 100644
--- a/tvix/store/src/proto.rs
+++ b/tvix/store/src/proto.rs
@@ -1,7 +1,82 @@
+use anyhow::Result;
+use std::collections::HashSet;
+use thiserror::Error;
+
 use prost::Message;
 
 tonic::include_proto!("tvix.store.v1");
 
+/// Errors that can occur during the validation of Directory messages.
+#[derive(Debug, Error, PartialEq)]
+pub enum ValidateDirectoryError {
+    /// Elements are not in sorted order
+    #[error("{0} is not sorted")]
+    WrongSorting(String),
+    /// Multiple elements with the same name encountered
+    #[error("{0} is a duplicate name")]
+    DuplicateName(String),
+    /// Invalid name encountered
+    #[error("Invalid name in {0}")]
+    InvalidName(String),
+    /// Invalid digest length encountered
+    #[error("Ivalid Digest length: {0}")]
+    InvalidDigestLen(usize),
+}
+
+/// Checks a name for validity.
+/// We disallow slashes, null bytes, '.', '..' and the empty string.
+/// Depending on the context, a [DirectoryNode], [FileNode] or [SymlinkNode]
+/// message with an empty string as name is allowed, but they don't occur
+/// inside a Directory message.
+fn validate_node_name(name: &str) -> Result<(), ValidateDirectoryError> {
+    if name == "" || name == ".." || name == "." || name.contains("\x00") || name.contains("/") {
+        return Err(ValidateDirectoryError::InvalidName(
+            name.to_string().clone(),
+        ));
+    }
+    Ok(())
+}
+
+/// Checks a digest for validity.
+/// Digests are 32 bytes long, as we store blake3 digests.
+fn validate_digest(digest: &Vec<u8>) -> Result<(), ValidateDirectoryError> {
+    if digest.len() != 32 {
+        return Err(ValidateDirectoryError::InvalidDigestLen(digest.len()));
+    }
+    Ok(())
+}
+
+/// Accepts a name, and a mutable reference to the previous name.
+/// If the passed name is larger than the previous one, the reference is updated.
+/// If it's not, an error is returned.
+fn update_if_lt_prev<'set, 'n>(
+    prev_name: &'set mut &'n str,
+    name: &'n str,
+) -> Result<(), ValidateDirectoryError> {
+    if *name < **prev_name {
+        return Err(ValidateDirectoryError::WrongSorting(
+            name.to_string().clone(),
+        ));
+    }
+    *prev_name = name;
+    Ok(())
+}
+
+/// Inserts the given name into a HashSet if it's not already in there.
+/// If it is, an error is returned.
+fn insert_once<'n>(
+    seen_names: &mut HashSet<&'n str>,
+    name: &'n str,
+) -> Result<(), ValidateDirectoryError> {
+    if seen_names.get(name).is_some() {
+        return Err(ValidateDirectoryError::DuplicateName(
+            name.to_string().clone(),
+        ));
+    }
+    seen_names.insert(name);
+    Ok(())
+}
+
 impl Directory {
     // The size of a directory is the number of all regular and symlink elements,
     // the number of directory elements, and their size fields.
@@ -19,11 +94,52 @@ impl Directory {
 
         hasher.update(&self.encode_to_vec()).finalize().as_bytes()[..].to_vec()
     }
+
+    /// validate checks the directory for invalid data, such as:
+    /// - violations of name restrictions
+    /// - invalid digest lengths
+    /// - not properly sorted lists
+    /// - duplicate names in the three lists
+    pub fn validate(&self) -> Result<(), ValidateDirectoryError> {
+        let mut seen_names: HashSet<&str> = HashSet::new();
+
+        let mut last_directory_name: &str = "";
+        let mut last_file_name: &str = "";
+        let mut last_symlink_name: &str = "";
+
+        // check directories
+        for directory_node in &self.directories {
+            validate_node_name(&directory_node.name)?;
+            validate_digest(&directory_node.digest)?;
+
+            update_if_lt_prev(&mut last_directory_name, &mut directory_node.name.as_str())?;
+            insert_once(&mut seen_names, &directory_node.name.as_str())?;
+        }
+
+        // check files
+        for file_node in &self.files {
+            validate_node_name(&file_node.name)?;
+            validate_digest(&file_node.digest)?;
+
+            update_if_lt_prev(&mut last_file_name, &mut file_node.name.as_str())?;
+            insert_once(&mut seen_names, &file_node.name.as_str())?;
+        }
+
+        // check symlinks
+        for symlink_node in &self.symlinks {
+            validate_node_name(&symlink_node.name)?;
+
+            update_if_lt_prev(&mut last_symlink_name, &mut symlink_node.name.as_str())?;
+            insert_once(&mut seen_names, &symlink_node.name.as_str())?;
+        }
+
+        Ok(())
+    }
 }
 
 #[cfg(test)]
 mod tests {
-    use super::{Directory, DirectoryNode, FileNode, SymlinkNode};
+    use super::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError};
     use lazy_static::lazy_static;
 
     lazy_static! {
@@ -98,4 +214,214 @@ mod tests {
             ]
         )
     }
+
+    #[test]
+    fn test_directory_validate_empty() {
+        let d = Directory::default();
+        assert_eq!(d.validate(), Ok(()));
+    }
+
+    #[test]
+    fn test_directory_validate_invalid_names() {
+        {
+            let d = Directory {
+                directories: vec![DirectoryNode {
+                    name: "".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                }],
+                ..Default::default()
+            };
+            match d.validate().expect_err("must fail") {
+                ValidateDirectoryError::InvalidName(n) => {
+                    assert_eq!(n, "")
+                }
+                _ => panic!("unexpected error"),
+            };
+        }
+
+        {
+            let d = Directory {
+                directories: vec![DirectoryNode {
+                    name: ".".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                }],
+                ..Default::default()
+            };
+            match d.validate().expect_err("must fail") {
+                ValidateDirectoryError::InvalidName(n) => {
+                    assert_eq!(n, ".")
+                }
+                _ => panic!("unexpected error"),
+            };
+        }
+
+        {
+            let d = Directory {
+                files: vec![FileNode {
+                    name: "..".to_string(),
+                    digest: DUMMY_DIGEST.to_vec(),
+                    size: 42,
+                    executable: false,
+                }],
+                ..Default::default()
+            };
+            match d.validate().expect_err("must fail") {
+                ValidateDirectoryError::InvalidName(n) => {
+                    assert_eq!(n, "..")
+                }
+                _ => panic!("unexpected error"),
+            };
+        }
+
+        {
+            let d = Directory {
+                symlinks: vec![SymlinkNode {
+                    name: "\x00".to_string(),
+                    target: "foo".to_string(),
+                }],
+                ..Default::default()
+            };
+            match d.validate().expect_err("must fail") {
+                ValidateDirectoryError::InvalidName(n) => {
+                    assert_eq!(n, "\x00")
+                }
+                _ => panic!("unexpected error"),
+            };
+        }
+
+        {
+            let d = Directory {
+                symlinks: vec![SymlinkNode {
+                    name: "foo/bar".to_string(),
+                    target: "foo".to_string(),
+                }],
+                ..Default::default()
+            };
+            match d.validate().expect_err("must fail") {
+                ValidateDirectoryError::InvalidName(n) => {
+                    assert_eq!(n, "foo/bar")
+                }
+                _ => panic!("unexpected error"),
+            };
+        }
+    }
+
+    #[test]
+    fn test_directory_validate_invalid_digest() {
+        let d = Directory {
+            directories: vec![DirectoryNode {
+                name: "foo".to_string(),
+                digest: vec![0x00, 0x42], // invalid length
+                size: 42,
+            }],
+            ..Default::default()
+        };
+        match d.validate().expect_err("must fail") {
+            ValidateDirectoryError::InvalidDigestLen(n) => {
+                assert_eq!(n, 2)
+            }
+            _ => panic!("unexpected error"),
+        }
+    }
+
+    #[test]
+    fn test_directory_validate_sorting() {
+        // "b" comes before "a", bad.
+        {
+            let d = Directory {
+                directories: vec![
+                    DirectoryNode {
+                        name: "b".to_string(),
+                        digest: DUMMY_DIGEST.to_vec(),
+                        size: 42,
+                    },
+                    DirectoryNode {
+                        name: "a".to_string(),
+                        digest: DUMMY_DIGEST.to_vec(),
+                        size: 42,
+                    },
+                ],
+                ..Default::default()
+            };
+            match d.validate().expect_err("must fail") {
+                ValidateDirectoryError::WrongSorting(s) => {
+                    assert_eq!(s, "a".to_string());
+                }
+                _ => panic!("unexpected error"),
+            }
+        }
+
+        // "a" exists twice, bad.
+        {
+            let d = Directory {
+                directories: vec![
+                    DirectoryNode {
+                        name: "a".to_string(),
+                        digest: DUMMY_DIGEST.to_vec(),
+                        size: 42,
+                    },
+                    DirectoryNode {
+                        name: "a".to_string(),
+                        digest: DUMMY_DIGEST.to_vec(),
+                        size: 42,
+                    },
+                ],
+                ..Default::default()
+            };
+            match d.validate().expect_err("must fail") {
+                ValidateDirectoryError::DuplicateName(s) => {
+                    assert_eq!(s, "a".to_string());
+                }
+                _ => panic!("unexpected error"),
+            }
+        }
+
+        // "a" comes before "b", all good.
+        {
+            let d = Directory {
+                directories: vec![
+                    DirectoryNode {
+                        name: "a".to_string(),
+                        digest: DUMMY_DIGEST.to_vec(),
+                        size: 42,
+                    },
+                    DirectoryNode {
+                        name: "b".to_string(),
+                        digest: DUMMY_DIGEST.to_vec(),
+                        size: 42,
+                    },
+                ],
+                ..Default::default()
+            };
+
+            d.validate().expect("validate shouldn't error");
+        }
+
+        // [b, c] and [a] are both properly sorted.
+        {
+            let d = Directory {
+                directories: vec![
+                    DirectoryNode {
+                        name: "b".to_string(),
+                        digest: DUMMY_DIGEST.to_vec(),
+                        size: 42,
+                    },
+                    DirectoryNode {
+                        name: "c".to_string(),
+                        digest: DUMMY_DIGEST.to_vec(),
+                        size: 42,
+                    },
+                ],
+                symlinks: vec![SymlinkNode {
+                    name: "a".to_string(),
+                    target: "foo".to_string(),
+                }],
+                ..Default::default()
+            };
+
+            d.validate().expect("validate shouldn't error");
+        }
+    }
 }