From 7163d3ad37105a97fdc7afdf4ad0da7579494fab Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Tue, 27 Dec 2022 18:10:46 +0100 Subject: feat(tvix/store): implement Directory::validate() Change-Id: I4c6ae79d705b8e19a3e2ed54812366e88935d7a6 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7650 Reviewed-by: tazjin Tested-by: BuildkiteCI --- tvix/Cargo.lock | 14 ++- tvix/Cargo.nix | 20 ++- tvix/store/Cargo.toml | 2 + tvix/store/src/proto.rs | 328 +++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 351 insertions(+), 13 deletions(-) diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index 7ca7b330cb5c..04af420aeac3 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -34,9 +34,9 @@ checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" [[package]] name = "anyhow" -version = "1.0.66" +version = "1.0.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "216261ddc8289130e551ddcd5ce8a064710c0d064a4d2895c67151c92b5443f6" +checksum = "2cb2f989d18dd141ab8ae82f64d1a8cdd37e0840f73a406896cf5e99502fab61" [[package]] name = "arrayref" @@ -1775,18 +1775,18 @@ checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d" [[package]] name = "thiserror" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10deb33631e3c9018b9baf9dcbbc4f737320d2b576bac10f6aefa048fa407e3e" +checksum = "6a9cd18aa97d5c45c6603caea1da6628790b37f7a34b6ca89522331c5180fed0" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.37" +version = "1.0.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "982d17546b47146b28f7c22e3d08465f6b8903d0ea13c1660d9d84a6e7adcdbb" +checksum = "1fb327af4685e4d03fa8cbcf1716380da910eeb2bb8be417e7f9fd3fb164f36f" dependencies = [ "proc-macro2 1.0.47", "quote 1.0.21", @@ -2066,10 +2066,12 @@ version = "0.0.0" name = "tvix-store" version = "0.1.0" dependencies = [ + "anyhow", "blake3", "lazy_static", "prost", "prost-build", + "thiserror", "tonic", "tonic-build", ] diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 8076dd2c4ada..8e55db79c45f 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -200,9 +200,9 @@ rec { }; "anyhow" = rec { crateName = "anyhow"; - version = "1.0.66"; + version = "1.0.68"; edition = "2018"; - sha256 = "1xj3ahmwjlbiqsajhkaa0q6hqwb4l3l5rkfxa7jk1498r3fn2qi1"; + sha256 = "0qdb5x89jpngjrl40fpp8047xlydm38n8bz8iaml3lcds64zkcic"; authors = [ "David Tolnay " ]; @@ -5028,9 +5028,9 @@ rec { }; "thiserror" = rec { crateName = "thiserror"; - version = "1.0.37"; + version = "1.0.38"; edition = "2018"; - sha256 = "0gky83x4i87gd87w3fknnp920wvk9yycp7dgkf5h3jg364vb7phh"; + sha256 = "1l7yh18iqcr2jnl6qjx3ywvhny98cvda3biwc334ap3xm65d373a"; authors = [ "David Tolnay " ]; @@ -5044,9 +5044,9 @@ rec { }; "thiserror-impl" = rec { crateName = "thiserror-impl"; - version = "1.0.37"; + version = "1.0.38"; edition = "2018"; - sha256 = "1fydmpksd14x1mkc24zas01qjssz8q43sbn2ywl6n527dda1fbcq"; + sha256 = "0vzkcjqkzzgrwwby92xvnbp11a8d70b1gkybm0zx1r458spjgcqz"; procMacro = true; authors = [ "David Tolnay " @@ -6162,6 +6162,10 @@ rec { then lib.cleanSourceWith { filter = sourceFilter; src = ./store; } else ./store; dependencies = [ + { + name = "anyhow"; + packageId = "anyhow"; + } { name = "blake3"; packageId = "blake3"; @@ -6175,6 +6179,10 @@ rec { name = "prost"; packageId = "prost"; } + { + name = "thiserror"; + packageId = "thiserror"; + } { name = "tonic"; packageId = "tonic"; diff --git a/tvix/store/Cargo.toml b/tvix/store/Cargo.toml index 66eac5206363..cf57723d2159 100644 --- a/tvix/store/Cargo.toml +++ b/tvix/store/Cargo.toml @@ -4,9 +4,11 @@ version = "0.1.0" edition = "2021" [dependencies] +anyhow = "1.0.68" blake3 = { version = "1.3.1", features = ["rayon", "std"] } lazy_static = "1.4.0" prost = "0.11.2" +thiserror = "1.0.38" tonic = "0.8.2" [build-dependencies] diff --git a/tvix/store/src/proto.rs b/tvix/store/src/proto.rs index ccc3d5a3bf44..33b6bc44b340 100644 --- a/tvix/store/src/proto.rs +++ b/tvix/store/src/proto.rs @@ -1,7 +1,82 @@ +use anyhow::Result; +use std::collections::HashSet; +use thiserror::Error; + use prost::Message; tonic::include_proto!("tvix.store.v1"); +/// Errors that can occur during the validation of Directory messages. +#[derive(Debug, Error, PartialEq)] +pub enum ValidateDirectoryError { + /// Elements are not in sorted order + #[error("{0} is not sorted")] + WrongSorting(String), + /// Multiple elements with the same name encountered + #[error("{0} is a duplicate name")] + DuplicateName(String), + /// Invalid name encountered + #[error("Invalid name in {0}")] + InvalidName(String), + /// Invalid digest length encountered + #[error("Ivalid Digest length: {0}")] + InvalidDigestLen(usize), +} + +/// Checks a name for validity. +/// We disallow slashes, null bytes, '.', '..' and the empty string. +/// Depending on the context, a [DirectoryNode], [FileNode] or [SymlinkNode] +/// message with an empty string as name is allowed, but they don't occur +/// inside a Directory message. +fn validate_node_name(name: &str) -> Result<(), ValidateDirectoryError> { + if name == "" || name == ".." || name == "." || name.contains("\x00") || name.contains("/") { + return Err(ValidateDirectoryError::InvalidName( + name.to_string().clone(), + )); + } + Ok(()) +} + +/// Checks a digest for validity. +/// Digests are 32 bytes long, as we store blake3 digests. +fn validate_digest(digest: &Vec) -> Result<(), ValidateDirectoryError> { + if digest.len() != 32 { + return Err(ValidateDirectoryError::InvalidDigestLen(digest.len())); + } + Ok(()) +} + +/// Accepts a name, and a mutable reference to the previous name. +/// If the passed name is larger than the previous one, the reference is updated. +/// If it's not, an error is returned. +fn update_if_lt_prev<'set, 'n>( + prev_name: &'set mut &'n str, + name: &'n str, +) -> Result<(), ValidateDirectoryError> { + if *name < **prev_name { + return Err(ValidateDirectoryError::WrongSorting( + name.to_string().clone(), + )); + } + *prev_name = name; + Ok(()) +} + +/// Inserts the given name into a HashSet if it's not already in there. +/// If it is, an error is returned. +fn insert_once<'n>( + seen_names: &mut HashSet<&'n str>, + name: &'n str, +) -> Result<(), ValidateDirectoryError> { + if seen_names.get(name).is_some() { + return Err(ValidateDirectoryError::DuplicateName( + name.to_string().clone(), + )); + } + seen_names.insert(name); + Ok(()) +} + impl Directory { // The size of a directory is the number of all regular and symlink elements, // the number of directory elements, and their size fields. @@ -19,11 +94,52 @@ impl Directory { hasher.update(&self.encode_to_vec()).finalize().as_bytes()[..].to_vec() } + + /// validate checks the directory for invalid data, such as: + /// - violations of name restrictions + /// - invalid digest lengths + /// - not properly sorted lists + /// - duplicate names in the three lists + pub fn validate(&self) -> Result<(), ValidateDirectoryError> { + let mut seen_names: HashSet<&str> = HashSet::new(); + + let mut last_directory_name: &str = ""; + let mut last_file_name: &str = ""; + let mut last_symlink_name: &str = ""; + + // check directories + for directory_node in &self.directories { + validate_node_name(&directory_node.name)?; + validate_digest(&directory_node.digest)?; + + update_if_lt_prev(&mut last_directory_name, &mut directory_node.name.as_str())?; + insert_once(&mut seen_names, &directory_node.name.as_str())?; + } + + // check files + for file_node in &self.files { + validate_node_name(&file_node.name)?; + validate_digest(&file_node.digest)?; + + update_if_lt_prev(&mut last_file_name, &mut file_node.name.as_str())?; + insert_once(&mut seen_names, &file_node.name.as_str())?; + } + + // check symlinks + for symlink_node in &self.symlinks { + validate_node_name(&symlink_node.name)?; + + update_if_lt_prev(&mut last_symlink_name, &mut symlink_node.name.as_str())?; + insert_once(&mut seen_names, &symlink_node.name.as_str())?; + } + + Ok(()) + } } #[cfg(test)] mod tests { - use super::{Directory, DirectoryNode, FileNode, SymlinkNode}; + use super::{Directory, DirectoryNode, FileNode, SymlinkNode, ValidateDirectoryError}; use lazy_static::lazy_static; lazy_static! { @@ -98,4 +214,214 @@ mod tests { ] ) } + + #[test] + fn test_directory_validate_empty() { + let d = Directory::default(); + assert_eq!(d.validate(), Ok(())); + } + + #[test] + fn test_directory_validate_invalid_names() { + { + let d = Directory { + directories: vec![DirectoryNode { + name: "".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, "") + } + _ => panic!("unexpected error"), + }; + } + + { + let d = Directory { + directories: vec![DirectoryNode { + name: ".".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, ".") + } + _ => panic!("unexpected error"), + }; + } + + { + let d = Directory { + files: vec![FileNode { + name: "..".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + executable: false, + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, "..") + } + _ => panic!("unexpected error"), + }; + } + + { + let d = Directory { + symlinks: vec![SymlinkNode { + name: "\x00".to_string(), + target: "foo".to_string(), + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, "\x00") + } + _ => panic!("unexpected error"), + }; + } + + { + let d = Directory { + symlinks: vec![SymlinkNode { + name: "foo/bar".to_string(), + target: "foo".to_string(), + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidName(n) => { + assert_eq!(n, "foo/bar") + } + _ => panic!("unexpected error"), + }; + } + } + + #[test] + fn test_directory_validate_invalid_digest() { + let d = Directory { + directories: vec![DirectoryNode { + name: "foo".to_string(), + digest: vec![0x00, 0x42], // invalid length + size: 42, + }], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::InvalidDigestLen(n) => { + assert_eq!(n, 2) + } + _ => panic!("unexpected error"), + } + } + + #[test] + fn test_directory_validate_sorting() { + // "b" comes before "a", bad. + { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "b".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }, + DirectoryNode { + name: "a".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }, + ], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::WrongSorting(s) => { + assert_eq!(s, "a".to_string()); + } + _ => panic!("unexpected error"), + } + } + + // "a" exists twice, bad. + { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "a".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }, + DirectoryNode { + name: "a".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }, + ], + ..Default::default() + }; + match d.validate().expect_err("must fail") { + ValidateDirectoryError::DuplicateName(s) => { + assert_eq!(s, "a".to_string()); + } + _ => panic!("unexpected error"), + } + } + + // "a" comes before "b", all good. + { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "a".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }, + DirectoryNode { + name: "b".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }, + ], + ..Default::default() + }; + + d.validate().expect("validate shouldn't error"); + } + + // [b, c] and [a] are both properly sorted. + { + let d = Directory { + directories: vec![ + DirectoryNode { + name: "b".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }, + DirectoryNode { + name: "c".to_string(), + digest: DUMMY_DIGEST.to_vec(), + size: 42, + }, + ], + symlinks: vec![SymlinkNode { + name: "a".to_string(), + target: "foo".to_string(), + }], + ..Default::default() + }; + + d.validate().expect("validate shouldn't error"); + } + } } -- cgit 1.4.1