From 90979d39f330d788f50528056a5180912973831b Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Fri, 20 Jan 2023 23:40:06 +0100 Subject: feat(tvix/store/directory): validate Directory and sizes This calls out to Directory::validate() for all received Directory messages, and also makes sure the sizes we refer a Directory message as matches the sizes that have been calculated. Change-Id: I316f9191d5872ee4ba6d78b9a4326f069b22fa63 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7882 Tested-by: BuildkiteCI Reviewed-by: tazjin --- tvix/store/src/sled_directory_service.rs | 46 ++++++++++++++++++----- tvix/store/src/tests/directory_service.rs | 62 ++++++++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 10 deletions(-) diff --git a/tvix/store/src/sled_directory_service.rs b/tvix/store/src/sled_directory_service.rs index 3d56b8ef3e..4b2c6ed54d 100644 --- a/tvix/store/src/sled_directory_service.rs +++ b/tvix/store/src/sled_directory_service.rs @@ -1,4 +1,5 @@ use data_encoding::BASE64; +use std::collections::HashMap; use std::collections::HashSet; use std::collections::VecDeque; use std::path::PathBuf; @@ -131,20 +132,47 @@ fn insert_directories( txn: &sled::transaction::TransactionalTree, directories: &Vec, ) -> Result, Status> { - // This keeps track of the seen directory keys. + // This keeps track of the seen directory keys, and their size. + // This is used to validate the size field of a reference to a previously sent directory. // We don't need to keep the contents around, they're stored in the DB. - let mut seen_directory_dgsts: HashSet> = HashSet::new(); + let mut seen_directories_sizes: HashMap, u32> = HashMap::new(); let mut last_directory_dgst: Option> = None; for directory in directories { + // validate the directory itself. + if let Err(e) = directory.validate() { + return Err(Status::invalid_argument(format!( + "directory {} failed validation: {}", + BASE64.encode(&directory.digest()), + e, + ))); + } + // for each child directory this directory refers to, we need - // to ensure it has been seen already in this stream. + // to ensure it has been seen already in this stream, and that the size + // matches what we recorded. for child_directory in &directory.directories { - if !seen_directory_dgsts.contains(&child_directory.digest) { - return Err(Status::invalid_argument(format!( - "referred child directory {} not seen yet", - BASE64.encode(&child_directory.digest) - ))); + match seen_directories_sizes.get(&child_directory.digest) { + None => { + return Err(Status::invalid_argument(format!( + "child directory '{}' ({}) in directory '{}' not seen yet", + child_directory.name, + BASE64.encode(&child_directory.digest), + BASE64.encode(&directory.digest()), + ))); + } + Some(seen_child_directory_size) => { + if seen_child_directory_size != &child_directory.size { + return Err(Status::invalid_argument(format!( + "child directory '{}' ({}) in directory '{}' referred with wrong size, expected {}, actual {}", + child_directory.name, + BASE64.encode(&child_directory.digest), + BASE64.encode(&directory.digest()), + seen_child_directory_size, + child_directory.size, + ))); + } + } } } @@ -153,7 +181,7 @@ fn insert_directories( // disconnected graphs at the same timeā€¦ let dgst = directory.digest(); - seen_directory_dgsts.insert(dgst.clone()); + seen_directories_sizes.insert(dgst.clone(), directory.size()); last_directory_dgst = Some(dgst.clone()); // check if the directory already exists in the database. We can skip diff --git a/tvix/store/src/tests/directory_service.rs b/tvix/store/src/tests/directory_service.rs index 38cc0897c2..8c66ccb538 100644 --- a/tvix/store/src/tests/directory_service.rs +++ b/tvix/store/src/tests/directory_service.rs @@ -5,7 +5,7 @@ use tonic::Status; use crate::proto::directory_service_server::DirectoryService; use crate::proto::get_directory_request::ByWhat; use crate::proto::GetDirectoryRequest; -use crate::proto::{Directory, DirectoryNode}; +use crate::proto::{Directory, DirectoryNode, SymlinkNode}; use crate::sled_directory_service::SledDirectoryService; use lazy_static::lazy_static; @@ -206,3 +206,63 @@ async fn put_get_dedup() -> anyhow::Result<()> { Ok(()) } + +/// Trying to upload a Directory failing validation should fail. +#[tokio::test] +async fn put_reject_failed_validation() -> anyhow::Result<()> { + let service = SledDirectoryService::new(TempDir::new()?.path().to_path_buf())?; + + // construct a broken Directory message that fails validation + let broken_directory = Directory { + symlinks: vec![SymlinkNode { + name: "".to_string(), + target: "doesntmatter".to_string(), + }], + ..Default::default() + }; + assert!(broken_directory.validate().is_err()); + + // send it over, it must fail + let put_resp = service + .put(tonic_mock::streaming_request(vec![broken_directory])) + .await + .expect_err("must fail"); + + assert_eq!(put_resp.code(), tonic::Code::InvalidArgument); + + Ok(()) +} + +/// Trying to upload a Directory with wrong size should fail. +#[tokio::test] +async fn put_reject_wrong_size() -> anyhow::Result<()> { + let service = SledDirectoryService::new(TempDir::new()?.path().to_path_buf())?; + + // Construct a directory referring to DIRECTORY_A, but with wrong size. + let broken_parent_directory = Directory { + directories: vec![DirectoryNode { + name: "foo".to_string(), + digest: DIRECTORY_A.digest(), + size: 42, + }], + ..Default::default() + }; + // Make sure we got the size wrong. + assert_ne!( + broken_parent_directory.directories[0].size, + DIRECTORY_A.size() + ); + + // now upload both (first A, then the broken parent). This must fail. + let put_resp = service + .put(tonic_mock::streaming_request(vec![ + DIRECTORY_A.clone(), + broken_parent_directory, + ])) + .await + .expect_err("must fail"); + + assert_eq!(put_resp.code(), tonic::Code::InvalidArgument); + + Ok(()) +} -- cgit 1.4.1