From 56fa533e438bd367aa5cae6fa505508aced42156 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 17 Aug 2024 22:00:06 +0300 Subject: refactor(tvix/castore): have PathComponent-specific errors Don't use DirectoryError, but PathComponentError. Also add checks for too long path components. Change-Id: Ia9deb9dd0351138baadb2e9c9454c3e019d5a45e Reviewed-on: https://cl.tvl.fyi/c/depot/+/12229 Tested-by: BuildkiteCI Reviewed-by: Ilan Joselevich Autosubmit: flokli Reviewed-by: edef --- tvix/castore/src/errors.rs | 6 +- tvix/castore/src/lib.rs | 2 +- tvix/castore/src/path/component.rs | 218 ++++++++++++++++++++++++++---- tvix/castore/src/path/mod.rs | 6 +- tvix/castore/src/proto/mod.rs | 39 ++++-- tvix/castore/src/proto/tests/directory.rs | 51 +++---- 6 files changed, 246 insertions(+), 76 deletions(-) (limited to 'tvix/castore/src') diff --git a/tvix/castore/src/errors.rs b/tvix/castore/src/errors.rs index 6c213cb06743..ec59c7734584 100644 --- a/tvix/castore/src/errors.rs +++ b/tvix/castore/src/errors.rs @@ -3,7 +3,7 @@ use thiserror::Error; use tokio::task::JoinError; use tonic::Status; -use crate::path::PathComponent; +use crate::path::{PathComponent, PathComponentError}; /// Errors related to communication with the store. #[derive(Debug, Error, PartialEq)] @@ -47,8 +47,8 @@ pub enum DirectoryError { #[error("Total size exceeds u32::MAX")] SizeOverflow, /// Invalid name encountered - #[error("Invalid name: {}", .0.as_bstr())] - InvalidName(bytes::Bytes), + #[error("Invalid name: {0}")] + InvalidName(PathComponentError), /// Elements are not in sorted order. Can only happen on protos #[error("{:?} is not sorted", .0.as_bstr())] WrongSorting(bytes::Bytes), diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs index 6e1b36231b9d..8ac6ca3dd66a 100644 --- a/tvix/castore/src/lib.rs +++ b/tvix/castore/src/lib.rs @@ -14,7 +14,7 @@ mod nodes; pub use nodes::*; mod path; -pub use path::{Path, PathBuf, PathComponent}; +pub use path::{Path, PathBuf, PathComponent, PathComponentError}; pub mod import; pub mod proto; diff --git a/tvix/castore/src/path/component.rs b/tvix/castore/src/path/component.rs index f755f06e62a8..78aca03c50fe 100644 --- a/tvix/castore/src/path/component.rs +++ b/tvix/castore/src/path/component.rs @@ -1,6 +1,3 @@ -// TODO: split out this error -use crate::DirectoryError; - use bstr::ByteSlice; use std::fmt::{self, Debug, Display}; @@ -8,12 +5,17 @@ use std::fmt::{self, Debug, Display}; /// Internally uses a [bytes::Bytes], but disallows /// slashes, and null bytes to be present, as well as /// '.', '..' and the empty string. +/// It also rejects components that are too long (> 255 bytes). #[repr(transparent)] #[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)] pub struct PathComponent { pub(super) inner: bytes::Bytes, } +/// The maximum length an individual path component can have. +/// Linux allows 255 bytes of actual name, so we pick that. +pub const MAX_NAME_LEN: usize = 255; + impl AsRef<[u8]> for PathComponent { fn as_ref(&self) -> &[u8] { self.inner.as_ref() @@ -26,18 +28,24 @@ impl From for bytes::Bytes { } } -pub(super) fn is_valid_name>(name: B) -> bool { - let v = name.as_ref(); - - !v.is_empty() && v != *b".." && v != *b"." && !v.contains(&0x00) && !v.contains(&b'/') +pub(super) fn validate_name>(name: B) -> Result<(), PathComponentError> { + match name.as_ref() { + b"" => Err(PathComponentError::Empty), + b".." => Err(PathComponentError::Parent), + b"." => Err(PathComponentError::CurDir), + v if v.len() > MAX_NAME_LEN => Err(PathComponentError::TooLong), + v if v.contains(&0x00) => Err(PathComponentError::Null), + v if v.contains(&b'/') => Err(PathComponentError::Slashes), + _ => Ok(()), + } } impl TryFrom for PathComponent { - type Error = DirectoryError; + type Error = PathComponentError; fn try_from(value: bytes::Bytes) -> Result { - if !is_valid_name(&value) { - return Err(DirectoryError::InvalidName(value)); + if let Err(e) = validate_name(&value) { + return Err(PathComponentError::Convert(value, Box::new(e))); } Ok(Self { inner: value }) @@ -45,14 +53,13 @@ impl TryFrom for PathComponent { } impl TryFrom<&'static [u8]> for PathComponent { - type Error = DirectoryError; + type Error = PathComponentError; fn try_from(value: &'static [u8]) -> Result { - if !is_valid_name(value) { - return Err(DirectoryError::InvalidName(bytes::Bytes::from_static( - value, - ))); + if let Err(e) = validate_name(value) { + return Err(PathComponentError::Convert(value.into(), Box::new(e))); } + Ok(Self { inner: bytes::Bytes::from_static(value), }) @@ -60,14 +67,16 @@ impl TryFrom<&'static [u8]> for PathComponent { } impl TryFrom<&str> for PathComponent { - type Error = DirectoryError; + type Error = PathComponentError; fn try_from(value: &str) -> Result { - if !is_valid_name(value) { - return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice( - value.as_bytes(), - ))); + if let Err(e) = validate_name(value) { + return Err(PathComponentError::Convert( + value.to_owned().into(), + Box::new(e), + )); } + Ok(Self { inner: bytes::Bytes::copy_from_slice(value.as_bytes()), }) @@ -75,16 +84,19 @@ impl TryFrom<&str> for PathComponent { } impl TryFrom<&std::ffi::CStr> for PathComponent { - type Error = DirectoryError; + type Error = PathComponentError; fn try_from(value: &std::ffi::CStr) -> Result { - if !is_valid_name(value.to_bytes()) { - return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice( - value.to_bytes(), - ))); + let value = value.to_bytes(); + if let Err(e) = validate_name(value) { + return Err(PathComponentError::Convert( + value.to_owned().into(), + Box::new(e), + )); } + Ok(Self { - inner: bytes::Bytes::copy_from_slice(value.to_bytes()), + inner: bytes::Bytes::copy_from_slice(value), }) } } @@ -100,3 +112,157 @@ impl Display for PathComponent { Display::fmt(self.inner.as_bstr(), f) } } + +/// Errors created when parsing / validating [PathComponent]. +#[derive(Debug, PartialEq, thiserror::Error)] +#[cfg_attr(test, derive(Clone))] +pub enum PathComponentError { + #[error("cannot be empty")] + Empty, + #[error("cannot contain null bytes")] + Null, + #[error("cannot be '.'")] + CurDir, + #[error("cannot be '..'")] + Parent, + #[error("cannot contain slashes")] + Slashes, + #[error("cannot be over {} bytes long", MAX_NAME_LEN)] + TooLong, + #[error("unable to convert '{:?}'", .0.as_bstr())] + Convert(bytes::Bytes, #[source] Box), +} + +#[cfg(test)] +mod tests { + use std::ffi::CString; + + use bytes::Bytes; + use rstest::rstest; + + use super::{validate_name, PathComponent, PathComponentError}; + + #[rstest] + #[case::empty(b"", PathComponentError::Empty)] + #[case::null(b"foo\0", PathComponentError::Null)] + #[case::curdir(b".", PathComponentError::CurDir)] + #[case::parent(b"..", PathComponentError::Parent)] + #[case::slashes1(b"a/b", PathComponentError::Slashes)] + #[case::slashes2(b"/", PathComponentError::Slashes)] + fn errors(#[case] v: &'static [u8], #[case] err: PathComponentError) { + { + assert_eq!( + Err(err.clone()), + validate_name(v), + "validate_name must fail as expected" + ); + } + + let exp_err_v = Bytes::from_static(v); + + // Bytes + { + let v = Bytes::from_static(v); + assert_eq!( + Err(PathComponentError::Convert( + exp_err_v.clone(), + Box::new(err.clone()) + )), + PathComponent::try_from(v), + "conversion must fail as expected" + ); + } + // &[u8] + { + assert_eq!( + Err(PathComponentError::Convert( + exp_err_v.clone(), + Box::new(err.clone()) + )), + PathComponent::try_from(v), + "conversion must fail as expected" + ); + } + // &str, if it is valid UTF-8 + { + if let Ok(v) = std::str::from_utf8(v) { + assert_eq!( + Err(PathComponentError::Convert( + exp_err_v.clone(), + Box::new(err.clone()) + )), + PathComponent::try_from(v), + "conversion must fail as expected" + ); + } + } + // &CStr, if it can be constructed (fails if the payload contains null bytes) + { + if let Ok(v) = CString::new(v) { + let v = v.as_ref(); + assert_eq!( + Err(PathComponentError::Convert( + exp_err_v.clone(), + Box::new(err.clone()) + )), + PathComponent::try_from(v), + "conversion must fail as expected" + ); + } + } + } + + #[test] + fn error_toolong() { + assert_eq!( + Err(PathComponentError::TooLong), + validate_name("X".repeat(500).into_bytes().as_slice()) + ) + } + + #[test] + fn success() { + let exp = PathComponent { inner: "aa".into() }; + + // Bytes + { + let v: Bytes = "aa".into(); + assert_eq!( + Ok(exp.clone()), + PathComponent::try_from(v), + "conversion must succeed" + ); + } + + // &[u8] + { + let v: &[u8] = b"aa"; + assert_eq!( + Ok(exp.clone()), + PathComponent::try_from(v), + "conversion must succeed" + ); + } + + // &str + { + let v: &str = "aa"; + assert_eq!( + Ok(exp.clone()), + PathComponent::try_from(v), + "conversion must succeed" + ); + } + + // &CStr + { + let v = CString::new("aa").expect("CString must construct"); + let v = v.as_c_str(); + assert_eq!( + Ok(exp.clone()), + PathComponent::try_from(v), + "conversion must succeed" + ); + } + } +} diff --git a/tvix/castore/src/path/mod.rs b/tvix/castore/src/path/mod.rs index 5a68f1add416..15f31a570da9 100644 --- a/tvix/castore/src/path/mod.rs +++ b/tvix/castore/src/path/mod.rs @@ -9,7 +9,7 @@ use std::{ }; mod component; -pub use component::PathComponent; +pub use component::{PathComponent, PathComponentError}; /// Represents a Path in the castore model. /// These are always relative, and platform-independent, which distinguishes @@ -37,7 +37,7 @@ impl Path { if !bytes.is_empty() { // Ensure all components are valid castore node names. for component in bytes.split_str(b"/") { - if !component::is_valid_name(component) { + if component::validate_name(component).is_err() { return None; } } @@ -233,7 +233,7 @@ impl PathBuf { /// Adjoins `name` to self. pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { - if !component::is_valid_name(name) { + if component::validate_name(name).is_err() { return Err(std::io::ErrorKind::InvalidData.into()); } diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index a40a67f53cf3..67f7b2fa72a2 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -1,6 +1,5 @@ -use std::{cmp::Ordering, str}; - use prost::Message; +use std::cmp::Ordering; mod grpc_blobservice_wrapper; mod grpc_directoryservice_wrapper; @@ -80,31 +79,42 @@ impl TryFrom for crate::Directory { .try_fold(&b""[..], |prev_name, e| { match e.name.as_ref().cmp(prev_name) { Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), - Ordering::Equal => { - Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) - } + Ordering::Equal => Err(DirectoryError::DuplicateName( + e.name + .to_owned() + .try_into() + .map_err(DirectoryError::InvalidName)?, + )), Ordering::Greater => Ok(e.name.as_ref()), } })?; value.files.iter().try_fold(&b""[..], |prev_name, e| { match e.name.as_ref().cmp(prev_name) { Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), - Ordering::Equal => { - Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) - } + Ordering::Equal => Err(DirectoryError::DuplicateName( + e.name + .to_owned() + .try_into() + .map_err(DirectoryError::InvalidName)?, + )), Ordering::Greater => Ok(e.name.as_ref()), } })?; value.symlinks.iter().try_fold(&b""[..], |prev_name, e| { match e.name.as_ref().cmp(prev_name) { Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), - Ordering::Equal => { - Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) - } + Ordering::Equal => Err(DirectoryError::DuplicateName( + e.name + .to_owned() + .try_into() + .map_err(DirectoryError::InvalidName)?, + )), Ordering::Greater => Ok(e.name.as_ref()), } })?; + // FUTUREWORK: use is_sorted() once stable, and/or implement the producer for + // [crate::Directory::try_from_iter] iterating over all three and doing all checks inline. let mut elems: Vec<(PathComponent, crate::Node)> = Vec::with_capacity(value.directories.len() + value.files.len() + value.symlinks.len()); @@ -184,7 +194,7 @@ impl Node { pub fn into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> { match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? { node::Node::Directory(n) => { - let name: PathComponent = n.name.try_into()?; + let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?; let digest = B3Digest::try_from(n.digest) .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?; @@ -196,7 +206,7 @@ impl Node { Ok((name, node)) } node::Node::File(n) => { - let name: PathComponent = n.name.try_into()?; + let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?; let digest = B3Digest::try_from(n.digest) .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?; @@ -210,7 +220,8 @@ impl Node { } node::Node::Symlink(n) => { - let name: PathComponent = n.name.try_into()?; + let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?; + let node = crate::Node::Symlink { target: n .target diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs index 26c434ab46c0..efbc4e9f2af1 100644 --- a/tvix/castore/src/proto/tests/directory.rs +++ b/tvix/castore/src/proto/tests/directory.rs @@ -161,12 +161,9 @@ fn validate_invalid_names() { }], ..Default::default() }; - match crate::Directory::try_from(d).expect_err("must fail") { - DirectoryError::InvalidName(n) => { - assert_eq!(n.as_ref(), b"\0") - } - _ => panic!("unexpected error"), - }; + + let e = crate::Directory::try_from(d).expect_err("must fail"); + assert!(matches!(e, DirectoryError::InvalidName(_))); } { @@ -178,12 +175,8 @@ fn validate_invalid_names() { }], ..Default::default() }; - match crate::Directory::try_from(d).expect_err("must fail") { - DirectoryError::InvalidName(n) => { - assert_eq!(n.as_ref(), b".") - } - _ => panic!("unexpected error"), - }; + let e = crate::Directory::try_from(d).expect_err("must fail"); + assert!(matches!(e, DirectoryError::InvalidName(_))); } { @@ -196,12 +189,8 @@ fn validate_invalid_names() { }], ..Default::default() }; - match crate::Directory::try_from(d).expect_err("must fail") { - DirectoryError::InvalidName(n) => { - assert_eq!(n.as_ref(), b"..") - } - _ => panic!("unexpected error"), - }; + let e = crate::Directory::try_from(d).expect_err("must fail"); + assert!(matches!(e, DirectoryError::InvalidName(_))); } { @@ -212,12 +201,8 @@ fn validate_invalid_names() { }], ..Default::default() }; - match crate::Directory::try_from(d).expect_err("must fail") { - DirectoryError::InvalidName(n) => { - assert_eq!(n.as_ref(), b"\x00") - } - _ => panic!("unexpected error"), - }; + let e = crate::Directory::try_from(d).expect_err("must fail"); + assert!(matches!(e, DirectoryError::InvalidName(_))); } { @@ -228,12 +213,20 @@ fn validate_invalid_names() { }], ..Default::default() }; - match crate::Directory::try_from(d).expect_err("must fail") { - DirectoryError::InvalidName(n) => { - assert_eq!(n.as_ref(), b"foo/bar") - } - _ => panic!("unexpected error"), + let e = crate::Directory::try_from(d).expect_err("must fail"); + assert!(matches!(e, DirectoryError::InvalidName(_))); + } + + { + let d = Directory { + symlinks: vec![SymlinkNode { + name: bytes::Bytes::copy_from_slice("X".repeat(500).into_bytes().as_slice()), + target: "foo".into(), + }], + ..Default::default() }; + let e = crate::Directory::try_from(d).expect_err("must fail"); + assert!(matches!(e, DirectoryError::InvalidName(_))); } } -- cgit 1.4.1