diff options
author | Florian Klink <flokli@flokli.de> | 2024-08-16T14·32+0300 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-08-17T15·59+0000 |
commit | 5ec93b57e6a263eef91ee583aba9f04581e4a66b (patch) | |
tree | 896407c00900d630a38ee82176ff12e0870f7a20 /tvix/castore/src/path.rs | |
parent | 8ea7d2b60eb4052d934820078c31ff25786376a4 (diff) |
refactor(tvix/castore): add PathComponent type for checked components r/8506
This encodes a verified component on the type level. Internally, it contains a bytes::Bytes.

The castore Path/PathBuf component() and file_name() methods now return this type; the old ones returning bytes were renamed to component_bytes() and component_file_name() respectively.

We can drop the directory_reject_invalid_name test - it's not possible anymore to pass an invalid name to Directories::add. Invalid names in the Directory proto are still being tested to be rejected in the validate_invalid_names tests.

Change-Id: Ide4d16415dfd50b7e2d7e0c36d42a3bbeeb9b6c5
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12217
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/castore/src/path.rs')
-rw-r--r-- | tvix/castore/src/path.rs | 450 |
1 files changed, 0 insertions, 450 deletions
diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path.rs deleted file mode 100644 index 8a55e9f5a1d3..000000000000 --- a/tvix/castore/src/path.rs +++ /dev/null @@ -1,450 +0,0 @@ -//! Contains data structures to deal with Paths in the tvix-castore model. - -use std::{ - borrow::Borrow, - fmt::{self, Debug, Display}, - mem, - ops::Deref, - str::FromStr, -}; - -use bstr::ByteSlice; - -use crate::Directory; - -/// Represents a Path in the castore model. -/// These are always relative, and platform-independent, which distinguishes -/// them from the ones provided in the standard library. -#[derive(Eq, Hash, PartialEq)] -#[repr(transparent)] // SAFETY: Representation has to match [u8] -pub struct Path { - // As node names in the castore model cannot contain slashes, - // we use them as component separators here. - inner: [u8], -} - -#[allow(dead_code)] -impl Path { - // SAFETY: The empty path is valid. - pub const ROOT: &'static Path = unsafe { Path::from_bytes_unchecked(&[]) }; - - /// Convert a byte slice to a path, without checking validity. - const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Path { - // SAFETY: &[u8] and &Path have the same representation. - unsafe { mem::transmute(bytes) } - } - - fn from_bytes(bytes: &[u8]) -> Option<&Path> { - if !bytes.is_empty() { - // Ensure all components are valid castore node names. - for component in bytes.split_str(b"/") { - if !Directory::is_valid_name(component) { - return None; - } - } - } - - // SAFETY: We have verified that the path contains no empty components. - Some(unsafe { Path::from_bytes_unchecked(bytes) }) - } - - pub fn into_boxed_bytes(self: Box<Path>) -> Box<[u8]> { - // SAFETY: Box<Path> and Box<[u8]> have the same representation. - unsafe { mem::transmute(self) } - } - - /// Returns the path without its final component, if there is one. - /// - /// Note that the parent of a bare file name is [Path::ROOT]. - /// [Path::ROOT] is the only path without a parent. 
- pub fn parent(&self) -> Option<&Path> { - // The root does not have a parent. - if self.inner.is_empty() { - return None; - } - - Some( - if let Some((parent, _file_name)) = self.inner.rsplit_once_str(b"/") { - // SAFETY: The parent of a valid Path is a valid Path. - unsafe { Path::from_bytes_unchecked(parent) } - } else { - // The parent of a bare file name is the root. - Path::ROOT - }, - ) - } - - /// Creates a PathBuf with `name` adjoined to self. - pub fn try_join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> { - let mut v = PathBuf::with_capacity(self.inner.len() + name.len() + 1); - v.inner.extend_from_slice(&self.inner); - v.try_push(name)?; - - Ok(v) - } - - /// Produces an iterator over the components of the path, which are - /// individual byte slices. - /// In case the path is empty, an empty iterator is returned. - pub fn components(&self) -> impl Iterator<Item = &[u8]> { - let mut iter = self.inner.split_str(&b"/"); - - // We don't want to return an empty element, consume it if it's the only one. - if self.inner.is_empty() { - let _ = iter.next(); - } - - iter - } - - /// Returns the final component of the Path, if there is one. - pub fn file_name(&self) -> Option<&[u8]> { - self.components().last() - } - - pub fn as_bytes(&self) -> &[u8] { - &self.inner - } -} - -impl Debug for Path { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - Debug::fmt(self.inner.as_bstr(), f) - } -} - -impl Display for Path { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - Display::fmt(self.inner.as_bstr(), f) - } -} - -impl AsRef<Path> for Path { - fn as_ref(&self) -> &Path { - self - } -} - -/// Represents a owned PathBuf in the castore model. -/// These are always relative, and platform-independent, which distinguishes -/// them from the ones provided in the standard library. 
-#[derive(Clone, Default, Eq, Hash, PartialEq)] -pub struct PathBuf { - inner: Vec<u8>, -} - -impl Deref for PathBuf { - type Target = Path; - - fn deref(&self) -> &Self::Target { - // SAFETY: PathBuf always contains a valid Path. - unsafe { Path::from_bytes_unchecked(&self.inner) } - } -} - -impl AsRef<Path> for PathBuf { - fn as_ref(&self) -> &Path { - self - } -} - -impl ToOwned for Path { - type Owned = PathBuf; - - fn to_owned(&self) -> Self::Owned { - PathBuf { - inner: self.inner.to_owned(), - } - } -} - -impl Borrow<Path> for PathBuf { - fn borrow(&self) -> &Path { - self - } -} - -impl From<Box<Path>> for PathBuf { - fn from(value: Box<Path>) -> Self { - // SAFETY: Box<Path> is always a valid path. - unsafe { PathBuf::from_bytes_unchecked(value.into_boxed_bytes().into_vec()) } - } -} - -impl From<&Path> for PathBuf { - fn from(value: &Path) -> Self { - value.to_owned() - } -} - -impl FromStr for PathBuf { - type Err = std::io::Error; - - fn from_str(s: &str) -> Result<PathBuf, Self::Err> { - Ok(Path::from_bytes(s.as_bytes()) - .ok_or(std::io::ErrorKind::InvalidData)? - .to_owned()) - } -} - -impl Debug for PathBuf { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - Debug::fmt(&**self, f) - } -} - -impl Display for PathBuf { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - Display::fmt(&**self, f) - } -} - -impl PathBuf { - pub fn new() -> PathBuf { - Self::default() - } - - pub fn with_capacity(capacity: usize) -> PathBuf { - // SAFETY: The empty path is a valid path. - Self { - inner: Vec::with_capacity(capacity), - } - } - - /// Adjoins `name` to self. - pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { - if !Directory::is_valid_name(name) { - return Err(std::io::ErrorKind::InvalidData.into()); - } - - if !self.inner.is_empty() { - self.inner.push(b'/'); - } - - self.inner.extend_from_slice(name); - - Ok(()) - } - - /// Convert a byte vector to a PathBuf, without checking validity. 
- unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> PathBuf { - PathBuf { inner: bytes } - } - - /// Convert from a [&std::path::Path] to [Self]. - /// - /// - Self uses `/` as path separator. - /// - Absolute paths are always rejected, as are those with custom prefixes. - /// - Repeated separators are deduplicated. - /// - Occurrences of `.` are normalized away. - /// - A trailing slash is normalized away. - /// - /// A `canonicalize_dotdot` boolean controls whether `..` will get - /// canonicalized if possible, or should return an error. - /// - /// For more exotic paths, this conversion might produce different results - /// on different platforms, due to different underlying byte - /// representations, which is why it's restricted to unix for now. - #[cfg(unix)] - pub fn from_host_path( - host_path: &std::path::Path, - canonicalize_dotdot: bool, - ) -> Result<Self, std::io::Error> { - let mut p = PathBuf::with_capacity(host_path.as_os_str().len()); - - for component in host_path.components() { - match component { - std::path::Component::Prefix(_) | std::path::Component::RootDir => { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "found disallowed prefix or rootdir", - )) - } - std::path::Component::CurDir => continue, // ignore - std::path::Component::ParentDir => { - if canonicalize_dotdot { - // Try popping the last element from the path being constructed. - // FUTUREWORK: pop method? - p = p - .parent() - .ok_or_else(|| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - "found .. going too far up", - ) - })? - .to_owned(); - } else { - return Err(std::io::Error::new( - std::io::ErrorKind::InvalidData, - "found disallowed ..", - )); - } - } - std::path::Component::Normal(s) => { - // append the new component to the path being constructed. - p.try_push(s.as_encoded_bytes()).map_err(|_| { - std::io::Error::new( - std::io::ErrorKind::InvalidData, - "encountered invalid node in sub_path component", - ) - })? 
- } - } - } - - Ok(p) - } - - pub fn into_boxed_path(self) -> Box<Path> { - // SAFETY: Box<[u8]> and Box<Path> have the same representation, - // and PathBuf always contains a valid Path. - unsafe { mem::transmute(self.inner.into_boxed_slice()) } - } - - pub fn into_bytes(self) -> Vec<u8> { - self.inner - } -} - -#[cfg(test)] -mod test { - use super::{Path, PathBuf}; - use bstr::ByteSlice; - use rstest::rstest; - - // TODO: add some manual tests including invalid UTF-8 (hard to express - // with rstest) - - #[rstest] - #[case::empty("", 0)] - #[case("a", 1)] - #[case("a/b", 2)] - #[case("a/b/c", 3)] - // add two slightly more cursed variants. - // Technically nothing prevents us from representing this with castore, - // but maybe we want to disallow constructing paths like this as it's a - // bad idea. - #[case::cursed("C:\\a/b", 2)] - #[case::cursed("\\\\tvix-store", 1)] - pub fn from_str(#[case] s: &str, #[case] num_components: usize) { - let p: PathBuf = s.parse().expect("must parse"); - - assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch"); - assert_eq!( - num_components, - p.components().count(), - "number of components mismatch" - ); - } - - #[rstest] - #[case::absolute("/a/b")] - #[case::two_forward_slashes_start("//a/b")] - #[case::two_forward_slashes_middle("a/b//c/d")] - #[case::trailing_slash("a/b/")] - #[case::dot(".")] - #[case::dotdot("..")] - #[case::dot_start("./a")] - #[case::dotdot_start("../a")] - #[case::dot_middle("a/./b")] - #[case::dotdot_middle("a/../b")] - #[case::dot_end("a/b/.")] - #[case::dotdot_end("a/b/..")] - #[case::null("fo\0o")] - pub fn from_str_fail(#[case] s: &str) { - s.parse::<PathBuf>().expect_err("must fail"); - } - - #[rstest] - #[case("foo", "")] - #[case("foo/bar", "foo")] - #[case("foo2/bar2", "foo2")] - #[case("foo/bar/baz", "foo/bar")] - pub fn parent(#[case] p: PathBuf, #[case] exp_parent: PathBuf) { - assert_eq!(Some(&*exp_parent), p.parent()); - } - - #[rstest] - pub fn no_parent() { - 
assert!(Path::ROOT.parent().is_none()); - } - - #[rstest] - #[case("a", "b", "a/b")] - #[case("a", "b", "a/b")] - pub fn join_push(#[case] mut p: PathBuf, #[case] name: &str, #[case] exp_p: PathBuf) { - assert_eq!(exp_p, p.try_join(name.as_bytes()).expect("join failed")); - p.try_push(name.as_bytes()).expect("push failed"); - assert_eq!(exp_p, p); - } - - #[rstest] - #[case("a", "/")] - #[case("a", "")] - #[case("a", "b/c")] - #[case("", "/")] - #[case("", "")] - #[case("", "b/c")] - #[case("", ".")] - #[case("", "..")] - pub fn join_push_fail(#[case] mut p: PathBuf, #[case] name: &str) { - p.try_join(name.as_bytes()) - .expect_err("join succeeded unexpectedly"); - p.try_push(name.as_bytes()) - .expect_err("push succeeded unexpectedly"); - } - - #[rstest] - #[case::empty("", vec![])] - #[case("a", vec!["a"])] - #[case("a/b", vec!["a", "b"])] - #[case("a/b/c", vec!["a","b", "c"])] - pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) { - assert_eq!( - exp_components, - p.components() - .map(|x| x.to_str().unwrap()) - .collect::<Vec<_>>() - ); - } - - #[rstest] - #[case::empty("", "", false)] - #[case::path("a", "a", false)] - #[case::path2("a/b", "a/b", false)] - #[case::double_slash_middle("a//b", "a/b", false)] - #[case::dot(".", "", false)] - #[case::dot_start("./a/b", "a/b", false)] - #[case::dot_middle("a/./b", "a/b", false)] - #[case::dot_end("a/b/.", "a/b", false)] - #[case::trailing_slash("a/b/", "a/b", false)] - #[case::dotdot_canonicalize("a/..", "", true)] - #[case::dotdot_canonicalize2("a/../b", "b", true)] - #[cfg_attr(unix, case::faux_prefix("\\\\nix-store", "\\\\nix-store", false))] - #[cfg_attr(unix, case::faux_letter("C:\\foo.txt", "C:\\foo.txt", false))] - pub fn from_host_path( - #[case] host_path: std::path::PathBuf, - #[case] exp_path: PathBuf, - #[case] canonicalize_dotdot: bool, - ) { - let p = PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect("must succeed"); - - assert_eq!(exp_path, p); - } - - #[rstest] - 
#[case::absolute("/", false)] - #[case::dotdot_root("..", false)] - #[case::dotdot_root_canonicalize("..", true)] - #[case::dotdot_root_no_canonicalize("a/..", false)] - #[case::invalid_name("foo/bar\0", false)] - // #[cfg_attr(windows, case::prefix("\\\\nix-store", false))] - // #[cfg_attr(windows, case::letter("C:\\foo.txt", false))] - pub fn from_host_path_fail( - #[case] host_path: std::path::PathBuf, - #[case] canonicalize_dotdot: bool, - ) { - PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect_err("must fail"); - } -} |