//! Contains data structures to deal with Paths in the tvix-castore model. use std::{ borrow::Borrow, fmt::{self, Debug, Display}, mem, ops::Deref, str::FromStr, }; use bstr::ByteSlice; use crate::proto::validate_node_name; /// Represents a Path in the castore model. /// These are always relative, and platform-independent, which distinguishes /// them from the ones provided in the standard library. #[derive(Eq, Hash, PartialEq)] #[repr(transparent)] // SAFETY: Representation has to match [u8] pub struct Path { // As node names in the castore model cannot contain slashes, // we use them as component separators here. inner: [u8], } #[allow(dead_code)] impl Path { // SAFETY: The empty path is valid. pub const ROOT: &'static Path = unsafe { Path::from_bytes_unchecked(&[]) }; /// Convert a byte slice to a path, without checking validity. const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Path { // SAFETY: &[u8] and &Path have the same representation. unsafe { mem::transmute(bytes) } } fn from_bytes(bytes: &[u8]) -> Option<&Path> { if !bytes.is_empty() { // Ensure all components are valid castore node names. for component in bytes.split_str(b"/") { validate_node_name(component).ok()?; } } // SAFETY: We have verified that the path contains no empty components. Some(unsafe { Path::from_bytes_unchecked(bytes) }) } pub fn into_boxed_bytes(self: Box<Path>) -> Box<[u8]> { // SAFETY: Box<Path> and Box<[u8]> have the same representation. unsafe { mem::transmute(self) } } /// Returns the path without its final component, if there is one. /// /// Note that the parent of a bare file name is [Path::ROOT]. /// [Path::ROOT] is the only path without a parent. pub fn parent(&self) -> Option<&Path> { // The root does not have a parent. if self.inner.is_empty() { return None; } Some( if let Some((parent, _file_name)) = self.inner.rsplit_once_str(b"/") { // SAFETY: The parent of a valid Path is a valid Path. unsafe { Path::from_bytes_unchecked(parent) } } else { // The parent of a bare file name is the root. Path::ROOT }, ) } /// Creates a PathBuf with `name` adjoined to self. pub fn try_join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> { let mut v = PathBuf::with_capacity(self.inner.len() + name.len() + 1); v.inner.extend_from_slice(&self.inner); v.try_push(name)?; Ok(v) } /// Produces an iterator over the components of the path, which are /// individual byte slices. /// In case the path is empty, an empty iterator is returned. pub fn components(&self) -> impl Iterator<Item = &[u8]> { let mut iter = self.inner.split_str(&b"/"); // We don't want to return an empty element, consume it if it's the only one. if self.inner.is_empty() { let _ = iter.next(); } iter } /// Returns the final component of the Path, if there is one. pub fn file_name(&self) -> Option<&[u8]> { self.components().last() } pub fn as_bytes(&self) -> &[u8] { &self.inner } } impl Debug for Path { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { Debug::fmt(self.inner.as_bstr(), f) } } impl Display for Path { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { Display::fmt(self.inner.as_bstr(), f) } } impl AsRef<Path> for Path { fn as_ref(&self) -> &Path { self } } /// Represents a owned PathBuf in the castore model. /// These are always relative, and platform-independent, which distinguishes /// them from the ones provided in the standard library. #[derive(Clone, Default, Eq, Hash, PartialEq)] pub struct PathBuf { inner: Vec<u8>, } impl Deref for PathBuf { type Target = Path; fn deref(&self) -> &Self::Target { // SAFETY: PathBuf always contains a valid Path. unsafe { Path::from_bytes_unchecked(&self.inner) } } } impl AsRef<Path> for PathBuf { fn as_ref(&self) -> &Path { self } } impl ToOwned for Path { type Owned = PathBuf; fn to_owned(&self) -> Self::Owned { PathBuf { inner: self.inner.to_owned(), } } } impl Borrow<Path> for PathBuf { fn borrow(&self) -> &Path { self } } impl From<Box<Path>> for PathBuf { fn from(value: Box<Path>) -> Self { // SAFETY: Box<Path> is always a valid path. unsafe { PathBuf::from_bytes_unchecked(value.into_boxed_bytes().into_vec()) } } } impl From<&Path> for PathBuf { fn from(value: &Path) -> Self { value.to_owned() } } impl FromStr for PathBuf { type Err = std::io::Error; fn from_str(s: &str) -> Result<PathBuf, Self::Err> { Ok(Path::from_bytes(s.as_bytes()) .ok_or(std::io::ErrorKind::InvalidData)? .to_owned()) } } impl Debug for PathBuf { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { Debug::fmt(&**self, f) } } impl Display for PathBuf { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { Display::fmt(&**self, f) } } impl PathBuf { pub fn new() -> PathBuf { Self::default() } pub fn with_capacity(capacity: usize) -> PathBuf { // SAFETY: The empty path is a valid path. Self { inner: Vec::with_capacity(capacity), } } /// Adjoins `name` to self. pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> { validate_node_name(name).map_err(|_| std::io::ErrorKind::InvalidData)?; if !self.inner.is_empty() { self.inner.push(b'/'); } self.inner.extend_from_slice(name); Ok(()) } /// Convert a byte vector to a PathBuf, without checking validity. unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> PathBuf { PathBuf { inner: bytes } } pub fn into_boxed_path(self) -> Box<Path> { // SAFETY: Box<[u8]> and Box<Path> have the same representation, // and PathBuf always contains a valid Path. unsafe { mem::transmute(self.inner.into_boxed_slice()) } } pub fn into_bytes(self) -> Vec<u8> { self.inner } } #[cfg(test)] mod test { use super::{Path, PathBuf}; use bstr::ByteSlice; use rstest::rstest; // TODO: add some manual tests including invalid UTF-8 (hard to express // with rstest) #[rstest] #[case::empty("", 0)] #[case("a", 1)] #[case("a/b", 2)] #[case("a/b/c", 3)] // add two slightly more cursed variants. // Technically nothing prevents us from representing this with castore, // but maybe we want to disallow constructing paths like this as it's a // bad idea. #[case::cursed("C:\\a/b", 2)] #[case::cursed("\\tvix-store", 1)] pub fn from_str(#[case] s: &str, #[case] num_components: usize) { let p: PathBuf = s.parse().expect("must parse"); assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch"); assert_eq!( num_components, p.components().count(), "number of components mismatch" ); } #[rstest] #[case::absolute("/a/b")] #[case::two_forward_slashes_start("//a/b")] #[case::two_forward_slashes_middle("a/b//c/d")] #[case::trailing_slash("a/b/")] #[case::dot(".")] #[case::dotdot("..")] #[case::dot_start("./a")] #[case::dotdot_start("../a")] #[case::dot_middle("a/./b")] #[case::dotdot_middle("a/../b")] #[case::dot_end("a/b/.")] #[case::dotdot_end("a/b/..")] #[case::null("fo\0o")] pub fn from_str_fail(#[case] s: &str) { s.parse::<PathBuf>().expect_err("must fail"); } #[rstest] #[case("foo", "")] #[case("foo/bar", "foo")] #[case("foo2/bar2", "foo2")] #[case("foo/bar/baz", "foo/bar")] pub fn parent(#[case] p: PathBuf, #[case] exp_parent: PathBuf) { assert_eq!(Some(&*exp_parent), p.parent()); } #[rstest] pub fn no_parent() { assert!(Path::ROOT.parent().is_none()); } #[rstest] #[case("a", "b", "a/b")] #[case("a", "b", "a/b")] pub fn join_push(#[case] mut p: PathBuf, #[case] name: &str, #[case] exp_p: PathBuf) { assert_eq!(exp_p, p.try_join(name.as_bytes()).expect("join failed")); p.try_push(name.as_bytes()).expect("push failed"); assert_eq!(exp_p, p); } #[rstest] #[case("a", "/")] #[case("a", "")] #[case("a", "b/c")] #[case("", "/")] #[case("", "")] #[case("", "b/c")] #[case("", ".")] #[case("", "..")] pub fn join_push_fail(#[case] mut p: PathBuf, #[case] name: &str) { p.try_join(name.as_bytes()) .expect_err("join succeeded unexpectedly"); p.try_push(name.as_bytes()) .expect_err("push succeeded unexpectedly"); } #[rstest] #[case::empty("", vec![])] #[case("a", vec!["a"])] #[case("a/b", vec!["a", "b"])] #[case("a/b/c", vec!["a","b", "c"])] pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) { assert_eq!( exp_components, p.components() .map(|x| x.to_str().unwrap()) .collect::<Vec<_>>() ); } }