// Provenance (reconstructed from a patch rendered by cgit 1.4.1):
//
//   Commit:  959c50c4ac7df882c22306610fcb8b26cdf0fcf5
//   From:    Florian Klink
//   Date:    Wed, 1 May 2024 12:53:09 +0300
//   Subject: feat(tvix/castore/path): add Path{Buf}
//
//   This contains Path and PathBuf, representing platform-independent paths
//   representable by the castore model. These are always relative, and
//   platform-independent, which distinguishes them from the ones provided in
//   the standard library.
//
//   A subsequent CL will move IngestionEntry (and more) to use them.
//
//   Change-Id: Ib85857f4159ebc2f3c00192c95d4e5b54ffd4fcf
//   Reviewed-on: https://cl.tvl.fyi/c/depot/+/11558
//   Tested-by: BuildkiteCI
//   Reviewed-by: edef
//
// This is tvix/castore/src/path.rs, a new file in that commit. The same commit
// wires the module into tvix/castore/src/lib.rs, right after the `fs` module:
//
//     mod path;
//     pub use path::Path;
//
// NOTE(review): only `Path` is re-exported there, although `Path::join`
// returns a `PathBuf` -- confirm whether `PathBuf` should be re-exported too.

//! Contains data structures to deal with Paths in the tvix-castore model.

use std::str::FromStr;

/// Byte separating the components of a castore path.
///
/// As node names in the castore model cannot contain slashes, the slash is
/// used as the component separator.
const SEPARATOR: u8 = b'/';

/// Splits `bytes` into its [`SEPARATOR`]-delimited components.
///
/// An empty input yields no components at all, rather than a single empty
/// one. Empty components in a non-empty input (leading, trailing or doubled
/// separators) are yielded as empty slices so that callers can reject them.
fn split_components(bytes: &[u8]) -> impl DoubleEndedIterator<Item = &[u8]> {
    // Splitting an empty slice produces exactly one empty element; filter it
    // out. For non-empty input the filter lets every element through.
    let is_empty = bytes.is_empty();
    bytes
        .split(|&b| b == SEPARATOR)
        .filter(move |_| !is_empty)
}

/// Returns everything before the last [`SEPARATOR`] in `bytes`, or `None` if
/// `bytes` contains no separator.
fn split_parent(bytes: &[u8]) -> Option<&[u8]> {
    let idx = bytes.iter().rposition(|&b| b == SEPARATOR)?;
    Some(&bytes[..idx])
}

/// Represents a Path in the castore model.
/// These are always relative, and platform-independent, which distinguishes
/// them from the ones provided in the standard library.
///
/// This is the borrowed counterpart of [`PathBuf`]; it only wraps a byte
/// slice and is therefore cheap to copy.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct Path<'a> {
    // As node names in the castore model cannot contain slashes,
    // we use them as component separators here.
    inner: &'a [u8],
}

// Not every method has a caller yet; per the commit message a subsequent CL
// moves IngestionEntry (and more) over to these types.
#[allow(dead_code)]
impl<'a> Path<'a> {
    /// Returns the path without its final component.
    ///
    /// Returns `None` if the path contains no separator, i.e. if it is empty
    /// or consists of a single component.
    pub fn parent(&self) -> Option<Path<'_>> {
        split_parent(self.inner).map(|parent| Path { inner: parent })
    }

    /// Returns a new [`PathBuf`] with `name` appended as the final component.
    ///
    /// No separator is inserted if this path is empty.
    ///
    /// # Errors
    ///
    /// Returns an [`std::io::ErrorKind::InvalidData`] error if `name` is
    /// empty or contains a slash.
    pub fn join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> {
        if name.is_empty() || name.contains(&SEPARATOR) {
            return Err(std::io::ErrorKind::InvalidData.into());
        }

        // Existing bytes, at most one separator, and the new component.
        let mut joined = Vec::with_capacity(self.inner.len() + 1 + name.len());
        joined.extend_from_slice(self.inner);
        if !joined.is_empty() {
            joined.push(SEPARATOR);
        }
        joined.extend_from_slice(name);

        Ok(PathBuf { inner: joined })
    }

    /// Produces an iterator over the components of the path, which are
    /// individual byte slices.
    /// In case the path is empty, an empty iterator is returned.
    pub fn components(&self) -> impl Iterator<Item = &[u8]> {
        split_components(self.inner)
    }

    /// Returns the final component of the Path, if there is one.
    pub fn file_name(&self) -> Option<&[u8]> {
        // Take the last component from the back instead of walking all of them.
        split_components(self.inner).next_back()
    }

    /// Returns the raw bytes of the path, components separated by slashes.
    pub fn as_slice(&self) -> &[u8] {
        self.inner
    }
}

/// Represents an owned PathBuf in the castore model.
/// These are always relative, and platform-independent, which distinguishes
/// them from the ones provided in the standard library.
///
/// The default value is the empty path. Other values are obtained by parsing
/// a string (see the [`FromStr`] implementation) or by joining components.
#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
pub struct PathBuf {
    inner: Vec<u8>,
}

// See the note on `impl Path` for why dead code is tolerated here.
#[allow(dead_code)]
impl PathBuf {
    /// Borrows this path as a [`Path`].
    pub fn as_ref(&self) -> Path<'_> {
        Path { inner: &self.inner }
    }

    /// Returns the path without its final component.
    ///
    /// Returns `None` if the path contains no separator, i.e. if it is empty
    /// or consists of a single component.
    pub fn parent(&self) -> Option<Path<'_>> {
        split_parent(&self.inner).map(|parent| Path { inner: parent })
    }

    /// Returns a new [`PathBuf`] with `name` appended as the final component.
    ///
    /// # Errors
    ///
    /// Returns an [`std::io::ErrorKind::InvalidData`] error if `name` is
    /// empty or contains a slash.
    pub fn join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> {
        self.as_ref().join(name)
    }

    /// Produces an iterator over the components of the path, which are
    /// individual byte slices.
    /// In case the path is empty, an empty iterator is returned.
    pub fn components(&self) -> impl Iterator<Item = &[u8]> {
        split_components(&self.inner)
    }

    /// Returns the final component of the Path, if there is one.
    pub fn file_name(&self) -> Option<&[u8]> {
        split_components(&self.inner).next_back()
    }

    /// Returns the raw bytes of the path, components separated by slashes.
    pub fn as_slice(&self) -> &[u8] {
        &self.inner
    }
}

impl FromStr for PathBuf {
    type Err = std::io::Error;

    /// Parses a slash-separated string into a [`PathBuf`].
    ///
    /// The empty string parses to the empty path.
    ///
    /// # Errors
    ///
    /// Returns an [`std::io::ErrorKind::InvalidData`] error if any component
    /// is empty, which is the case for a leading slash, a trailing slash, or
    /// two consecutive slashes.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // Ensure there are no empty components (aka, double forward slashes).
        if split_components(s.as_bytes()).any(|component| component.is_empty()) {
            return Err(std::io::ErrorKind::InvalidData.into());
        }

        Ok(PathBuf {
            inner: s.as_bytes().to_vec(),
        })
    }
}

#[cfg(test)]
mod test {
    use super::PathBuf;
    use std::str::from_utf8;

    /// Parses `s` into a [`PathBuf`], panicking if it is rejected.
    fn path(s: &str) -> PathBuf {
        s.parse().expect("must parse")
    }

    #[test]
    fn from_str() {
        let cases = [
            ("", 0),
            ("a", 1),
            ("a/b", 2),
            ("a/b/c", 3),
            // add two slightly more cursed variants.
            // Technically nothing prevents us from representing this with
            // castore, but maybe we want to disallow constructing paths like
            // this as it's a bad idea.
            ("C:\\a/b", 2),
            ("\\tvix-store", 1),
        ];

        for &(s, num_components) in cases.iter() {
            let p: PathBuf = s.parse().expect("must parse");

            assert_eq!(
                s.as_bytes(),
                p.as_slice(),
                "inner bytes mismatch for {:?}",
                s
            );
            assert_eq!(
                num_components,
                p.components().count(),
                "number of components mismatch for {:?}",
                s
            );
        }
    }

    #[test]
    fn from_str_fail() {
        let cases = [
            "/a/b",     // absolute
            "//a/b",    // two forward slashes at the start
            "a/b//c/d", // two forward slashes in the middle
            "a/b/",     // trailing slash
        ];

        for &s in cases.iter() {
            s.parse::<PathBuf>().expect_err("must fail");
        }
    }

    #[test]
    fn parent() {
        let cases = [
            ("foo/bar", "foo"),
            ("foo2/bar2", "foo2"),
            ("foo/bar/baz", "foo/bar"),
        ];

        for &(p, exp_parent) in cases.iter() {
            let p = path(p);
            let exp_parent = path(exp_parent);
            assert_eq!(Some(exp_parent.as_ref()), p.parent());

            // same for Path
            let p = p.as_ref();
            assert_eq!(Some(exp_parent.as_ref()), p.parent());
        }
    }

    #[test]
    fn no_parent() {
        for &s in ["", "foo"].iter() {
            let p = path(s);
            assert!(p.parent().is_none());

            // same for Path
            assert!(p.as_ref().parent().is_none());
        }
    }

    #[test]
    fn join() {
        let cases = [
            ("a", "b", "a/b"),
            // Joining onto the empty path must not add a leading slash.
            ("", "b", "b"),
            ("a/b", "c", "a/b/c"),
        ];

        for &(p, name, exp_p) in cases.iter() {
            let p = path(p);
            let exp_p = path(exp_p);
            assert_eq!(exp_p, p.join(name.as_bytes()).expect("join failed"));

            // same for Path
            assert_eq!(
                exp_p,
                p.as_ref().join(name.as_bytes()).expect("join failed")
            );
        }
    }

    #[test]
    fn join_fail() {
        let cases = [
            ("a", "/"),
            ("a", ""),
            ("a", "b/c"),
            ("", "/"),
            ("", ""),
            ("", "b/c"),
        ];

        for &(p, name) in cases.iter() {
            let p = path(p);
            p.join(name.as_bytes())
                .expect_err("join succeeded unexpectedly");

            // same for Path
            p.as_ref()
                .join(name.as_bytes())
                .expect_err("join succeeded unexpectedly");
        }
    }

    #[test]
    fn components() {
        fn check(s: &str, exp_components: &[&str]) {
            let p = path(s);
            let got: Vec<&str> = p.components().map(|x| from_utf8(x).unwrap()).collect();
            assert_eq!(exp_components, got.as_slice());

            // same for Path
            let p = p.as_ref();
            let got: Vec<&str> = p.components().map(|x| from_utf8(x).unwrap()).collect();
            assert_eq!(exp_components, got.as_slice());
        }

        check("", &[]);
        check("a", &["a"]);
        check("a/b", &["a", "b"]);
        check("a/b/c", &["a", "b", "c"]);
    }

    #[test]
    fn file_name() {
        fn check(s: &str, exp_file_name: Option<&str>) {
            let p = path(s);
            let exp = exp_file_name.map(|x| x.as_bytes());
            assert_eq!(exp, p.file_name());

            // same for Path
            assert_eq!(exp, p.as_ref().file_name());
        }

        check("", None);
        check("a", Some("a"));
        check("a/b/c", Some("c"));
    }

    /// Paths are byte strings; components need not be valid UTF-8.
    #[test]
    fn non_utf8_components() {
        let first = PathBuf::default()
            .join(b"\xff\xfe")
            .expect("join failed");
        assert_eq!(&b"\xff\xfe"[..], first.as_slice());
        assert!(first.parent().is_none());

        let second = first.join(b"b").expect("join failed");
        assert_eq!(&b"\xff\xfe/b"[..], second.as_slice());
        assert_eq!(Some(first.as_ref()), second.parent());
        assert_eq!(Some(&b"b"[..]), second.file_name());
        assert_eq!(
            vec![&b"\xff\xfe"[..], &b"b"[..]],
            second.components().collect::<Vec<_>>()
        );
    }
}