about summary refs log tree commit diff
diff options
context:
space:
mode:
author Florian Klink <flokli@flokli.de> 2024-05-01T09·53+0300
committer flokli <flokli@flokli.de> 2024-05-01T11·01+0000
commit 959c50c4ac7df882c22306610fcb8b26cdf0fcf5 (patch)
tree bfebe9d46466bb4639b68e33e33a196eca55d658
parent 360d80f62e55917bf9cd097251e4fb17f176e559 (diff)
feat(tvix/castore/path): add Path{Buf} r/8050
This contains Path and PathBuf, representing platform-independent paths
representable by the castore model.

These are always relative, and platform-independent, which distinguishes
them from the ones provided in the standard library.

A subsequent CL will move IngestionEntry (and more) to use them.

Change-Id: Ib85857f4159ebc2f3c00192c95d4e5b54ffd4fcf
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11558
Tested-by: BuildkiteCI
Reviewed-by: edef <edef@edef.eu>
-rw-r--r-- tvix/castore/src/lib.rs 3
-rw-r--r-- tvix/castore/src/path.rs 245
2 files changed, 248 insertions, 0 deletions
diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs
index 1a7ac6b4b415..6f5e683b4047 100644
--- a/tvix/castore/src/lib.rs
+++ b/tvix/castore/src/lib.rs
@@ -9,6 +9,9 @@ pub mod fixtures;
 #[cfg(feature = "fs")]
 pub mod fs;
 
+mod path;
+pub use path::Path;
+
 pub mod import;
 pub mod proto;
 pub mod tonic;
diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path.rs
new file mode 100644
index 000000000000..42b69fee54df
--- /dev/null
+++ b/tvix/castore/src/path.rs
@@ -0,0 +1,245 @@
+//! Contains data structures to deal with Paths in the tvix-castore model.
+
+use std::str::FromStr;
+
+use bstr::ByteSlice;
+
+/// Represents a Path in the castore model.
+/// These are always relative, and platform-independent, which distinguishes
+/// them from the ones provided in the standard library.
+#[derive(Clone, Debug, Eq, Hash, PartialEq)]
+pub struct Path<'a> {
+    // As node names in the castore model cannot contain slashes,
+    // we use them as component separators here.
+    inner: &'a [u8],
+}
+
+#[allow(dead_code)]
+impl Path<'_> {
+    pub fn parent(&self) -> Option<Path<'_>> {
+        let (parent, _file_name) = self.inner.rsplit_once_str(b"/")?;
+        Some(Self { inner: parent })
+    }
+
+    pub fn join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> {
+        if name.contains(&b'/') || name.is_empty() {
+            return Err(std::io::ErrorKind::InvalidData.into());
+        }
+
+        let mut v = self.inner.to_vec();
+        if !v.is_empty() {
+            v.extend_from_slice(b"/");
+        }
+        v.extend_from_slice(name);
+
+        Ok(PathBuf { inner: v })
+    }
+
+    /// Produces an iterator over the components of the path, which are
+    /// individual byte slices.
+    /// In case the path is empty, an empty iterator is returned.
+    pub fn components(&self) -> impl Iterator<Item = &[u8]> {
+        let mut iter = self.inner.split_str(&b"/");
+
+        // We don't want to return an empty element, consume it if it's the only one.
+        if self.inner.is_empty() {
+            let _ = iter.next();
+        }
+
+        iter
+    }
+
+    /// Returns the final component of the Path, if there is one.
+    pub fn file_name(&self) -> Option<&[u8]> {
+        self.components().last()
+    }
+
+    pub fn as_slice(&self) -> &[u8] {
+        self.inner
+    }
+}
+
+/// Represents an owned PathBuf in the castore model.
+/// These are always relative, and platform-independent, which distinguishes
+/// them from the ones provided in the standard library.
+#[derive(Clone, Debug, Default, Eq, Hash, PartialEq)]
+pub struct PathBuf {
+    inner: Vec<u8>,
+}
+
+#[allow(dead_code)]
+impl PathBuf {
+    pub fn as_ref(&self) -> Path<'_> {
+        Path { inner: &self.inner }
+    }
+
+    pub fn parent(&self) -> Option<Path<'_>> {
+        let (parent, _file_name) = self.inner.rsplit_once_str(b"/")?;
+        Some(Path { inner: parent })
+    }
+
+    pub fn join(&self, name: &[u8]) -> Result<Self, std::io::Error> {
+        self.as_ref().join(name)
+    }
+
+    /// Produces an iterator over the components of the path, which are
+    /// individual byte slices.
+    pub fn components(&self) -> impl Iterator<Item = &[u8]> {
+        // TODO(edef): get rid of the duplication
+        let mut iter = self.inner.split_str(&b"/");
+
+        // We don't want to return an empty element, consume it if it's the only one.
+        if self.inner.is_empty() {
+            let _ = iter.next();
+        }
+
+        iter
+    }
+
+    /// Returns the final component of the Path, if there is one.
+    pub fn file_name(&self) -> Option<&[u8]> {
+        self.components().last()
+    }
+
+    pub fn as_slice(&self) -> &[u8] {
+        // TODO(edef): get rid of the duplication
+        self.inner.as_slice()
+    }
+}
+
+impl FromStr for PathBuf {
+    type Err = std::io::Error;
+
+    fn from_str(s: &str) -> Result<PathBuf, Self::Err> {
+        // Ensure every component individually validates, i.e. is non-empty:
+        // this rejects leading, trailing and doubled forward slashes.
+        let p = Path {
+            inner: s.as_bytes(),
+        };
+
+        for component in p.components() {
+            if component.is_empty() {
+                return Err(std::io::ErrorKind::InvalidData.into());
+            }
+        }
+
+        Ok(PathBuf {
+            inner: s.to_string().into(),
+        })
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use super::PathBuf;
+    use bstr::ByteSlice;
+    use rstest::rstest;
+
+    // TODO: add some manual tests including invalid UTF-8 (hard to express
+    // with rstest)
+
+    #[rstest]
+    #[case::empty("", 0)]
+    #[case("a", 1)]
+    #[case("a/b", 2)]
+    #[case("a/b/c", 3)]
+    // add two slightly more cursed variants.
+    // Technically nothing prevents us from representing this with castore,
+    // but maybe we want to disallow constructing paths like this as it's a
+    // bad idea.
+    #[case::cursed("C:\\a/b", 2)]
+    #[case::cursed("\\tvix-store", 1)]
+    pub fn from_str(#[case] s: &str, #[case] num_components: usize) {
+        let p: PathBuf = s.parse().expect("must parse");
+
+        assert_eq!(s.as_bytes(), p.as_slice(), "inner bytes mismatch");
+        assert_eq!(
+            num_components,
+            p.components().count(),
+            "number of components mismatch"
+        );
+    }
+
+    #[rstest]
+    #[case::absolute("/a/b")]
+    #[case::two_forward_slashes_start("//a/b")]
+    #[case::two_forward_slashes_middle("a/b//c/d")]
+    #[case::trailing_slash("a/b/")]
+    pub fn from_str_fail(#[case] s: &str) {
+        s.parse::<PathBuf>().expect_err("must fail");
+    }
+
+    #[rstest]
+    #[case("foo/bar", "foo")]
+    #[case("foo2/bar2", "foo2")]
+    #[case("foo/bar/baz", "foo/bar")]
+    pub fn parent(#[case] p: PathBuf, #[case] exp_parent: PathBuf) {
+        assert_eq!(Some(exp_parent.as_ref()), p.parent());
+
+        // same for Path
+        let p = p.as_ref();
+        assert_eq!(Some(exp_parent.as_ref()), p.parent());
+    }
+
+    #[rstest]
+    #[case::empty("")]
+    #[case::single("foo")]
+    pub fn no_parent(#[case] p: PathBuf) {
+        assert!(p.parent().is_none());
+
+        // same for Path
+        assert!(p.as_ref().parent().is_none());
+    }
+
+    #[rstest]
+    #[case("a", "b", "a/b")]
+    #[case("a", "b", "a/b")]
+    pub fn join(#[case] p: PathBuf, #[case] name: &str, #[case] exp_p: PathBuf) {
+        assert_eq!(exp_p, p.join(name.as_bytes()).expect("join failed"));
+        // same for Path
+        assert_eq!(
+            exp_p,
+            p.as_ref().join(name.as_bytes()).expect("join failed")
+        );
+    }
+
+    #[rstest]
+    #[case("a", "/")]
+    #[case("a", "")]
+    #[case("a", "b/c")]
+    #[case("", "/")]
+    #[case("", "")]
+    #[case("", "b/c")]
+    pub fn join_fail(#[case] p: PathBuf, #[case] name: &str) {
+        p.join(name.as_bytes())
+            .expect_err("join succeeded unexpectedly");
+
+        // same for Path
+        p.as_ref()
+            .join(name.as_bytes())
+            .expect_err("join succeeded unexpectedly");
+    }
+
+    #[rstest]
+    #[case::empty("", vec![])]
+    #[case("a", vec!["a"])]
+    #[case("a/b", vec!["a", "b"])]
+    #[case("a/b/c", vec!["a","b", "c"])]
+    pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) {
+        assert_eq!(
+            exp_components,
+            p.components()
+                .map(|x| x.to_str().unwrap())
+                .collect::<Vec<_>>()
+        );
+
+        // same for Path
+        let p = p.as_ref();
+        assert_eq!(
+            exp_components,
+            p.components()
+                .map(|x| x.to_str().unwrap())
+                .collect::<Vec<_>>()
+        );
+    }
+}