about summary refs log tree commit diff
path: root/tvix/castore/src/path.rs
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-08-16T14·32+0300
committerclbot <clbot@tvl.fyi>2024-08-17T15·59+0000
commit5ec93b57e6a263eef91ee583aba9f04581e4a66b (patch)
tree896407c00900d630a38ee82176ff12e0870f7a20 /tvix/castore/src/path.rs
parent8ea7d2b60eb4052d934820078c31ff25786376a4 (diff)
refactor(tvix/castore): add PathComponent type for checked components r/8506
This encodes a verified component on the type level. Internally, it
contains a bytes::Bytes.

The castore Path/PathBuf component() and file_name() methods now
return this type, the old ones returning bytes were renamed to
component_bytes() and component_file_name() respectively.

We can drop the directory_reject_invalid_name test - it's not possible
anymore to pass an invalid name to Directories::add.
Invalid names in the Directory proto are still being tested to be
rejected in the validate_invalid_names tests.

Change-Id: Ide4d16415dfd50b7e2d7e0c36d42a3bbeeb9b6c5
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12217
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/castore/src/path.rs')
-rw-r--r--tvix/castore/src/path.rs450
1 files changed, 0 insertions, 450 deletions
diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path.rs
deleted file mode 100644
index 8a55e9f5a1d3..000000000000
--- a/tvix/castore/src/path.rs
+++ /dev/null
@@ -1,450 +0,0 @@
-//! Contains data structures to deal with Paths in the tvix-castore model.
-
-use std::{
-    borrow::Borrow,
-    fmt::{self, Debug, Display},
-    mem,
-    ops::Deref,
-    str::FromStr,
-};
-
-use bstr::ByteSlice;
-
-use crate::Directory;
-
-/// Represents a Path in the castore model.
-/// These are always relative, and platform-independent, which distinguishes
-/// them from the ones provided in the standard library.
-#[derive(Eq, Hash, PartialEq)]
-#[repr(transparent)] // SAFETY: Representation has to match [u8]
-pub struct Path {
-    // As node names in the castore model cannot contain slashes,
-    // we use them as component separators here.
-    inner: [u8],
-}
-
-#[allow(dead_code)]
-impl Path {
-    // SAFETY: The empty path is valid.
-    pub const ROOT: &'static Path = unsafe { Path::from_bytes_unchecked(&[]) };
-
-    /// Convert a byte slice to a path, without checking validity.
-    const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Path {
-        // SAFETY: &[u8] and &Path have the same representation.
-        unsafe { mem::transmute(bytes) }
-    }
-
-    fn from_bytes(bytes: &[u8]) -> Option<&Path> {
-        if !bytes.is_empty() {
-            // Ensure all components are valid castore node names.
-            for component in bytes.split_str(b"/") {
-                if !Directory::is_valid_name(component) {
-                    return None;
-                }
-            }
-        }
-
-        // SAFETY: We have verified that the path contains no empty components.
-        Some(unsafe { Path::from_bytes_unchecked(bytes) })
-    }
-
-    pub fn into_boxed_bytes(self: Box<Path>) -> Box<[u8]> {
-        // SAFETY: Box<Path> and Box<[u8]> have the same representation.
-        unsafe { mem::transmute(self) }
-    }
-
-    /// Returns the path without its final component, if there is one.
-    ///
-    /// Note that the parent of a bare file name is [Path::ROOT].
-    /// [Path::ROOT] is the only path without a parent.
-    pub fn parent(&self) -> Option<&Path> {
-        // The root does not have a parent.
-        if self.inner.is_empty() {
-            return None;
-        }
-
-        Some(
-            if let Some((parent, _file_name)) = self.inner.rsplit_once_str(b"/") {
-                // SAFETY: The parent of a valid Path is a valid Path.
-                unsafe { Path::from_bytes_unchecked(parent) }
-            } else {
-                // The parent of a bare file name is the root.
-                Path::ROOT
-            },
-        )
-    }
-
-    /// Creates a PathBuf with `name` adjoined to self.
-    pub fn try_join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> {
-        let mut v = PathBuf::with_capacity(self.inner.len() + name.len() + 1);
-        v.inner.extend_from_slice(&self.inner);
-        v.try_push(name)?;
-
-        Ok(v)
-    }
-
-    /// Produces an iterator over the components of the path, which are
-    /// individual byte slices.
-    /// In case the path is empty, an empty iterator is returned.
-    pub fn components(&self) -> impl Iterator<Item = &[u8]> {
-        let mut iter = self.inner.split_str(&b"/");
-
-        // We don't want to return an empty element, consume it if it's the only one.
-        if self.inner.is_empty() {
-            let _ = iter.next();
-        }
-
-        iter
-    }
-
-    /// Returns the final component of the Path, if there is one.
-    pub fn file_name(&self) -> Option<&[u8]> {
-        self.components().last()
-    }
-
-    pub fn as_bytes(&self) -> &[u8] {
-        &self.inner
-    }
-}
-
-impl Debug for Path {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        Debug::fmt(self.inner.as_bstr(), f)
-    }
-}
-
-impl Display for Path {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        Display::fmt(self.inner.as_bstr(), f)
-    }
-}
-
-impl AsRef<Path> for Path {
-    fn as_ref(&self) -> &Path {
-        self
-    }
-}
-
-/// Represents a owned PathBuf in the castore model.
-/// These are always relative, and platform-independent, which distinguishes
-/// them from the ones provided in the standard library.
-#[derive(Clone, Default, Eq, Hash, PartialEq)]
-pub struct PathBuf {
-    inner: Vec<u8>,
-}
-
-impl Deref for PathBuf {
-    type Target = Path;
-
-    fn deref(&self) -> &Self::Target {
-        // SAFETY: PathBuf always contains a valid Path.
-        unsafe { Path::from_bytes_unchecked(&self.inner) }
-    }
-}
-
-impl AsRef<Path> for PathBuf {
-    fn as_ref(&self) -> &Path {
-        self
-    }
-}
-
-impl ToOwned for Path {
-    type Owned = PathBuf;
-
-    fn to_owned(&self) -> Self::Owned {
-        PathBuf {
-            inner: self.inner.to_owned(),
-        }
-    }
-}
-
-impl Borrow<Path> for PathBuf {
-    fn borrow(&self) -> &Path {
-        self
-    }
-}
-
-impl From<Box<Path>> for PathBuf {
-    fn from(value: Box<Path>) -> Self {
-        // SAFETY: Box<Path> is always a valid path.
-        unsafe { PathBuf::from_bytes_unchecked(value.into_boxed_bytes().into_vec()) }
-    }
-}
-
-impl From<&Path> for PathBuf {
-    fn from(value: &Path) -> Self {
-        value.to_owned()
-    }
-}
-
-impl FromStr for PathBuf {
-    type Err = std::io::Error;
-
-    fn from_str(s: &str) -> Result<PathBuf, Self::Err> {
-        Ok(Path::from_bytes(s.as_bytes())
-            .ok_or(std::io::ErrorKind::InvalidData)?
-            .to_owned())
-    }
-}
-
-impl Debug for PathBuf {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        Debug::fmt(&**self, f)
-    }
-}
-
-impl Display for PathBuf {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        Display::fmt(&**self, f)
-    }
-}
-
-impl PathBuf {
-    pub fn new() -> PathBuf {
-        Self::default()
-    }
-
-    pub fn with_capacity(capacity: usize) -> PathBuf {
-        // SAFETY: The empty path is a valid path.
-        Self {
-            inner: Vec::with_capacity(capacity),
-        }
-    }
-
-    /// Adjoins `name` to self.
-    pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> {
-        if !Directory::is_valid_name(name) {
-            return Err(std::io::ErrorKind::InvalidData.into());
-        }
-
-        if !self.inner.is_empty() {
-            self.inner.push(b'/');
-        }
-
-        self.inner.extend_from_slice(name);
-
-        Ok(())
-    }
-
-    /// Convert a byte vector to a PathBuf, without checking validity.
-    unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> PathBuf {
-        PathBuf { inner: bytes }
-    }
-
-    /// Convert from a [&std::path::Path] to [Self].
-    ///
-    /// - Self uses `/` as path separator.
-    /// - Absolute paths are always rejected, are are these with custom prefixes.
-    /// - Repeated separators are deduplicated.
-    /// - Occurrences of `.` are normalized away.
-    /// - A trailing slash is normalized away.
-    ///
-    /// A `canonicalize_dotdot` boolean controls whether `..` will get
-    /// canonicalized if possible, or should return an error.
-    ///
-    /// For more exotic paths, this conversion might produce different results
-    /// on different platforms, due to different underlying byte
-    /// representations, which is why it's restricted to unix for now.
-    #[cfg(unix)]
-    pub fn from_host_path(
-        host_path: &std::path::Path,
-        canonicalize_dotdot: bool,
-    ) -> Result<Self, std::io::Error> {
-        let mut p = PathBuf::with_capacity(host_path.as_os_str().len());
-
-        for component in host_path.components() {
-            match component {
-                std::path::Component::Prefix(_) | std::path::Component::RootDir => {
-                    return Err(std::io::Error::new(
-                        std::io::ErrorKind::InvalidData,
-                        "found disallowed prefix or rootdir",
-                    ))
-                }
-                std::path::Component::CurDir => continue, // ignore
-                std::path::Component::ParentDir => {
-                    if canonicalize_dotdot {
-                        // Try popping the last element from the path being constructed.
-                        // FUTUREWORK: pop method?
-                        p = p
-                            .parent()
-                            .ok_or_else(|| {
-                                std::io::Error::new(
-                                    std::io::ErrorKind::InvalidData,
-                                    "found .. going too far up",
-                                )
-                            })?
-                            .to_owned();
-                    } else {
-                        return Err(std::io::Error::new(
-                            std::io::ErrorKind::InvalidData,
-                            "found disallowed ..",
-                        ));
-                    }
-                }
-                std::path::Component::Normal(s) => {
-                    // append the new component to the path being constructed.
-                    p.try_push(s.as_encoded_bytes()).map_err(|_| {
-                        std::io::Error::new(
-                            std::io::ErrorKind::InvalidData,
-                            "encountered invalid node in sub_path component",
-                        )
-                    })?
-                }
-            }
-        }
-
-        Ok(p)
-    }
-
-    pub fn into_boxed_path(self) -> Box<Path> {
-        // SAFETY: Box<[u8]> and Box<Path> have the same representation,
-        // and PathBuf always contains a valid Path.
-        unsafe { mem::transmute(self.inner.into_boxed_slice()) }
-    }
-
-    pub fn into_bytes(self) -> Vec<u8> {
-        self.inner
-    }
-}
-
-#[cfg(test)]
-mod test {
-    use super::{Path, PathBuf};
-    use bstr::ByteSlice;
-    use rstest::rstest;
-
-    // TODO: add some manual tests including invalid UTF-8 (hard to express
-    // with rstest)
-
-    #[rstest]
-    #[case::empty("", 0)]
-    #[case("a", 1)]
-    #[case("a/b", 2)]
-    #[case("a/b/c", 3)]
-    // add two slightly more cursed variants.
-    // Technically nothing prevents us from representing this with castore,
-    // but maybe we want to disallow constructing paths like this as it's a
-    // bad idea.
-    #[case::cursed("C:\\a/b", 2)]
-    #[case::cursed("\\\\tvix-store", 1)]
-    pub fn from_str(#[case] s: &str, #[case] num_components: usize) {
-        let p: PathBuf = s.parse().expect("must parse");
-
-        assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch");
-        assert_eq!(
-            num_components,
-            p.components().count(),
-            "number of components mismatch"
-        );
-    }
-
-    #[rstest]
-    #[case::absolute("/a/b")]
-    #[case::two_forward_slashes_start("//a/b")]
-    #[case::two_forward_slashes_middle("a/b//c/d")]
-    #[case::trailing_slash("a/b/")]
-    #[case::dot(".")]
-    #[case::dotdot("..")]
-    #[case::dot_start("./a")]
-    #[case::dotdot_start("../a")]
-    #[case::dot_middle("a/./b")]
-    #[case::dotdot_middle("a/../b")]
-    #[case::dot_end("a/b/.")]
-    #[case::dotdot_end("a/b/..")]
-    #[case::null("fo\0o")]
-    pub fn from_str_fail(#[case] s: &str) {
-        s.parse::<PathBuf>().expect_err("must fail");
-    }
-
-    #[rstest]
-    #[case("foo", "")]
-    #[case("foo/bar", "foo")]
-    #[case("foo2/bar2", "foo2")]
-    #[case("foo/bar/baz", "foo/bar")]
-    pub fn parent(#[case] p: PathBuf, #[case] exp_parent: PathBuf) {
-        assert_eq!(Some(&*exp_parent), p.parent());
-    }
-
-    #[rstest]
-    pub fn no_parent() {
-        assert!(Path::ROOT.parent().is_none());
-    }
-
-    #[rstest]
-    #[case("a", "b", "a/b")]
-    #[case("a", "b", "a/b")]
-    pub fn join_push(#[case] mut p: PathBuf, #[case] name: &str, #[case] exp_p: PathBuf) {
-        assert_eq!(exp_p, p.try_join(name.as_bytes()).expect("join failed"));
-        p.try_push(name.as_bytes()).expect("push failed");
-        assert_eq!(exp_p, p);
-    }
-
-    #[rstest]
-    #[case("a", "/")]
-    #[case("a", "")]
-    #[case("a", "b/c")]
-    #[case("", "/")]
-    #[case("", "")]
-    #[case("", "b/c")]
-    #[case("", ".")]
-    #[case("", "..")]
-    pub fn join_push_fail(#[case] mut p: PathBuf, #[case] name: &str) {
-        p.try_join(name.as_bytes())
-            .expect_err("join succeeded unexpectedly");
-        p.try_push(name.as_bytes())
-            .expect_err("push succeeded unexpectedly");
-    }
-
-    #[rstest]
-    #[case::empty("", vec![])]
-    #[case("a", vec!["a"])]
-    #[case("a/b", vec!["a", "b"])]
-    #[case("a/b/c", vec!["a","b", "c"])]
-    pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) {
-        assert_eq!(
-            exp_components,
-            p.components()
-                .map(|x| x.to_str().unwrap())
-                .collect::<Vec<_>>()
-        );
-    }
-
-    #[rstest]
-    #[case::empty("", "", false)]
-    #[case::path("a", "a", false)]
-    #[case::path2("a/b", "a/b", false)]
-    #[case::double_slash_middle("a//b", "a/b", false)]
-    #[case::dot(".", "", false)]
-    #[case::dot_start("./a/b", "a/b", false)]
-    #[case::dot_middle("a/./b", "a/b", false)]
-    #[case::dot_end("a/b/.", "a/b", false)]
-    #[case::trailing_slash("a/b/", "a/b", false)]
-    #[case::dotdot_canonicalize("a/..", "", true)]
-    #[case::dotdot_canonicalize2("a/../b", "b", true)]
-    #[cfg_attr(unix, case::faux_prefix("\\\\nix-store", "\\\\nix-store", false))]
-    #[cfg_attr(unix, case::faux_letter("C:\\foo.txt", "C:\\foo.txt", false))]
-    pub fn from_host_path(
-        #[case] host_path: std::path::PathBuf,
-        #[case] exp_path: PathBuf,
-        #[case] canonicalize_dotdot: bool,
-    ) {
-        let p = PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect("must succeed");
-
-        assert_eq!(exp_path, p);
-    }
-
-    #[rstest]
-    #[case::absolute("/", false)]
-    #[case::dotdot_root("..", false)]
-    #[case::dotdot_root_canonicalize("..", true)]
-    #[case::dotdot_root_no_canonicalize("a/..", false)]
-    #[case::invalid_name("foo/bar\0", false)]
-    // #[cfg_attr(windows, case::prefix("\\\\nix-store", false))]
-    // #[cfg_attr(windows, case::letter("C:\\foo.txt", false))]
-    pub fn from_host_path_fail(
-        #[case] host_path: std::path::PathBuf,
-        #[case] canonicalize_dotdot: bool,
-    ) {
-        PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect_err("must fail");
-    }
-}