about summary refs log tree commit diff
path: root/tvix/castore/src/path.rs
//! Contains data structures to deal with Paths in the tvix-castore model.

use std::{
    borrow::Borrow,
    fmt::{self, Debug, Display},
    mem,
    ops::Deref,
    str::FromStr,
};

use bstr::ByteSlice;

use crate::proto::validate_node_name;

/// Represents a Path in the castore model.
/// These are always relative, and platform-independent, which distinguishes
/// them from the ones provided in the standard library.
///
/// A Path is an unsized view over raw bytes: either empty (the root, see
/// [Path::ROOT]), or a sequence of node names joined by `/`.
// The unsafe conversions in this file transmute between `[u8]`-based and
// `Path`-based references and boxes, which is only sound as long as Path
// stays a transparent wrapper around its single `[u8]` field.
#[derive(Eq, Hash, PartialEq)]
#[repr(transparent)] // SAFETY: Representation has to match [u8]
pub struct Path {
    // As node names in the castore model cannot contain slashes,
    // we use them as component separators here.
    inner: [u8],
}

// NOTE(review): presumably not every method below is used inside the crate
// yet — confirm, and narrow this allow to the affected items if possible.
#[allow(dead_code)]
impl Path {
    /// The root path, represented by the empty byte sequence.
    // SAFETY: The empty path is valid.
    pub const ROOT: &'static Path = unsafe { Path::from_bytes_unchecked(&[]) };

    /// Convert a byte slice to a path, without checking validity.
    ///
    /// # Safety
    ///
    /// `bytes` must either be empty, or consist of `/`-separated components
    /// that are each accepted by `validate_node_name`.
    const unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Path {
        // SAFETY: &[u8] and &Path have the same representation.
        unsafe { mem::transmute::<&[u8], &Path>(bytes) }
    }

    /// Convert a byte slice to a path, checking validity first.
    ///
    /// The empty slice is accepted and yields the empty (root) path.
    /// Otherwise, `None` is returned if any `/`-separated component is
    /// rejected by `validate_node_name`.
    fn from_bytes(bytes: &[u8]) -> Option<&Path> {
        if !bytes.is_empty() {
            // Ensure all components are valid castore node names.
            for component in bytes.split_str(b"/") {
                validate_node_name(component).ok()?;
            }
        }

        // SAFETY: The path is either empty, or each of its components has
        // been accepted by validate_node_name above.
        Some(unsafe { Path::from_bytes_unchecked(bytes) })
    }

    /// Converts a boxed Path into its underlying boxed bytes, without copying.
    pub fn into_boxed_bytes(self: Box<Path>) -> Box<[u8]> {
        // SAFETY: Box<Path> and Box<[u8]> have the same representation.
        unsafe { mem::transmute::<Box<Path>, Box<[u8]>>(self) }
    }

    /// Returns the path without its final component, if there is one.
    ///
    /// Note that the parent of a bare file name is [Path::ROOT].
    /// [Path::ROOT] is the only path without a parent.
    pub fn parent(&self) -> Option<&Path> {
        // The root does not have a parent.
        if self.inner.is_empty() {
            return None;
        }

        Some(match self.inner.rsplit_once_str(b"/") {
            // SAFETY: The parent of a valid Path is a valid Path.
            Some((parent, _file_name)) => unsafe { Path::from_bytes_unchecked(parent) },
            // The parent of a bare file name is the root.
            None => Path::ROOT,
        })
    }

    /// Creates a PathBuf with `name` adjoined to self.
    ///
    /// # Errors
    ///
    /// Fails if [PathBuf::try_push] rejects `name`.
    pub fn try_join(&self, name: &[u8]) -> Result<PathBuf, std::io::Error> {
        // Reserve room for self, one separator and the new name up front.
        let mut v = PathBuf::with_capacity(self.inner.len() + name.len() + 1);
        v.inner.extend_from_slice(&self.inner);
        v.try_push(name)?;

        Ok(v)
    }

    /// Produces an iterator over the components of the path, which are
    /// individual byte slices.
    /// In case the path is empty, an empty iterator is returned.
    pub fn components(&self) -> impl Iterator<Item = &[u8]> {
        let mut iter = self.inner.split_str(b"/");

        // Splitting an empty slice still yields one (empty) element.
        // We don't want to return it, so consume it if it's the only one.
        if self.inner.is_empty() {
            let _ = iter.next();
        }

        iter
    }

    /// Returns the final component of the Path, if there is one.
    ///
    /// [Path::ROOT] has no components, so `None` is returned for it.
    pub fn file_name(&self) -> Option<&[u8]> {
        if self.inner.is_empty() {
            return None;
        }

        // Only look for the last separator, rather than walking every
        // component from the front.
        Some(match self.inner.rsplit_once_str(b"/") {
            Some((_parent, file_name)) => file_name,
            // Without a separator, the whole path is a single component.
            None => &self.inner,
        })
    }

    /// Returns the raw bytes of the path, with components separated by `/`.
    pub fn as_bytes(&self) -> &[u8] {
        &self.inner
    }
}

impl Debug for Path {
    /// Formats the path bytes using the Debug representation of a byte string.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let bytes = self.inner.as_bstr();
        Debug::fmt(bytes, f)
    }
}

impl Display for Path {
    /// Formats the path bytes using the Display representation of a byte string.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let bytes = self.inner.as_bstr();
        Display::fmt(bytes, f)
    }
}

impl AsRef<Path> for Path {
    /// Returns `self` unchanged, so that APIs accepting `AsRef<Path>` also
    /// accept a plain `&Path`.
    fn as_ref(&self) -> &Self {
        self
    }
}

/// Represents an owned PathBuf in the castore model.
/// These are always relative, and platform-independent, which distinguishes
/// them from the ones provided in the standard library.
///
/// This is the owned counterpart to [Path], and dereferences to it.
#[derive(Clone, Default, Eq, Hash, PartialEq)]
pub struct PathBuf {
    // Must always hold the bytes of a valid Path; the Deref impl relies on
    // this to hand out a &Path without re-validating.
    inner: Vec<u8>,
}

impl Deref for PathBuf {
    type Target = Path;

    /// Borrows the owned bytes as a [Path], without copying or re-validating.
    fn deref(&self) -> &Path {
        let bytes: &[u8] = &self.inner;

        // SAFETY: PathBuf always contains a valid Path.
        unsafe { Path::from_bytes_unchecked(bytes) }
    }
}

impl AsRef<Path> for PathBuf {
    fn as_ref(&self) -> &Path {
        self
    }
}

impl ToOwned for Path {
    type Owned = PathBuf;

    /// Copies the path bytes into a newly allocated [PathBuf].
    fn to_owned(&self) -> PathBuf {
        // A Path is always valid, so its bytes can be taken over as-is.
        let inner = self.inner.to_vec();
        PathBuf { inner }
    }
}

impl Borrow<Path> for PathBuf {
    /// Borrows this PathBuf as a [Path], going through its `Deref` impl.
    fn borrow(&self) -> &Path {
        &**self
    }
}

impl From<Box<Path>> for PathBuf {
    /// Turns a boxed [Path] into a PathBuf, taking over its bytes.
    fn from(value: Box<Path>) -> Self {
        let bytes = value.into_boxed_bytes().into_vec();

        // SAFETY: Box<Path> is always a valid path.
        unsafe { PathBuf::from_bytes_unchecked(bytes) }
    }
}

impl From<&Path> for PathBuf {
    /// Creates a PathBuf holding a copy of the bytes of `value`.
    fn from(value: &Path) -> Self {
        <Path as ToOwned>::to_owned(value)
    }
}

impl FromStr for PathBuf {
    type Err = std::io::Error;

    /// Parses `s` into a PathBuf.
    ///
    /// Fails with an error of kind `InvalidData` if the bytes of `s` are
    /// rejected by `Path::from_bytes`.
    fn from_str(s: &str) -> Result<PathBuf, Self::Err> {
        match Path::from_bytes(s.as_bytes()) {
            Some(path) => Ok(path.to_owned()),
            None => Err(std::io::ErrorKind::InvalidData.into()),
        }
    }
}

impl Debug for PathBuf {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        Debug::fmt(&**self, f)
    }
}

impl Display for PathBuf {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        Display::fmt(&**self, f)
    }
}

impl PathBuf {
    pub fn new() -> PathBuf {
        Self::default()
    }

    pub fn with_capacity(capacity: usize) -> PathBuf {
        // SAFETY: The empty path is a valid path.
        Self {
            inner: Vec::with_capacity(capacity),
        }
    }

    /// Adjoins `name` to self.
    pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> {
        validate_node_name(name).map_err(|_| std::io::ErrorKind::InvalidData)?;

        if !self.inner.is_empty() {
            self.inner.push(b'/');
        }

        self.inner.extend_from_slice(name);

        Ok(())
    }

    /// Convert a byte vector to a PathBuf, without checking validity.
    unsafe fn from_bytes_unchecked(bytes: Vec<u8>) -> PathBuf {
        PathBuf { inner: bytes }
    }

    pub fn into_boxed_path(self) -> Box<Path> {
        // SAFETY: Box<[u8]> and Box<Path> have the same representation,
        // and PathBuf always contains a valid Path.
        unsafe { mem::transmute(self.inner.into_boxed_slice()) }
    }

    pub fn into_bytes(self) -> Vec<u8> {
        self.inner
    }
}

#[cfg(test)]
mod test {
    use super::{Path, PathBuf};
    use bstr::ByteSlice;
    use rstest::rstest;

    // TODO: add some manual tests including invalid UTF-8 (hard to express
    // with rstest)

    /// Valid paths must parse, keep their bytes as-is, and split into the
    /// expected number of components.
    #[rstest]
    #[case::empty("", 0)]
    #[case("a", 1)]
    #[case("a/b", 2)]
    #[case("a/b/c", 3)]
    // add two slightly more cursed variants.
    // Technically nothing prevents us from representing this with castore,
    // but maybe we want to disallow constructing paths like this as it's a
    // bad idea.
    #[case::cursed("C:\\a/b", 2)]
    #[case::cursed("\\tvix-store", 1)]
    pub fn from_str(#[case] s: &str, #[case] num_components: usize) {
        let p: PathBuf = s.parse().expect("must parse");

        assert_eq!(s.as_bytes(), p.as_bytes(), "inner bytes mismatch");
        assert_eq!(
            num_components,
            p.components().count(),
            "number of components mismatch"
        );
    }

    /// Paths containing invalid components must be rejected when parsing.
    #[rstest]
    #[case::absolute("/a/b")]
    #[case::two_forward_slashes_start("//a/b")]
    #[case::two_forward_slashes_middle("a/b//c/d")]
    #[case::trailing_slash("a/b/")]
    #[case::dot(".")]
    #[case::dotdot("..")]
    #[case::dot_start("./a")]
    #[case::dotdot_start("../a")]
    #[case::dot_middle("a/./b")]
    #[case::dotdot_middle("a/../b")]
    #[case::dot_end("a/b/.")]
    #[case::dotdot_end("a/b/..")]
    #[case::null("fo\0o")]
    pub fn from_str_fail(#[case] s: &str) {
        s.parse::<PathBuf>().expect_err("must fail");
    }

    /// Every non-root path has a parent; for a bare name it's the root.
    #[rstest]
    #[case("foo", "")]
    #[case("foo/bar", "foo")]
    #[case("foo2/bar2", "foo2")]
    #[case("foo/bar/baz", "foo/bar")]
    pub fn parent(#[case] p: PathBuf, #[case] exp_parent: PathBuf) {
        assert_eq!(Some(&*exp_parent), p.parent());
    }

    /// The root is the only path without a parent.
    #[rstest]
    pub fn no_parent() {
        assert!(Path::ROOT.parent().is_none());
    }

    /// try_join and try_push must agree, and only insert a separator when
    /// the path being extended is not the root.
    #[rstest]
    #[case("a", "b", "a/b")]
    #[case::root("", "b", "b")]
    #[case::nested("a/b", "c", "a/b/c")]
    pub fn join_push(#[case] mut p: PathBuf, #[case] name: &str, #[case] exp_p: PathBuf) {
        assert_eq!(exp_p, p.try_join(name.as_bytes()).expect("join failed"));
        p.try_push(name.as_bytes()).expect("push failed");
        assert_eq!(exp_p, p);
    }

    /// Invalid names must be rejected by both try_join and try_push.
    #[rstest]
    #[case("a", "/")]
    #[case("a", "")]
    #[case("a", "b/c")]
    #[case("", "/")]
    #[case("", "")]
    #[case("", "b/c")]
    #[case("", ".")]
    #[case("", "..")]
    pub fn join_push_fail(#[case] mut p: PathBuf, #[case] name: &str) {
        p.try_join(name.as_bytes())
            .expect_err("join succeeded unexpectedly");
        p.try_push(name.as_bytes())
            .expect_err("push succeeded unexpectedly");
    }

    /// components() yields each node name in order, and nothing for the root.
    #[rstest]
    #[case::empty("", vec![])]
    #[case("a", vec!["a"])]
    #[case("a/b", vec!["a", "b"])]
    #[case("a/b/c", vec!["a","b", "c"])]
    pub fn components(#[case] p: PathBuf, #[case] exp_components: Vec<&str>) {
        assert_eq!(
            exp_components,
            p.components()
                .map(|x| x.to_str().unwrap())
                .collect::<Vec<_>>()
        );
    }

    /// file_name() yields the last component, and nothing for the root.
    #[rstest]
    #[case::empty("", None)]
    #[case("a", Some("a"))]
    #[case("a/b", Some("b"))]
    #[case("a/b/c", Some("c"))]
    pub fn file_name(#[case] p: PathBuf, #[case] exp_file_name: Option<&str>) {
        assert_eq!(exp_file_name.map(|s| s.as_bytes()), p.file_name());
    }
}