about summary refs log tree commit diff
path: root/tvix/castore/src/nodes
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/castore/src/nodes')
-rw-r--r--tvix/castore/src/nodes/directory.rs287
-rw-r--r--tvix/castore/src/nodes/mod.rs48
-rw-r--r--tvix/castore/src/nodes/symlink_target.rs223
3 files changed, 558 insertions, 0 deletions
diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs
new file mode 100644
index 000000000000..f80e055dde80
--- /dev/null
+++ b/tvix/castore/src/nodes/directory.rs
@@ -0,0 +1,287 @@
+use std::collections::btree_map::{self, BTreeMap};
+
+use crate::{errors::DirectoryError, path::PathComponent, proto, B3Digest, Node};
+
+/// A Directory contains nodes, which can be Directory, File or Symlink nodes.
+/// It attaches names to these nodes, which is the basename in that directory.
+/// These names:
+///  - MUST not contain slashes or null bytes
+///  - MUST not be '.' or '..'
+///  - MUST be unique across all three lists
+#[derive(Default, Debug, Clone, PartialEq, Eq)]
+pub struct Directory {
+    nodes: BTreeMap<PathComponent, Node>,
+}
+
+impl Directory {
+    /// Constructs a new, empty Directory.
+    pub fn new() -> Self {
+        Directory {
+            nodes: BTreeMap::new(),
+        }
+    }
+
+    /// Construct a [Directory] from tuples of name and [Node].
+    ///
+    /// Inserting multiple elements with the same name will yield an error, as
+    /// well as exceeding the maximum size.
+    pub fn try_from_iter<T: IntoIterator<Item = (PathComponent, Node)>>(
+        iter: T,
+    ) -> Result<Directory, DirectoryError> {
+        let mut nodes = BTreeMap::new();
+
+        iter.into_iter().try_fold(0u64, |size, (name, node)| {
+            check_insert_node(size, &mut nodes, name, node)
+        })?;
+
+        Ok(Self { nodes })
+    }
+
+    /// The size of a directory is the number of all regular and symlink elements,
+    /// the number of directory elements, and their size fields.
+    pub fn size(&self) -> u64 {
+        // It's impossible to create a Directory where the size overflows, because we
+        // check before every add() that the size won't overflow.
+        (self.nodes.len() as u64)
+            + self
+                .nodes()
+                .map(|(_name, n)| match n {
+                    Node::Directory { size, .. } => 1 + size,
+                    Node::File { .. } | Node::Symlink { .. } => 1,
+                })
+                .sum::<u64>()
+    }
+
+    /// Calculates the digest of a Directory, which is the blake3 hash of a
+    /// Directory protobuf message, serialized in protobuf canonical form.
+    pub fn digest(&self) -> B3Digest {
+        proto::Directory::from(self.clone()).digest()
+    }
+
+    /// Allows iterating over all nodes (directories, files and symlinks)
+    /// For each, it returns a tuple of its name and node.
+    /// The elements are sorted by their names.
+    pub fn nodes(&self) -> impl Iterator<Item = (&PathComponent, &Node)> + Send + Sync + '_ {
+        self.nodes.iter()
+    }
+
+    /// Dissolves a Directory into its individual names and nodes.
+    /// The elements are sorted by their names.
+    pub fn into_nodes(self) -> impl Iterator<Item = (PathComponent, Node)> + Send + Sync {
+        self.nodes.into_iter()
+    }
+
+    /// Adds the specified [Node] to the [Directory] with a given name.
+    ///
+    /// Inserting a node that already exists with the same name in the directory
+    /// will yield an error, as well as exceeding the maximum size.
+    ///
+    /// In case you want to construct a [Directory] from multiple elements, use
+    /// [from_iter] instead.
+    pub fn add(&mut self, name: PathComponent, node: Node) -> Result<(), DirectoryError> {
+        check_insert_node(self.size(), &mut self.nodes, name, node)?;
+        Ok(())
+    }
+}
+
+fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> {
+    iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i))
+}
+
+/// Helper function dealing with inserting nodes into the nodes [BTreeMap],
+/// after ensuring the new size doesn't overlow and the key doesn't exist already.
+///
+/// Returns the new total size, or an error.
+fn check_insert_node(
+    current_size: u64,
+    nodes: &mut BTreeMap<PathComponent, Node>,
+    name: PathComponent,
+    node: Node,
+) -> Result<u64, DirectoryError> {
+    // Check that the even after adding this new directory entry, the size calculation will not
+    // overflow
+    let new_size = checked_sum([
+        current_size,
+        1,
+        match node {
+            Node::Directory { size, .. } => size,
+            _ => 0,
+        },
+    ])
+    .ok_or(DirectoryError::SizeOverflow)?;
+
+    match nodes.entry(name) {
+        btree_map::Entry::Vacant(e) => {
+            e.insert(node);
+        }
+        btree_map::Entry::Occupied(occupied) => {
+            return Err(DirectoryError::DuplicateName(occupied.key().to_owned()))
+        }
+    }
+
+    Ok(new_size)
+}
+
+#[cfg(test)]
+mod test {
+    use super::{Directory, Node};
+    use crate::fixtures::DUMMY_DIGEST;
+    use crate::{DirectoryError, PathComponent};
+
+    #[test]
+    fn from_iter_single() {
+        Directory::try_from_iter([(
+            PathComponent::try_from("b").unwrap(),
+            Node::Directory {
+                digest: DUMMY_DIGEST.clone(),
+                size: 1,
+            },
+        )])
+        .unwrap();
+    }
+
+    #[test]
+    fn from_iter_multiple() {
+        let d = Directory::try_from_iter([
+            (
+                "b".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "a".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "z".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                },
+            ),
+            (
+                "f".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "c".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "g".try_into().unwrap(),
+                Node::File {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: 1,
+                    executable: true,
+                },
+            ),
+            (
+                "t".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+            (
+                "o".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+            (
+                "e".try_into().unwrap(),
+                Node::Symlink {
+                    target: "a".try_into().unwrap(),
+                },
+            ),
+        ])
+        .unwrap();
+
+        // Convert to proto struct and back to ensure we are not generating any invalid structures
+        crate::Directory::try_from(crate::proto::Directory::from(d))
+            .expect("directory should be valid");
+    }
+
+    #[test]
+    fn add_nodes_to_directory() {
+        let mut d = Directory::new();
+
+        d.add(
+            "b".try_into().unwrap(),
+            Node::Directory {
+                digest: DUMMY_DIGEST.clone(),
+                size: 1,
+            },
+        )
+        .unwrap();
+        d.add(
+            "a".try_into().unwrap(),
+            Node::Directory {
+                digest: DUMMY_DIGEST.clone(),
+                size: 1,
+            },
+        )
+        .unwrap();
+
+        // Convert to proto struct and back to ensure we are not generating any invalid structures
+        crate::Directory::try_from(crate::proto::Directory::from(d))
+            .expect("directory should be valid");
+    }
+
+    #[test]
+    fn validate_overflow() {
+        let mut d = Directory::new();
+
+        assert_eq!(
+            d.add(
+                "foo".try_into().unwrap(),
+                Node::Directory {
+                    digest: DUMMY_DIGEST.clone(),
+                    size: u64::MAX
+                }
+            ),
+            Err(DirectoryError::SizeOverflow)
+        );
+    }
+
+    #[test]
+    fn add_duplicate_node_to_directory() {
+        let mut d = Directory::new();
+
+        d.add(
+            "a".try_into().unwrap(),
+            Node::Directory {
+                digest: DUMMY_DIGEST.clone(),
+                size: 1,
+            },
+        )
+        .unwrap();
+        assert_eq!(
+            format!(
+                "{}",
+                d.add(
+                    "a".try_into().unwrap(),
+                    Node::File {
+                        digest: DUMMY_DIGEST.clone(),
+                        size: 1,
+                        executable: true
+                    }
+                )
+                .expect_err("adding duplicate dir entry must fail")
+            ),
+            "\"a\" is a duplicate name"
+        );
+    }
+}
diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs
new file mode 100644
index 000000000000..ac7aa1e666df
--- /dev/null
+++ b/tvix/castore/src/nodes/mod.rs
@@ -0,0 +1,48 @@
+//! This holds types describing nodes in the tvix-castore model.
+mod directory;
+mod symlink_target;
+
+use crate::B3Digest;
+pub use directory::Directory;
+pub use symlink_target::{SymlinkTarget, SymlinkTargetError};
+
+/// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode].
+/// Nodes themselves don't have names, what gives them names is either them
+/// being inside a [Directory], or a root node with its own name attached to it.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub enum Node {
+    /// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest].
+    /// It also records a`size`.
+    /// Such a node is either an element in the [Directory] it itself is contained in,
+    /// or a standalone root node.
+    Directory {
+        /// The blake3 hash of a Directory message, serialized in protobuf canonical form.
+        digest: B3Digest,
+        /// Number of child elements in the Directory referred to by `digest`.
+        /// Calculated by summing up the numbers of nodes, and for each directory,
+        /// its size field. Can be used for inode allocation.
+        /// This field is precisely as verifiable as any other Merkle tree edge.
+        /// Resolve `digest`, and you can compute it incrementally. Resolve the entire
+        /// tree, and you can fully compute it from scratch.
+        /// A credulous implementation won't reject an excessive size, but this is
+        /// harmless: you'll have some ordinals without nodes. Undersizing is obvious
+        /// and easy to reject: you won't have an ordinal for some nodes.
+        size: u64,
+    },
+    /// A FileNode represents a regular or executable file in a Directory or at the root.
+    File {
+        /// The blake3 digest of the file contents
+        digest: B3Digest,
+
+        /// The file content size
+        size: u64,
+
+        /// Whether the file is executable
+        executable: bool,
+    },
+    /// A SymlinkNode represents a symbolic link in a Directory or at the root.
+    Symlink {
+        /// The target of the symlink.
+        target: SymlinkTarget,
+    },
+}
diff --git a/tvix/castore/src/nodes/symlink_target.rs b/tvix/castore/src/nodes/symlink_target.rs
new file mode 100644
index 000000000000..e9a1a0bd05c2
--- /dev/null
+++ b/tvix/castore/src/nodes/symlink_target.rs
@@ -0,0 +1,223 @@
+use bstr::ByteSlice;
+use std::fmt::{self, Debug, Display};
+
+/// A wrapper type for symlink targets.
+/// Internally uses a [bytes::Bytes], but disallows empty targets and those
+/// containing null bytes.
+#[repr(transparent)]
+#[derive(Clone, PartialEq, Eq)]
+pub struct SymlinkTarget {
+    inner: bytes::Bytes,
+}
+
+/// The maximum length a symlink target can have.
+/// Linux allows 4095 bytes here.
+pub const MAX_TARGET_LEN: usize = 4095;
+
+impl AsRef<[u8]> for SymlinkTarget {
+    fn as_ref(&self) -> &[u8] {
+        self.inner.as_ref()
+    }
+}
+
+impl From<SymlinkTarget> for bytes::Bytes {
+    fn from(value: SymlinkTarget) -> Self {
+        value.inner
+    }
+}
+
+fn validate_symlink_target<B: AsRef<[u8]>>(symlink_target: B) -> Result<B, SymlinkTargetError> {
+    let v = symlink_target.as_ref();
+
+    if v.is_empty() {
+        return Err(SymlinkTargetError::Empty);
+    }
+    if v.len() > MAX_TARGET_LEN {
+        return Err(SymlinkTargetError::TooLong);
+    }
+    if v.contains(&0x00) {
+        return Err(SymlinkTargetError::Null);
+    }
+
+    Ok(symlink_target)
+}
+
+impl TryFrom<bytes::Bytes> for SymlinkTarget {
+    type Error = SymlinkTargetError;
+
+    fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> {
+        if let Err(e) = validate_symlink_target(&value) {
+            return Err(SymlinkTargetError::Convert(value, Box::new(e)));
+        }
+
+        Ok(Self { inner: value })
+    }
+}
+
+impl TryFrom<&'static [u8]> for SymlinkTarget {
+    type Error = SymlinkTargetError;
+
+    fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> {
+        if let Err(e) = validate_symlink_target(&value) {
+            return Err(SymlinkTargetError::Convert(value.into(), Box::new(e)));
+        }
+
+        Ok(Self {
+            inner: bytes::Bytes::from_static(value),
+        })
+    }
+}
+
+impl TryFrom<&str> for SymlinkTarget {
+    type Error = SymlinkTargetError;
+
+    fn try_from(value: &str) -> Result<Self, Self::Error> {
+        if let Err(e) = validate_symlink_target(value) {
+            return Err(SymlinkTargetError::Convert(
+                value.to_owned().into(),
+                Box::new(e),
+            ));
+        }
+
+        Ok(Self {
+            inner: bytes::Bytes::copy_from_slice(value.as_bytes()),
+        })
+    }
+}
+
+impl Debug for SymlinkTarget {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        Debug::fmt(self.inner.as_bstr(), f)
+    }
+}
+
+impl Display for SymlinkTarget {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        Display::fmt(self.inner.as_bstr(), f)
+    }
+}
+
+/// Errors created when constructing / converting to [SymlinkTarget].
+#[derive(Debug, PartialEq, Eq, thiserror::Error)]
+#[cfg_attr(test, derive(Clone))]
+pub enum SymlinkTargetError {
+    #[error("cannot be empty")]
+    Empty,
+    #[error("cannot contain null bytes")]
+    Null,
+    #[error("cannot be over {} bytes long", MAX_TARGET_LEN)]
+    TooLong,
+    #[error("unable to convert '{:?}", .0.as_bstr())]
+    Convert(bytes::Bytes, Box<Self>),
+}
+
+#[cfg(test)]
+mod tests {
+    use bytes::Bytes;
+    use rstest::rstest;
+
+    use super::validate_symlink_target;
+    use super::{SymlinkTarget, SymlinkTargetError};
+
+    #[rstest]
+    #[case::empty(b"", SymlinkTargetError::Empty)]
+    #[case::null(b"foo\0", SymlinkTargetError::Null)]
+    fn errors(#[case] v: &'static [u8], #[case] err: SymlinkTargetError) {
+        {
+            assert_eq!(
+                Err(err.clone()),
+                validate_symlink_target(v),
+                "validate_symlink_target must fail as expected"
+            );
+        }
+
+        let exp_err_v = Bytes::from_static(v);
+
+        // Bytes
+        {
+            let v = Bytes::from_static(v);
+            assert_eq!(
+                Err(SymlinkTargetError::Convert(
+                    exp_err_v.clone(),
+                    Box::new(err.clone())
+                )),
+                SymlinkTarget::try_from(v),
+                "conversion must fail as expected"
+            );
+        }
+        // &[u8]
+        {
+            assert_eq!(
+                Err(SymlinkTargetError::Convert(
+                    exp_err_v.clone(),
+                    Box::new(err.clone())
+                )),
+                SymlinkTarget::try_from(v),
+                "conversion must fail as expected"
+            );
+        }
+        // &str, if this is valid UTF-8
+        {
+            if let Ok(v) = std::str::from_utf8(v) {
+                assert_eq!(
+                    Err(SymlinkTargetError::Convert(
+                        exp_err_v.clone(),
+                        Box::new(err.clone())
+                    )),
+                    SymlinkTarget::try_from(v),
+                    "conversion must fail as expected"
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn error_toolong() {
+        assert_eq!(
+            Err(SymlinkTargetError::TooLong),
+            validate_symlink_target("X".repeat(5000).into_bytes().as_slice())
+        )
+    }
+
+    #[rstest]
+    #[case::boring(b"aa")]
+    #[case::dot(b".")]
+    #[case::dotsandslashes(b"./..")]
+    #[case::dotdot(b"..")]
+    #[case::slashes(b"a/b")]
+    #[case::slashes_and_absolute(b"/a/b")]
+    #[case::invalid_utf8(b"\xc5\xc4\xd6")]
+    fn success(#[case] v: &'static [u8]) {
+        let exp = SymlinkTarget { inner: v.into() };
+
+        // Bytes
+        {
+            let v: Bytes = v.into();
+            assert_eq!(
+                Ok(exp.clone()),
+                SymlinkTarget::try_from(v),
+                "conversion must succeed"
+            )
+        }
+
+        // &[u8]
+        {
+            assert_eq!(
+                Ok(exp.clone()),
+                SymlinkTarget::try_from(v),
+                "conversion must succeed"
+            )
+        }
+
+        // &str, if this is valid UTF-8
+        {
+            if let Ok(v) = std::str::from_utf8(v) {
+                assert_eq!(
+                    Ok(exp.clone()),
+                    SymlinkTarget::try_from(v),
+                    "conversion must succeed"
+                )
+            }
+        }
+    }
+}