diff options
Diffstat (limited to 'tvix/castore/src/nodes')
-rw-r--r-- | tvix/castore/src/nodes/directory.rs | 287 | ||||
-rw-r--r-- | tvix/castore/src/nodes/mod.rs | 48 | ||||
-rw-r--r-- | tvix/castore/src/nodes/symlink_target.rs | 223 |
3 files changed, 558 insertions, 0 deletions
diff --git a/tvix/castore/src/nodes/directory.rs b/tvix/castore/src/nodes/directory.rs new file mode 100644 index 000000000000..f80e055dde80 --- /dev/null +++ b/tvix/castore/src/nodes/directory.rs @@ -0,0 +1,287 @@ +use std::collections::btree_map::{self, BTreeMap}; + +use crate::{errors::DirectoryError, path::PathComponent, proto, B3Digest, Node}; + +/// A Directory contains nodes, which can be Directory, File or Symlink nodes. +/// It attaches names to these nodes, which is the basename in that directory. +/// These names: +/// - MUST not contain slashes or null bytes +/// - MUST not be '.' or '..' +/// - MUST be unique across all three lists +#[derive(Default, Debug, Clone, PartialEq, Eq)] +pub struct Directory { + nodes: BTreeMap<PathComponent, Node>, +} + +impl Directory { + /// Constructs a new, empty Directory. + pub fn new() -> Self { + Directory { + nodes: BTreeMap::new(), + } + } + + /// Construct a [Directory] from tuples of name and [Node]. + /// + /// Inserting multiple elements with the same name will yield an error, as + /// well as exceeding the maximum size. + pub fn try_from_iter<T: IntoIterator<Item = (PathComponent, Node)>>( + iter: T, + ) -> Result<Directory, DirectoryError> { + let mut nodes = BTreeMap::new(); + + iter.into_iter().try_fold(0u64, |size, (name, node)| { + check_insert_node(size, &mut nodes, name, node) + })?; + + Ok(Self { nodes }) + } + + /// The size of a directory is the number of all regular and symlink elements, + /// the number of directory elements, and their size fields. + pub fn size(&self) -> u64 { + // It's impossible to create a Directory where the size overflows, because we + // check before every add() that the size won't overflow. + (self.nodes.len() as u64) + + self + .nodes() + .map(|(_name, n)| match n { + Node::Directory { size, .. } => 1 + size, + Node::File { .. } | Node::Symlink { .. } => 1, + }) + .sum::<u64>() + } + + /// Calculates the digest of a Directory, which is the blake3 hash of a + /// Directory protobuf message, serialized in protobuf canonical form. + pub fn digest(&self) -> B3Digest { + proto::Directory::from(self.clone()).digest() + } + + /// Allows iterating over all nodes (directories, files and symlinks) + /// For each, it returns a tuple of its name and node. + /// The elements are sorted by their names. + pub fn nodes(&self) -> impl Iterator<Item = (&PathComponent, &Node)> + Send + Sync + '_ { + self.nodes.iter() + } + + /// Dissolves a Directory into its individual names and nodes. + /// The elements are sorted by their names. + pub fn into_nodes(self) -> impl Iterator<Item = (PathComponent, Node)> + Send + Sync { + self.nodes.into_iter() + } + + /// Adds the specified [Node] to the [Directory] with a given name. + /// + /// Inserting a node that already exists with the same name in the directory + /// will yield an error, as well as exceeding the maximum size. + /// + /// In case you want to construct a [Directory] from multiple elements, use + /// [from_iter] instead. + pub fn add(&mut self, name: PathComponent, node: Node) -> Result<(), DirectoryError> { + check_insert_node(self.size(), &mut self.nodes, name, node)?; + Ok(()) + } +} + +fn checked_sum(iter: impl IntoIterator<Item = u64>) -> Option<u64> { + iter.into_iter().try_fold(0u64, |acc, i| acc.checked_add(i)) +} + +/// Helper function dealing with inserting nodes into the nodes [BTreeMap], +/// after ensuring the new size doesn't overlow and the key doesn't exist already. +/// +/// Returns the new total size, or an error. +fn check_insert_node( + current_size: u64, + nodes: &mut BTreeMap<PathComponent, Node>, + name: PathComponent, + node: Node, +) -> Result<u64, DirectoryError> { + // Check that the even after adding this new directory entry, the size calculation will not + // overflow + let new_size = checked_sum([ + current_size, + 1, + match node { + Node::Directory { size, .. } => size, + _ => 0, + }, + ]) + .ok_or(DirectoryError::SizeOverflow)?; + + match nodes.entry(name) { + btree_map::Entry::Vacant(e) => { + e.insert(node); + } + btree_map::Entry::Occupied(occupied) => { + return Err(DirectoryError::DuplicateName(occupied.key().to_owned())) + } + } + + Ok(new_size) +} + +#[cfg(test)] +mod test { + use super::{Directory, Node}; + use crate::fixtures::DUMMY_DIGEST; + use crate::{DirectoryError, PathComponent}; + + #[test] + fn from_iter_single() { + Directory::try_from_iter([( + PathComponent::try_from("b").unwrap(), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, + )]) + .unwrap(); + } + + #[test] + fn from_iter_multiple() { + let d = Directory::try_from_iter([ + ( + "b".try_into().unwrap(), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, + ), + ( + "a".try_into().unwrap(), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, + ), + ( + "z".try_into().unwrap(), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, + ), + ( + "f".try_into().unwrap(), + Node::File { + digest: DUMMY_DIGEST.clone(), + size: 1, + executable: true, + }, + ), + ( + "c".try_into().unwrap(), + Node::File { + digest: DUMMY_DIGEST.clone(), + size: 1, + executable: true, + }, + ), + ( + "g".try_into().unwrap(), + Node::File { + digest: DUMMY_DIGEST.clone(), + size: 1, + executable: true, + }, + ), + ( + "t".try_into().unwrap(), + Node::Symlink { + target: "a".try_into().unwrap(), + }, + ), + ( + "o".try_into().unwrap(), + Node::Symlink { + target: "a".try_into().unwrap(), + }, + ), + ( + "e".try_into().unwrap(), + Node::Symlink { + target: "a".try_into().unwrap(), + }, + ), + ]) + .unwrap(); + + // Convert to proto struct and back to ensure we are not generating any invalid structures + crate::Directory::try_from(crate::proto::Directory::from(d)) + .expect("directory should be valid"); + } + + #[test] + fn add_nodes_to_directory() { + let mut d = Directory::new(); + + d.add( + "b".try_into().unwrap(), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, + ) + .unwrap(); + d.add( + "a".try_into().unwrap(), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, + ) + .unwrap(); + + // Convert to proto struct and back to ensure we are not generating any invalid structures + crate::Directory::try_from(crate::proto::Directory::from(d)) + .expect("directory should be valid"); + } + + #[test] + fn validate_overflow() { + let mut d = Directory::new(); + + assert_eq!( + d.add( + "foo".try_into().unwrap(), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: u64::MAX + } + ), + Err(DirectoryError::SizeOverflow) + ); + } + + #[test] + fn add_duplicate_node_to_directory() { + let mut d = Directory::new(); + + d.add( + "a".try_into().unwrap(), + Node::Directory { + digest: DUMMY_DIGEST.clone(), + size: 1, + }, + ) + .unwrap(); + assert_eq!( + format!( + "{}", + d.add( + "a".try_into().unwrap(), + Node::File { + digest: DUMMY_DIGEST.clone(), + size: 1, + executable: true + } + ) + .expect_err("adding duplicate dir entry must fail") + ), + "\"a\" is a duplicate name" + ); + } +} diff --git a/tvix/castore/src/nodes/mod.rs b/tvix/castore/src/nodes/mod.rs new file mode 100644 index 000000000000..ac7aa1e666df --- /dev/null +++ b/tvix/castore/src/nodes/mod.rs @@ -0,0 +1,48 @@ +//! This holds types describing nodes in the tvix-castore model. +mod directory; +mod symlink_target; + +use crate::B3Digest; +pub use directory::Directory; +pub use symlink_target::{SymlinkTarget, SymlinkTargetError}; + +/// A Node is either a [DirectoryNode], [FileNode] or [SymlinkNode]. +/// Nodes themselves don't have names, what gives them names is either them +/// being inside a [Directory], or a root node with its own name attached to it. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Node { + /// A DirectoryNode is a pointer to a [Directory], by its [Directory::digest]. + /// It also records a`size`. + /// Such a node is either an element in the [Directory] it itself is contained in, + /// or a standalone root node. + Directory { + /// The blake3 hash of a Directory message, serialized in protobuf canonical form. + digest: B3Digest, + /// Number of child elements in the Directory referred to by `digest`. + /// Calculated by summing up the numbers of nodes, and for each directory, + /// its size field. Can be used for inode allocation. + /// This field is precisely as verifiable as any other Merkle tree edge. + /// Resolve `digest`, and you can compute it incrementally. Resolve the entire + /// tree, and you can fully compute it from scratch. + /// A credulous implementation won't reject an excessive size, but this is + /// harmless: you'll have some ordinals without nodes. Undersizing is obvious + /// and easy to reject: you won't have an ordinal for some nodes. + size: u64, + }, + /// A FileNode represents a regular or executable file in a Directory or at the root. + File { + /// The blake3 digest of the file contents + digest: B3Digest, + + /// The file content size + size: u64, + + /// Whether the file is executable + executable: bool, + }, + /// A SymlinkNode represents a symbolic link in a Directory or at the root. + Symlink { + /// The target of the symlink. + target: SymlinkTarget, + }, +} diff --git a/tvix/castore/src/nodes/symlink_target.rs b/tvix/castore/src/nodes/symlink_target.rs new file mode 100644 index 000000000000..e9a1a0bd05c2 --- /dev/null +++ b/tvix/castore/src/nodes/symlink_target.rs @@ -0,0 +1,223 @@ +use bstr::ByteSlice; +use std::fmt::{self, Debug, Display}; + +/// A wrapper type for symlink targets. +/// Internally uses a [bytes::Bytes], but disallows empty targets and those +/// containing null bytes. +#[repr(transparent)] +#[derive(Clone, PartialEq, Eq)] +pub struct SymlinkTarget { + inner: bytes::Bytes, +} + +/// The maximum length a symlink target can have. +/// Linux allows 4095 bytes here. +pub const MAX_TARGET_LEN: usize = 4095; + +impl AsRef<[u8]> for SymlinkTarget { + fn as_ref(&self) -> &[u8] { + self.inner.as_ref() + } +} + +impl From<SymlinkTarget> for bytes::Bytes { + fn from(value: SymlinkTarget) -> Self { + value.inner + } +} + +fn validate_symlink_target<B: AsRef<[u8]>>(symlink_target: B) -> Result<B, SymlinkTargetError> { + let v = symlink_target.as_ref(); + + if v.is_empty() { + return Err(SymlinkTargetError::Empty); + } + if v.len() > MAX_TARGET_LEN { + return Err(SymlinkTargetError::TooLong); + } + if v.contains(&0x00) { + return Err(SymlinkTargetError::Null); + } + + Ok(symlink_target) +} + +impl TryFrom<bytes::Bytes> for SymlinkTarget { + type Error = SymlinkTargetError; + + fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> { + if let Err(e) = validate_symlink_target(&value) { + return Err(SymlinkTargetError::Convert(value, Box::new(e))); + } + + Ok(Self { inner: value }) + } +} + +impl TryFrom<&'static [u8]> for SymlinkTarget { + type Error = SymlinkTargetError; + + fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> { + if let Err(e) = validate_symlink_target(&value) { + return Err(SymlinkTargetError::Convert(value.into(), Box::new(e))); + } + + Ok(Self { + inner: bytes::Bytes::from_static(value), + }) + } +} + +impl TryFrom<&str> for SymlinkTarget { + type Error = SymlinkTargetError; + + fn try_from(value: &str) -> Result<Self, Self::Error> { + if let Err(e) = validate_symlink_target(value) { + return Err(SymlinkTargetError::Convert( + value.to_owned().into(), + Box::new(e), + )); + } + + Ok(Self { + inner: bytes::Bytes::copy_from_slice(value.as_bytes()), + }) + } +} + +impl Debug for SymlinkTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Debug::fmt(self.inner.as_bstr(), f) + } +} + +impl Display for SymlinkTarget { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + Display::fmt(self.inner.as_bstr(), f) + } +} + +/// Errors created when constructing / converting to [SymlinkTarget]. +#[derive(Debug, PartialEq, Eq, thiserror::Error)] +#[cfg_attr(test, derive(Clone))] +pub enum SymlinkTargetError { + #[error("cannot be empty")] + Empty, + #[error("cannot contain null bytes")] + Null, + #[error("cannot be over {} bytes long", MAX_TARGET_LEN)] + TooLong, + #[error("unable to convert '{:?}", .0.as_bstr())] + Convert(bytes::Bytes, Box<Self>), +} + +#[cfg(test)] +mod tests { + use bytes::Bytes; + use rstest::rstest; + + use super::validate_symlink_target; + use super::{SymlinkTarget, SymlinkTargetError}; + + #[rstest] + #[case::empty(b"", SymlinkTargetError::Empty)] + #[case::null(b"foo\0", SymlinkTargetError::Null)] + fn errors(#[case] v: &'static [u8], #[case] err: SymlinkTargetError) { + { + assert_eq!( + Err(err.clone()), + validate_symlink_target(v), + "validate_symlink_target must fail as expected" + ); + } + + let exp_err_v = Bytes::from_static(v); + + // Bytes + { + let v = Bytes::from_static(v); + assert_eq!( + Err(SymlinkTargetError::Convert( + exp_err_v.clone(), + Box::new(err.clone()) + )), + SymlinkTarget::try_from(v), + "conversion must fail as expected" + ); + } + // &[u8] + { + assert_eq!( + Err(SymlinkTargetError::Convert( + exp_err_v.clone(), + Box::new(err.clone()) + )), + SymlinkTarget::try_from(v), + "conversion must fail as expected" + ); + } + // &str, if this is valid UTF-8 + { + if let Ok(v) = std::str::from_utf8(v) { + assert_eq!( + Err(SymlinkTargetError::Convert( + exp_err_v.clone(), + Box::new(err.clone()) + )), + SymlinkTarget::try_from(v), + "conversion must fail as expected" + ); + } + } + } + + #[test] + fn error_toolong() { + assert_eq!( + Err(SymlinkTargetError::TooLong), + validate_symlink_target("X".repeat(5000).into_bytes().as_slice()) + ) + } + + #[rstest] + #[case::boring(b"aa")] + #[case::dot(b".")] + #[case::dotsandslashes(b"./..")] + #[case::dotdot(b"..")] + #[case::slashes(b"a/b")] + #[case::slashes_and_absolute(b"/a/b")] + #[case::invalid_utf8(b"\xc5\xc4\xd6")] + fn success(#[case] v: &'static [u8]) { + let exp = SymlinkTarget { inner: v.into() }; + + // Bytes + { + let v: Bytes = v.into(); + assert_eq!( + Ok(exp.clone()), + SymlinkTarget::try_from(v), + "conversion must succeed" + ) + } + + // &[u8] + { + assert_eq!( + Ok(exp.clone()), + SymlinkTarget::try_from(v), + "conversion must succeed" + ) + } + + // &str, if this is valid UTF-8 + { + if let Ok(v) = std::str::from_utf8(v) { + assert_eq!( + Ok(exp.clone()), + SymlinkTarget::try_from(v), + "conversion must succeed" + ) + } + } + } +} |