about summary refs log tree commit diff
path: root/tvix
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-08-17T19·00+0300
committerclbot <clbot@tvl.fyi>2024-08-18T17·22+0000
commit56fa533e438bd367aa5cae6fa505508aced42156 (patch)
tree5e2624f28f946a1753a49d8a321acd627bfc9624 /tvix
parent0cfe2aaf6a412e495e63372c6e3f01039f371f90 (diff)
refactor(tvix/castore): have PathComponent-specific errors r/8513
Don't use DirectoryError, but PathComponentError.

Also add checks for too long path components.

Change-Id: Ia9deb9dd0351138baadb2e9c9454c3e019d5a45e
Reviewed-on: https://cl.tvl.fyi/c/depot/+/12229
Tested-by: BuildkiteCI
Reviewed-by: Ilan Joselevich <personal@ilanjoselevich.com>
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: edef <edef@edef.eu>
Diffstat (limited to 'tvix')
-rw-r--r--tvix/castore/src/errors.rs6
-rw-r--r--tvix/castore/src/lib.rs2
-rw-r--r--tvix/castore/src/path/component.rs218
-rw-r--r--tvix/castore/src/path/mod.rs6
-rw-r--r--tvix/castore/src/proto/mod.rs39
-rw-r--r--tvix/castore/src/proto/tests/directory.rs51
6 files changed, 246 insertions, 76 deletions
diff --git a/tvix/castore/src/errors.rs b/tvix/castore/src/errors.rs
index 6c213cb06743..ec59c7734584 100644
--- a/tvix/castore/src/errors.rs
+++ b/tvix/castore/src/errors.rs
@@ -3,7 +3,7 @@ use thiserror::Error;
 use tokio::task::JoinError;
 use tonic::Status;
 
-use crate::path::PathComponent;
+use crate::path::{PathComponent, PathComponentError};
 
 /// Errors related to communication with the store.
 #[derive(Debug, Error, PartialEq)]
@@ -47,8 +47,8 @@ pub enum DirectoryError {
     #[error("Total size exceeds u32::MAX")]
     SizeOverflow,
     /// Invalid name encountered
-    #[error("Invalid name: {}", .0.as_bstr())]
-    InvalidName(bytes::Bytes),
+    #[error("Invalid name: {0}")]
+    InvalidName(PathComponentError),
     /// Elements are not in sorted order. Can only happen on protos
     #[error("{:?} is not sorted", .0.as_bstr())]
     WrongSorting(bytes::Bytes),
diff --git a/tvix/castore/src/lib.rs b/tvix/castore/src/lib.rs
index 6e1b36231b9d..8ac6ca3dd66a 100644
--- a/tvix/castore/src/lib.rs
+++ b/tvix/castore/src/lib.rs
@@ -14,7 +14,7 @@ mod nodes;
 pub use nodes::*;
 
 mod path;
-pub use path::{Path, PathBuf, PathComponent};
+pub use path::{Path, PathBuf, PathComponent, PathComponentError};
 
 pub mod import;
 pub mod proto;
diff --git a/tvix/castore/src/path/component.rs b/tvix/castore/src/path/component.rs
index f755f06e62a8..78aca03c50fe 100644
--- a/tvix/castore/src/path/component.rs
+++ b/tvix/castore/src/path/component.rs
@@ -1,6 +1,3 @@
-// TODO: split out this error
-use crate::DirectoryError;
-
 use bstr::ByteSlice;
 use std::fmt::{self, Debug, Display};
 
@@ -8,12 +5,17 @@ use std::fmt::{self, Debug, Display};
 /// Internally uses a [bytes::Bytes], but disallows
 /// slashes, and null bytes to be present, as well as
 /// '.', '..' and the empty string.
+/// It also rejects components that are too long (> 255 bytes).
 #[repr(transparent)]
 #[derive(Clone, Hash, PartialEq, Eq, PartialOrd, Ord)]
 pub struct PathComponent {
     pub(super) inner: bytes::Bytes,
 }
 
+/// The maximum length an individual path component can have.
+/// Linux allows 255 bytes of actual name, so we pick that.
+pub const MAX_NAME_LEN: usize = 255;
+
 impl AsRef<[u8]> for PathComponent {
     fn as_ref(&self) -> &[u8] {
         self.inner.as_ref()
@@ -26,18 +28,24 @@ impl From<PathComponent> for bytes::Bytes {
     }
 }
 
-pub(super) fn is_valid_name<B: AsRef<[u8]>>(name: B) -> bool {
-    let v = name.as_ref();
-
-    !v.is_empty() && v != *b".." && v != *b"." && !v.contains(&0x00) && !v.contains(&b'/')
+pub(super) fn validate_name<B: AsRef<[u8]>>(name: B) -> Result<(), PathComponentError> {
+    match name.as_ref() {
+        b"" => Err(PathComponentError::Empty),
+        b".." => Err(PathComponentError::Parent),
+        b"." => Err(PathComponentError::CurDir),
+        v if v.len() > MAX_NAME_LEN => Err(PathComponentError::TooLong),
+        v if v.contains(&0x00) => Err(PathComponentError::Null),
+        v if v.contains(&b'/') => Err(PathComponentError::Slashes),
+        _ => Ok(()),
+    }
 }
 
 impl TryFrom<bytes::Bytes> for PathComponent {
-    type Error = DirectoryError;
+    type Error = PathComponentError;
 
     fn try_from(value: bytes::Bytes) -> Result<Self, Self::Error> {
-        if !is_valid_name(&value) {
-            return Err(DirectoryError::InvalidName(value));
+        if let Err(e) = validate_name(&value) {
+            return Err(PathComponentError::Convert(value, Box::new(e)));
         }
 
         Ok(Self { inner: value })
@@ -45,14 +53,13 @@ impl TryFrom<bytes::Bytes> for PathComponent {
 }
 
 impl TryFrom<&'static [u8]> for PathComponent {
-    type Error = DirectoryError;
+    type Error = PathComponentError;
 
     fn try_from(value: &'static [u8]) -> Result<Self, Self::Error> {
-        if !is_valid_name(value) {
-            return Err(DirectoryError::InvalidName(bytes::Bytes::from_static(
-                value,
-            )));
+        if let Err(e) = validate_name(value) {
+            return Err(PathComponentError::Convert(value.into(), Box::new(e)));
         }
+
         Ok(Self {
             inner: bytes::Bytes::from_static(value),
         })
@@ -60,14 +67,16 @@ impl TryFrom<&'static [u8]> for PathComponent {
 }
 
 impl TryFrom<&str> for PathComponent {
-    type Error = DirectoryError;
+    type Error = PathComponentError;
 
     fn try_from(value: &str) -> Result<Self, Self::Error> {
-        if !is_valid_name(value) {
-            return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice(
-                value.as_bytes(),
-            )));
+        if let Err(e) = validate_name(value) {
+            return Err(PathComponentError::Convert(
+                value.to_owned().into(),
+                Box::new(e),
+            ));
         }
+
         Ok(Self {
             inner: bytes::Bytes::copy_from_slice(value.as_bytes()),
         })
@@ -75,16 +84,19 @@ impl TryFrom<&str> for PathComponent {
 }
 
 impl TryFrom<&std::ffi::CStr> for PathComponent {
-    type Error = DirectoryError;
+    type Error = PathComponentError;
 
     fn try_from(value: &std::ffi::CStr) -> Result<Self, Self::Error> {
-        if !is_valid_name(value.to_bytes()) {
-            return Err(DirectoryError::InvalidName(bytes::Bytes::copy_from_slice(
-                value.to_bytes(),
-            )));
+        let value = value.to_bytes();
+        if let Err(e) = validate_name(value) {
+            return Err(PathComponentError::Convert(
+                value.to_owned().into(),
+                Box::new(e),
+            ));
         }
+
         Ok(Self {
-            inner: bytes::Bytes::copy_from_slice(value.to_bytes()),
+            inner: bytes::Bytes::copy_from_slice(value),
         })
     }
 }
@@ -100,3 +112,157 @@ impl Display for PathComponent {
         Display::fmt(self.inner.as_bstr(), f)
     }
 }
+
+/// Errors created when parsing / validating [PathComponent].
+#[derive(Debug, PartialEq, thiserror::Error)]
+#[cfg_attr(test, derive(Clone))]
+pub enum PathComponentError {
+    #[error("cannot be empty")]
+    Empty,
+    #[error("cannot contain null bytes")]
+    Null,
+    #[error("cannot be '.'")]
+    CurDir,
+    #[error("cannot be '..'")]
+    Parent,
+    #[error("cannot contain slashes")]
+    Slashes,
+    #[error("cannot be over {} bytes long", MAX_NAME_LEN)]
+    TooLong,
+    #[error("unable to convert '{:?}'", .0.as_bstr())]
+    Convert(bytes::Bytes, #[source] Box<Self>),
+}
+
+#[cfg(test)]
+mod tests {
+    use std::ffi::CString;
+
+    use bytes::Bytes;
+    use rstest::rstest;
+
+    use super::{validate_name, PathComponent, PathComponentError};
+
+    #[rstest]
+    #[case::empty(b"", PathComponentError::Empty)]
+    #[case::null(b"foo\0", PathComponentError::Null)]
+    #[case::curdir(b".", PathComponentError::CurDir)]
+    #[case::parent(b"..", PathComponentError::Parent)]
+    #[case::slashes1(b"a/b", PathComponentError::Slashes)]
+    #[case::slashes2(b"/", PathComponentError::Slashes)]
+    fn errors(#[case] v: &'static [u8], #[case] err: PathComponentError) {
+        {
+            assert_eq!(
+                Err(err.clone()),
+                validate_name(v),
+                "validate_name must fail as expected"
+            );
+        }
+
+        let exp_err_v = Bytes::from_static(v);
+
+        // Bytes
+        {
+            let v = Bytes::from_static(v);
+            assert_eq!(
+                Err(PathComponentError::Convert(
+                    exp_err_v.clone(),
+                    Box::new(err.clone())
+                )),
+                PathComponent::try_from(v),
+                "conversion must fail as expected"
+            );
+        }
+        // &[u8]
+        {
+            assert_eq!(
+                Err(PathComponentError::Convert(
+                    exp_err_v.clone(),
+                    Box::new(err.clone())
+                )),
+                PathComponent::try_from(v),
+                "conversion must fail as expected"
+            );
+        }
+        // &str, if it is valid UTF-8
+        {
+            if let Ok(v) = std::str::from_utf8(v) {
+                assert_eq!(
+                    Err(PathComponentError::Convert(
+                        exp_err_v.clone(),
+                        Box::new(err.clone())
+                    )),
+                    PathComponent::try_from(v),
+                    "conversion must fail as expected"
+                );
+            }
+        }
+        // &CStr, if it can be constructed (fails if the payload contains null bytes)
+        {
+            if let Ok(v) = CString::new(v) {
+                let v = v.as_ref();
+                assert_eq!(
+                    Err(PathComponentError::Convert(
+                        exp_err_v.clone(),
+                        Box::new(err.clone())
+                    )),
+                    PathComponent::try_from(v),
+                    "conversion must fail as expected"
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn error_toolong() {
+        assert_eq!(
+            Err(PathComponentError::TooLong),
+            validate_name("X".repeat(500).into_bytes().as_slice())
+        )
+    }
+
+    #[test]
+    fn success() {
+        let exp = PathComponent { inner: "aa".into() };
+
+        // Bytes
+        {
+            let v: Bytes = "aa".into();
+            assert_eq!(
+                Ok(exp.clone()),
+                PathComponent::try_from(v),
+                "conversion must succeed"
+            );
+        }
+
+        // &[u8]
+        {
+            let v: &[u8] = b"aa";
+            assert_eq!(
+                Ok(exp.clone()),
+                PathComponent::try_from(v),
+                "conversion must succeed"
+            );
+        }
+
+        // &str
+        {
+            let v: &str = "aa";
+            assert_eq!(
+                Ok(exp.clone()),
+                PathComponent::try_from(v),
+                "conversion must succeed"
+            );
+        }
+
+        // &CStr
+        {
+            let v = CString::new("aa").expect("CString must construct");
+            let v = v.as_c_str();
+            assert_eq!(
+                Ok(exp.clone()),
+                PathComponent::try_from(v),
+                "conversion must succeed"
+            );
+        }
+    }
+}
diff --git a/tvix/castore/src/path/mod.rs b/tvix/castore/src/path/mod.rs
index 5a68f1add416..15f31a570da9 100644
--- a/tvix/castore/src/path/mod.rs
+++ b/tvix/castore/src/path/mod.rs
@@ -9,7 +9,7 @@ use std::{
 };
 
 mod component;
-pub use component::PathComponent;
+pub use component::{PathComponent, PathComponentError};
 
 /// Represents a Path in the castore model.
 /// These are always relative, and platform-independent, which distinguishes
@@ -37,7 +37,7 @@ impl Path {
         if !bytes.is_empty() {
             // Ensure all components are valid castore node names.
             for component in bytes.split_str(b"/") {
-                if !component::is_valid_name(component) {
+                if component::validate_name(component).is_err() {
                     return None;
                 }
             }
@@ -233,7 +233,7 @@ impl PathBuf {
 
     /// Adjoins `name` to self.
     pub fn try_push(&mut self, name: &[u8]) -> Result<(), std::io::Error> {
-        if !component::is_valid_name(name) {
+        if component::validate_name(name).is_err() {
             return Err(std::io::ErrorKind::InvalidData.into());
         }
 
diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs
index a40a67f53cf3..67f7b2fa72a2 100644
--- a/tvix/castore/src/proto/mod.rs
+++ b/tvix/castore/src/proto/mod.rs
@@ -1,6 +1,5 @@
-use std::{cmp::Ordering, str};
-
 use prost::Message;
+use std::cmp::Ordering;
 
 mod grpc_blobservice_wrapper;
 mod grpc_directoryservice_wrapper;
@@ -80,31 +79,42 @@ impl TryFrom<Directory> for crate::Directory {
             .try_fold(&b""[..], |prev_name, e| {
                 match e.name.as_ref().cmp(prev_name) {
                     Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
-                    Ordering::Equal => {
-                        Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?))
-                    }
+                    Ordering::Equal => Err(DirectoryError::DuplicateName(
+                        e.name
+                            .to_owned()
+                            .try_into()
+                            .map_err(DirectoryError::InvalidName)?,
+                    )),
                     Ordering::Greater => Ok(e.name.as_ref()),
                 }
             })?;
         value.files.iter().try_fold(&b""[..], |prev_name, e| {
             match e.name.as_ref().cmp(prev_name) {
                 Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
-                Ordering::Equal => {
-                    Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?))
-                }
+                Ordering::Equal => Err(DirectoryError::DuplicateName(
+                    e.name
+                        .to_owned()
+                        .try_into()
+                        .map_err(DirectoryError::InvalidName)?,
+                )),
                 Ordering::Greater => Ok(e.name.as_ref()),
             }
         })?;
         value.symlinks.iter().try_fold(&b""[..], |prev_name, e| {
             match e.name.as_ref().cmp(prev_name) {
                 Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())),
-                Ordering::Equal => {
-                    Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?))
-                }
+                Ordering::Equal => Err(DirectoryError::DuplicateName(
+                    e.name
+                        .to_owned()
+                        .try_into()
+                        .map_err(DirectoryError::InvalidName)?,
+                )),
                 Ordering::Greater => Ok(e.name.as_ref()),
             }
         })?;
 
+        // FUTUREWORK: use is_sorted() once stable, and/or implement the producer for
+        // [crate::Directory::try_from_iter] iterating over all three and doing all checks inline.
         let mut elems: Vec<(PathComponent, crate::Node)> =
             Vec::with_capacity(value.directories.len() + value.files.len() + value.symlinks.len());
 
@@ -184,7 +194,7 @@ impl Node {
     pub fn into_name_and_node(self) -> Result<(PathComponent, crate::Node), DirectoryError> {
         match self.node.ok_or_else(|| DirectoryError::NoNodeSet)? {
             node::Node::Directory(n) => {
-                let name: PathComponent = n.name.try_into()?;
+                let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?;
                 let digest = B3Digest::try_from(n.digest)
                     .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?;
 
@@ -196,7 +206,7 @@ impl Node {
                 Ok((name, node))
             }
             node::Node::File(n) => {
-                let name: PathComponent = n.name.try_into()?;
+                let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?;
                 let digest = B3Digest::try_from(n.digest)
                     .map_err(|e| DirectoryError::InvalidNode(name.clone(), e.into()))?;
 
@@ -210,7 +220,8 @@ impl Node {
             }
 
             node::Node::Symlink(n) => {
-                let name: PathComponent = n.name.try_into()?;
+                let name: PathComponent = n.name.try_into().map_err(DirectoryError::InvalidName)?;
+
                 let node = crate::Node::Symlink {
                     target: n
                         .target
diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs
index 26c434ab46c0..efbc4e9f2af1 100644
--- a/tvix/castore/src/proto/tests/directory.rs
+++ b/tvix/castore/src/proto/tests/directory.rs
@@ -161,12 +161,9 @@ fn validate_invalid_names() {
             }],
             ..Default::default()
         };
-        match crate::Directory::try_from(d).expect_err("must fail") {
-            DirectoryError::InvalidName(n) => {
-                assert_eq!(n.as_ref(), b"\0")
-            }
-            _ => panic!("unexpected error"),
-        };
+
+        let e = crate::Directory::try_from(d).expect_err("must fail");
+        assert!(matches!(e, DirectoryError::InvalidName(_)));
     }
 
     {
@@ -178,12 +175,8 @@ fn validate_invalid_names() {
             }],
             ..Default::default()
         };
-        match crate::Directory::try_from(d).expect_err("must fail") {
-            DirectoryError::InvalidName(n) => {
-                assert_eq!(n.as_ref(), b".")
-            }
-            _ => panic!("unexpected error"),
-        };
+        let e = crate::Directory::try_from(d).expect_err("must fail");
+        assert!(matches!(e, DirectoryError::InvalidName(_)));
     }
 
     {
@@ -196,12 +189,8 @@ fn validate_invalid_names() {
             }],
             ..Default::default()
         };
-        match crate::Directory::try_from(d).expect_err("must fail") {
-            DirectoryError::InvalidName(n) => {
-                assert_eq!(n.as_ref(), b"..")
-            }
-            _ => panic!("unexpected error"),
-        };
+        let e = crate::Directory::try_from(d).expect_err("must fail");
+        assert!(matches!(e, DirectoryError::InvalidName(_)));
     }
 
     {
@@ -212,12 +201,8 @@ fn validate_invalid_names() {
             }],
             ..Default::default()
         };
-        match crate::Directory::try_from(d).expect_err("must fail") {
-            DirectoryError::InvalidName(n) => {
-                assert_eq!(n.as_ref(), b"\x00")
-            }
-            _ => panic!("unexpected error"),
-        };
+        let e = crate::Directory::try_from(d).expect_err("must fail");
+        assert!(matches!(e, DirectoryError::InvalidName(_)));
     }
 
     {
@@ -228,12 +213,20 @@ fn validate_invalid_names() {
             }],
             ..Default::default()
         };
-        match crate::Directory::try_from(d).expect_err("must fail") {
-            DirectoryError::InvalidName(n) => {
-                assert_eq!(n.as_ref(), b"foo/bar")
-            }
-            _ => panic!("unexpected error"),
+        let e = crate::Directory::try_from(d).expect_err("must fail");
+        assert!(matches!(e, DirectoryError::InvalidName(_)));
+    }
+
+    {
+        let d = Directory {
+            symlinks: vec![SymlinkNode {
+                name: bytes::Bytes::copy_from_slice("X".repeat(500).into_bytes().as_slice()),
+                target: "foo".into(),
+            }],
+            ..Default::default()
         };
+        let e = crate::Directory::try_from(d).expect_err("must fail");
+        assert!(matches!(e, DirectoryError::InvalidName(_)));
     }
 }