diff options
author | Florian Klink <flokli@flokli.de> | 2023-07-18T16·37+0300 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2023-07-21T19·01+0000 |
commit | 72e82ffcb11b1aaf1f1cc8db4189ced5ec0aa42e (patch) | |
tree | fbf51cd1d47df2f3341795fe6bcf8e0a95ccebef /tvix/store/src/proto | |
parent | 638f3e874d5eb6c157ffd065e593ee1a8a14d3e0 (diff) |
refactor(tvix/store): use bytes for node names and symlink targets r/6436
Some paths might use names that are not valid UTF-8. We should be able to represent them. We don't actually need to touch the PathInfo structures, as they need to represent StorePaths, which come with their own harder restrictions, which can't encode non-UTF8 data. While this doesn't change any of the wire format of the gRPC messages, it does however change the interface of tvix_eval::EvalIO - its read_dir() method does now return a list of Vec<u8>, rather than SmolStr. Maybe this should be OsString instead? Change-Id: I821016d9a58ec441ee081b0b9f01c9240723af0b Reviewed-on: https://cl.tvl.fyi/c/depot/+/8974 Autosubmit: flokli <flokli@flokli.de> Reviewed-by: raitobezarius <tvl@lahfa.xyz> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/store/src/proto')
-rw-r--r-- | tvix/store/src/proto/grpc_directoryservice_wrapper.rs | 4 | ||||
-rw-r--r-- | tvix/store/src/proto/mod.rs | 83 | ||||
-rw-r--r-- | tvix/store/src/proto/tests/directory.rs | 60 | ||||
-rw-r--r-- | tvix/store/src/proto/tests/directory_nodes_iterator.rs | 34 | ||||
-rw-r--r-- | tvix/store/src/proto/tests/grpc_directoryservice.rs | 6 | ||||
-rw-r--r-- | tvix/store/src/proto/tests/grpc_pathinfoservice.rs | 4 | ||||
-rw-r--r-- | tvix/store/src/proto/tests/pathinfo.rs | 24 |
7 files changed, 108 insertions, 107 deletions
diff --git a/tvix/store/src/proto/grpc_directoryservice_wrapper.rs b/tvix/store/src/proto/grpc_directoryservice_wrapper.rs index 434d660f3d41..ec9e3cb123eb 100644 --- a/tvix/store/src/proto/grpc_directoryservice_wrapper.rs +++ b/tvix/store/src/proto/grpc_directoryservice_wrapper.rs @@ -116,7 +116,7 @@ impl proto::directory_service_server::DirectoryService for GRPCDirectoryServiceW match seen_directories_sizes.get(&child_directory_digest) { None => { return Err(Status::invalid_argument(format!( - "child directory '{}' ({}) in directory '{}' not seen yet", + "child directory '{:?}' ({}) in directory '{}' not seen yet", child_directory.name, &child_directory_digest, &directory.digest(), @@ -125,7 +125,7 @@ impl proto::directory_service_server::DirectoryService for GRPCDirectoryServiceW Some(seen_child_directory_size) => { if seen_child_directory_size != &child_directory.size { return Err(Status::invalid_argument(format!( - "child directory '{}' ({}) in directory '{}' referred with wrong size, expected {}, actual {}", + "child directory '{:?}' ({}) in directory '{}' referred with wrong size, expected {}, actual {}", child_directory.name, &child_directory_digest, &directory.digest(), diff --git a/tvix/store/src/proto/mod.rs b/tvix/store/src/proto/mod.rs index 7e69726632ce..7e81efc51782 100644 --- a/tvix/store/src/proto/mod.rs +++ b/tvix/store/src/proto/mod.rs @@ -1,6 +1,5 @@ #![allow(clippy::derive_partial_eq_without_eq)] // https://github.com/hyperium/tonic/issues/1056 -use std::str::FromStr; use std::{collections::HashSet, iter::Peekable}; use thiserror::Error; @@ -35,14 +34,14 @@ mod tests; #[derive(Debug, PartialEq, Eq, Error)] pub enum ValidateDirectoryError { /// Elements are not in sorted order - #[error("{0} is not sorted")] - WrongSorting(String), + #[error("{0:?} is not sorted")] + WrongSorting(Vec<u8>), /// Multiple elements with the same name encountered - #[error("{0} is a duplicate name")] - DuplicateName(String), + #[error("{0:?} is a duplicate name")] + DuplicateName(Vec<u8>), /// Invalid name encountered - #[error("Invalid name in {0}")] - InvalidName(String), + #[error("Invalid name in {0:?}")] + InvalidName(Vec<u8>), /// Invalid digest length encountered #[error("Invalid Digest length: {0}")] InvalidDigestLen(usize), @@ -56,8 +55,8 @@ pub enum ValidatePathInfoError { NoNodePresent(), /// Invalid node name encountered. - #[error("Failed to parse {0} as StorePath: {1}")] - InvalidNodeName(String, store_path::Error), + #[error("Failed to parse {0:?} as StorePath: {1}")] + InvalidNodeName(Vec<u8>, store_path::Error), /// The digest the (root) node refers to has invalid length. #[error("Invalid Digest length: {0}")] @@ -73,10 +72,14 @@ pub enum ValidatePathInfoError { /// error that's generated from the supplied constructor. /// /// We disallow slashes, null bytes, '.', '..' and the empty string. -fn validate_node_name<E>(name: &str, err: fn(String) -> E) -> Result<(), E> { - if name.is_empty() || name == ".." || name == "." || name.contains('\x00') || name.contains('/') +fn validate_node_name<E>(name: &[u8], err: fn(Vec<u8>) -> E) -> Result<(), E> { + if name.is_empty() + || name == b".." + || name == b"." + || name.contains(&0x00) + || name.contains(&b'/') { - return Err(err(name.to_string())); + return Err(err(name.to_vec())); } Ok(()) } @@ -95,12 +98,12 @@ fn validate_digest<E>(digest: &Vec<u8>, err: fn(usize) -> E) -> Result<(), E> { /// On success, this returns the parsed [StorePath]. /// On error, it returns an error generated from the supplied constructor. fn parse_node_name_root<E>( - name: &str, - err: fn(String, store_path::Error) -> E, + name: &[u8], + err: fn(Vec<u8>, store_path::Error) -> E, ) -> Result<StorePath, E> { - match StorePath::from_str(name) { + match StorePath::from_bytes(name) { Ok(np) => Ok(np), - Err(e) => Err(err(name.to_string(), e)), + Err(e) => Err(err(name.to_vec(), e)), } } @@ -169,29 +172,29 @@ impl PathInfo { /// NamedNode is implemented for [FileNode], [DirectoryNode] and [SymlinkNode] /// and [node::Node], so we can ask all of them for the name easily. pub trait NamedNode { - fn get_name(&self) -> &str; + fn get_name(&self) -> &[u8]; } impl NamedNode for &FileNode { - fn get_name(&self) -> &str { - self.name.as_str() + fn get_name(&self) -> &[u8] { + &self.name } } impl NamedNode for &DirectoryNode { - fn get_name(&self) -> &str { - self.name.as_str() + fn get_name(&self) -> &[u8] { + &self.name } } impl NamedNode for &SymlinkNode { - fn get_name(&self) -> &str { - self.name.as_str() + fn get_name(&self) -> &[u8] { + &self.name } } impl NamedNode for node::Node { - fn get_name(&self) -> &str { + fn get_name(&self) -> &[u8] { match self { node::Node::File(node_file) => &node_file.name, node::Node::Directory(node_directory) => &node_directory.name, @@ -204,11 +207,11 @@ impl NamedNode for node::Node { /// If the passed name is larger than the previous one, the reference is updated. /// If it's not, an error is returned. fn update_if_lt_prev<'n>( - prev_name: &mut &'n str, - name: &'n str, + prev_name: &mut &'n [u8], + name: &'n [u8], ) -> Result<(), ValidateDirectoryError> { if *name < **prev_name { - return Err(ValidateDirectoryError::WrongSorting(name.to_string())); + return Err(ValidateDirectoryError::WrongSorting(name.to_vec())); } *prev_name = name; Ok(()) @@ -217,11 +220,11 @@ fn update_if_lt_prev<'n>( /// Inserts the given name into a HashSet if it's not already in there. /// If it is, an error is returned. fn insert_once<'n>( - seen_names: &mut HashSet<&'n str>, - name: &'n str, + seen_names: &mut HashSet<&'n [u8]>, + name: &'n [u8], ) -> Result<(), ValidateDirectoryError> { if seen_names.get(name).is_some() { - return Err(ValidateDirectoryError::DuplicateName(name.to_string())); + return Err(ValidateDirectoryError::DuplicateName(name.to_vec())); } seen_names.insert(name); Ok(()) @@ -258,11 +261,11 @@ impl Directory { /// - not properly sorted lists /// - duplicate names in the three lists pub fn validate(&self) -> Result<(), ValidateDirectoryError> { - let mut seen_names: HashSet<&str> = HashSet::new(); + let mut seen_names: HashSet<&[u8]> = HashSet::new(); - let mut last_directory_name: &str = ""; - let mut last_file_name: &str = ""; - let mut last_symlink_name: &str = ""; + let mut last_directory_name: &[u8] = b""; + let mut last_file_name: &[u8] = b""; + let mut last_symlink_name: &[u8] = b""; // check directories for directory_node in &self.directories { @@ -272,8 +275,8 @@ impl Directory { ValidateDirectoryError::InvalidDigestLen, )?; - update_if_lt_prev(&mut last_directory_name, directory_node.name.as_str())?; - insert_once(&mut seen_names, directory_node.name.as_str())?; + update_if_lt_prev(&mut last_directory_name, &directory_node.name)?; + insert_once(&mut seen_names, &directory_node.name)?; } // check files @@ -281,16 +284,16 @@ impl Directory { validate_node_name(&file_node.name, ValidateDirectoryError::InvalidName)?; validate_digest(&file_node.digest, ValidateDirectoryError::InvalidDigestLen)?; - update_if_lt_prev(&mut last_file_name, file_node.name.as_str())?; - insert_once(&mut seen_names, file_node.name.as_str())?; + update_if_lt_prev(&mut last_file_name, &file_node.name)?; + insert_once(&mut seen_names, &file_node.name)?; } // check symlinks for symlink_node in &self.symlinks { validate_node_name(&symlink_node.name, ValidateDirectoryError::InvalidName)?; - update_if_lt_prev(&mut last_symlink_name, symlink_node.name.as_str())?; - insert_once(&mut seen_names, symlink_node.name.as_str())?; + update_if_lt_prev(&mut last_symlink_name, &symlink_node.name)?; + insert_once(&mut seen_names, &symlink_node.name)?; } Ok(()) diff --git a/tvix/store/src/proto/tests/directory.rs b/tvix/store/src/proto/tests/directory.rs index 8d6ca7241d7a..48eeaa7b5fb4 100644 --- a/tvix/store/src/proto/tests/directory.rs +++ b/tvix/store/src/proto/tests/directory.rs @@ -20,7 +20,7 @@ fn size() { { let d = Directory { directories: vec![DirectoryNode { - name: String::from("foo"), + name: "foo".into(), digest: DUMMY_DIGEST.to_vec(), size: 0, }], @@ -31,7 +31,7 @@ fn size() { { let d = Directory { directories: vec![DirectoryNode { - name: String::from("foo"), + name: "foo".into(), digest: DUMMY_DIGEST.to_vec(), size: 4, }], @@ -42,7 +42,7 @@ fn size() { { let d = Directory { files: vec![FileNode { - name: String::from("foo"), + name: "foo".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, executable: false, @@ -54,8 +54,8 @@ fn size() { { let d = Directory { symlinks: vec![SymlinkNode { - name: String::from("foo"), - target: String::from("bar"), + name: "foo".into(), + target: "bar".into(), }], ..Default::default() }; @@ -89,7 +89,7 @@ fn validate_invalid_names() { { let d = Directory { directories: vec![DirectoryNode { - name: "".to_string(), + name: "".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }], @@ -97,7 +97,7 @@ fn validate_invalid_names() { }; match d.validate().expect_err("must fail") { ValidateDirectoryError::InvalidName(n) => { - assert_eq!(n, "") + assert_eq!(n, b"") } _ => panic!("unexpected error"), }; @@ -106,7 +106,7 @@ fn validate_invalid_names() { { let d = Directory { directories: vec![DirectoryNode { - name: ".".to_string(), + name: ".".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }], @@ -114,7 +114,7 @@ fn validate_invalid_names() { }; match d.validate().expect_err("must fail") { ValidateDirectoryError::InvalidName(n) => { - assert_eq!(n, ".") + assert_eq!(n, b".") } _ => panic!("unexpected error"), }; @@ -123,7 +123,7 @@ fn validate_invalid_names() { { let d = Directory { files: vec![FileNode { - name: "..".to_string(), + name: "..".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, executable: false, @@ -132,7 +132,7 @@ fn validate_invalid_names() { }; match d.validate().expect_err("must fail") { ValidateDirectoryError::InvalidName(n) => { - assert_eq!(n, "..") + assert_eq!(n, b"..") } _ => panic!("unexpected error"), }; @@ -141,14 +141,14 @@ fn validate_invalid_names() { { let d = Directory { symlinks: vec![SymlinkNode { - name: "\x00".to_string(), - target: "foo".to_string(), + name: "\x00".into(), + target: "foo".into(), }], ..Default::default() }; match d.validate().expect_err("must fail") { ValidateDirectoryError::InvalidName(n) => { - assert_eq!(n, "\x00") + assert_eq!(n, b"\x00") } _ => panic!("unexpected error"), }; @@ -157,14 +157,14 @@ fn validate_invalid_names() { { let d = Directory { symlinks: vec![SymlinkNode { - name: "foo/bar".to_string(), - target: "foo".to_string(), + name: "foo/bar".into(), + target: "foo".into(), }], ..Default::default() }; match d.validate().expect_err("must fail") { ValidateDirectoryError::InvalidName(n) => { - assert_eq!(n, "foo/bar") + assert_eq!(n, b"foo/bar") } _ => panic!("unexpected error"), }; @@ -175,7 +175,7 @@ fn validate_invalid_names() { fn validate_invalid_digest() { let d = Directory { directories: vec![DirectoryNode { - name: "foo".to_string(), + name: "foo".into(), digest: vec![0x00, 0x42], // invalid length size: 42, }], @@ -196,12 +196,12 @@ fn validate_sorting() { let d = Directory { directories: vec![ DirectoryNode { - name: "b".to_string(), + name: "b".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }, DirectoryNode { - name: "a".to_string(), + name: "a".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }, @@ -210,7 +210,7 @@ fn validate_sorting() { }; match d.validate().expect_err("must fail") { ValidateDirectoryError::WrongSorting(s) => { - assert_eq!(s, "a".to_string()); + assert_eq!(s, b"a"); } _ => panic!("unexpected error"), } @@ -221,12 +221,12 @@ fn validate_sorting() { let d = Directory { directories: vec![ DirectoryNode { - name: "a".to_string(), + name: "a".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }, DirectoryNode { - name: "a".to_string(), + name: "a".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }, @@ -235,7 +235,7 @@ fn validate_sorting() { }; match d.validate().expect_err("must fail") { ValidateDirectoryError::DuplicateName(s) => { - assert_eq!(s, "a".to_string()); + assert_eq!(s, b"a"); } _ => panic!("unexpected error"), } @@ -246,12 +246,12 @@ fn validate_sorting() { let d = Directory { directories: vec![ DirectoryNode { - name: "a".to_string(), + name: "a".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }, DirectoryNode { - name: "b".to_string(), + name: "b".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }, @@ -267,19 +267,19 @@ fn validate_sorting() { let d = Directory { directories: vec![ DirectoryNode { - name: "b".to_string(), + name: "b".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }, DirectoryNode { - name: "c".to_string(), + name: "c".into(), digest: DUMMY_DIGEST.to_vec(), size: 42, }, ], symlinks: vec![SymlinkNode { - name: "a".to_string(), - target: "foo".to_string(), + name: "a".into(), + target: "foo".into(), }], ..Default::default() }; diff --git a/tvix/store/src/proto/tests/directory_nodes_iterator.rs b/tvix/store/src/proto/tests/directory_nodes_iterator.rs index 9a283f72bd45..68f147a33210 100644 --- a/tvix/store/src/proto/tests/directory_nodes_iterator.rs +++ b/tvix/store/src/proto/tests/directory_nodes_iterator.rs @@ -1,7 +1,7 @@ -use crate::proto::node::Node; use crate::proto::Directory; use crate::proto::DirectoryNode; use crate::proto::FileNode; +use crate::proto::NamedNode; use crate::proto::SymlinkNode; #[test] @@ -9,68 +9,66 @@ fn iterator() { let d = Directory { directories: vec![ DirectoryNode { - name: "c".to_string(), + name: "c".into(), ..DirectoryNode::default() }, DirectoryNode { - name: "d".to_string(), + name: "d".into(), ..DirectoryNode::default() }, DirectoryNode { - name: "h".to_string(), + name: "h".into(), ..DirectoryNode::default() }, DirectoryNode { - name: "l".to_string(), + name: "l".into(), ..DirectoryNode::default() }, ], files: vec![ FileNode { - name: "b".to_string(), + name: "b".into(), ..FileNode::default() }, FileNode { - name: "e".to_string(), + name: "e".into(), ..FileNode::default() }, FileNode { - name: "g".to_string(), + name: "g".into(), ..FileNode::default() }, FileNode { - name: "j".to_string(), + name: "j".into(), ..FileNode::default() }, ], symlinks: vec![ SymlinkNode { - name: "a".to_string(), + name: "a".into(), ..SymlinkNode::default() }, SymlinkNode { - name: "f".to_string(), + name: "f".into(), ..SymlinkNode::default() }, SymlinkNode { - name: "i".to_string(), + name: "i".into(), ..SymlinkNode::default() }, SymlinkNode { - name: "k".to_string(), + name: "k".into(), ..SymlinkNode::default() }, ], }; + // We keep this strings here and convert to string to make the comparison + // less messy. let mut node_names: Vec<String> = vec![]; for node in d.nodes() { - match node { - Node::Directory(n) => node_names.push(n.name), - Node::File(n) => node_names.push(n.name), - Node::Symlink(n) => node_names.push(n.name), - }; + node_names.push(String::from_utf8(node.get_name().to_vec()).unwrap()); } assert_eq!( diff --git a/tvix/store/src/proto/tests/grpc_directoryservice.rs b/tvix/store/src/proto/tests/grpc_directoryservice.rs index a1058706d521..73ce0082d3ca 100644 --- a/tvix/store/src/proto/tests/grpc_directoryservice.rs +++ b/tvix/store/src/proto/tests/grpc_directoryservice.rs @@ -190,8 +190,8 @@ async fn put_reject_failed_validation() { // construct a broken Directory message that fails validation let broken_directory = Directory { symlinks: vec![SymlinkNode { - name: "".to_string(), - target: "doesntmatter".to_string(), + name: "".into(), + target: "doesntmatter".into(), }], ..Default::default() }; @@ -214,7 +214,7 @@ async fn put_reject_wrong_size() { // Construct a directory referring to DIRECTORY_A, but with wrong size. let broken_parent_directory = Directory { directories: vec![DirectoryNode { - name: "foo".to_string(), + name: "foo".into(), digest: DIRECTORY_A.digest().to_vec(), size: 42, }], diff --git a/tvix/store/src/proto/tests/grpc_pathinfoservice.rs b/tvix/store/src/proto/tests/grpc_pathinfoservice.rs index 186461d16528..57c88c2863b1 100644 --- a/tvix/store/src/proto/tests/grpc_pathinfoservice.rs +++ b/tvix/store/src/proto/tests/grpc_pathinfoservice.rs @@ -48,8 +48,8 @@ async fn put_get() { let path_info = PathInfo { node: Some(Node { node: Some(Symlink(SymlinkNode { - name: "00000000000000000000000000000000-foo".to_string(), - target: "doesntmatter".to_string(), + name: "00000000000000000000000000000000-foo".into(), + target: "doesntmatter".into(), })), }), ..Default::default() diff --git a/tvix/store/src/proto/tests/pathinfo.rs b/tvix/store/src/proto/tests/pathinfo.rs index bccec539f9f3..a14554ee4fd3 100644 --- a/tvix/store/src/proto/tests/pathinfo.rs +++ b/tvix/store/src/proto/tests/pathinfo.rs @@ -43,7 +43,7 @@ fn validate_no_node( #[test_case( proto::DirectoryNode { - name: DUMMY_NAME.to_string(), + name: DUMMY_NAME.into(), digest: DUMMY_DIGEST.to_vec(), size: 0, }, @@ -52,7 +52,7 @@ fn validate_no_node( )] #[test_case( proto::DirectoryNode { - name: DUMMY_NAME.to_string(), + name: DUMMY_NAME.into(), digest: vec![], size: 0, }, @@ -61,12 +61,12 @@ fn validate_no_node( )] #[test_case( proto::DirectoryNode { - name: "invalid".to_string(), + name: "invalid".into(), digest: DUMMY_DIGEST.to_vec(), size: 0, }, Err(ValidatePathInfoError::InvalidNodeName( - "invalid".to_string(), + "invalid".into(), store_path::Error::InvalidLength() )); "invalid node name" @@ -87,7 +87,7 @@ fn validate_directory( #[test_case( proto::FileNode { - name: DUMMY_NAME.to_string(), + name: DUMMY_NAME.into(), digest: DUMMY_DIGEST.to_vec(), size: 0, executable: false, @@ -97,7 +97,7 @@ fn validate_directory( )] #[test_case( proto::FileNode { - name: DUMMY_NAME.to_string(), + name: DUMMY_NAME.into(), digest: vec![], ..Default::default() }, @@ -106,12 +106,12 @@ fn validate_directory( )] #[test_case( proto::FileNode { - name: "invalid".to_string(), + name: "invalid".into(), digest: DUMMY_DIGEST.to_vec(), ..Default::default() }, Err(ValidatePathInfoError::InvalidNodeName( - "invalid".to_string(), + "invalid".into(), store_path::Error::InvalidLength() )); "invalid node name" @@ -129,7 +129,7 @@ fn validate_file(t_file_node: proto::FileNode, t_result: Result<StorePath, Valid #[test_case( proto::SymlinkNode { - name: DUMMY_NAME.to_string(), + name: DUMMY_NAME.into(), ..Default::default() }, Ok(StorePath::from_str(DUMMY_NAME).expect("must succeed")); @@ -137,11 +137,11 @@ fn validate_file(t_file_node: proto::FileNode, t_result: Result<StorePath, Valid )] #[test_case( proto::SymlinkNode { - name: "invalid".to_string(), + name: "invalid".into(), ..Default::default() }, Err(ValidatePathInfoError::InvalidNodeName( - "invalid".to_string(), + "invalid".into(), store_path::Error::InvalidLength() )); "invalid node name" @@ -166,7 +166,7 @@ fn validate_references() { let path_info = PathInfo { node: Some(Node { node: Some(proto::node::Node::Directory(proto::DirectoryNode { - name: DUMMY_NAME.to_string(), + name: DUMMY_NAME.into(), digest: DUMMY_DIGEST.to_vec(), size: 0, })), |