From 0cfe2aaf6a412e495e63372c6e3f01039f371f90 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Sat, 17 Aug 2024 18:23:44 +0300 Subject: feat(tvix/castore/proto): add owned conv to castore::Directory Replace the hand-rolled code comparing names with a try_fold. Also, make it slightly stricter here, detecting duplicates in the same fields earlier. Change-Id: I9c560838ece88c3b8b339249a8ecbf3b05969538 Reviewed-on: https://cl.tvl.fyi/c/depot/+/12226 Autosubmit: flokli Reviewed-by: edef Tested-by: BuildkiteCI --- tvix/castore/src/proto/mod.rs | 121 +++++++++++++++--------------- tvix/castore/src/proto/tests/directory.rs | 28 ++++++- 2 files changed, 84 insertions(+), 65 deletions(-) (limited to 'tvix/castore') diff --git a/tvix/castore/src/proto/mod.rs b/tvix/castore/src/proto/mod.rs index a533c1c4f195..a40a67f53cf3 100644 --- a/tvix/castore/src/proto/mod.rs +++ b/tvix/castore/src/proto/mod.rs @@ -1,4 +1,4 @@ -use std::str; +use std::{cmp::Ordering, str}; use prost::Message; @@ -66,79 +66,76 @@ impl Directory { } } -/// Accepts a name, and a mutable reference to the previous name. -/// If the passed name is larger than the previous one, the reference is updated. -/// If it's not, an error is returned. -fn update_if_lt_prev<'n>(prev_name: &mut &'n [u8], name: &'n [u8]) -> Result<(), DirectoryError> { - if *name < **prev_name { - return Err(DirectoryError::WrongSorting(bytes::Bytes::copy_from_slice( - name, - ))); - } - *prev_name = name; - Ok(()) -} - -// TODO: add a proper owned version here that moves various fields impl TryFrom for crate::Directory { type Error = DirectoryError; fn try_from(value: Directory) -> Result { - (&value).try_into() - } -} - -impl TryFrom<&Directory> for crate::Directory { - type Error = DirectoryError; - - fn try_from(directory: &Directory) -> Result { - let mut dir = crate::Directory::new(); - - let mut last_file_name: &[u8] = b""; - - // TODO: this currently loops over all three types separately, rather - // than peeking and picking from where would be the next. - - for file in directory.files.iter().map(move |file| { - update_if_lt_prev(&mut last_file_name, &file.name).map(|()| file.clone()) - }) { - let file = file?; - - let (name, node) = Node { - node: Some(node::Node::File(file)), + // Check directories, files and symlinks are sorted + // We'll notice duplicates across all three fields when constructing the Directory. + // FUTUREWORK: use is_sorted() once stable, and/or implement the producer for + // [crate::Directory::try_from_iter] iterating over all three and doing all checks inline. + value + .directories + .iter() + .try_fold(&b""[..], |prev_name, e| { + match e.name.as_ref().cmp(prev_name) { + Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), + Ordering::Equal => { + Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) + } + Ordering::Greater => Ok(e.name.as_ref()), + } + })?; + value.files.iter().try_fold(&b""[..], |prev_name, e| { + match e.name.as_ref().cmp(prev_name) { + Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), + Ordering::Equal => { + Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) + } + Ordering::Greater => Ok(e.name.as_ref()), } - .into_name_and_node()?; - - dir.add(name, node)?; - } - let mut last_directory_name: &[u8] = b""; - for directory in directory.directories.iter().map(move |directory| { - update_if_lt_prev(&mut last_directory_name, &directory.name).map(|()| directory.clone()) - }) { - let directory = directory?; - - let (name, node) = Node { - node: Some(node::Node::Directory(directory)), + })?; + value.symlinks.iter().try_fold(&b""[..], |prev_name, e| { + match e.name.as_ref().cmp(prev_name) { + Ordering::Less => Err(DirectoryError::WrongSorting(e.name.to_owned())), + Ordering::Equal => { + Err(DirectoryError::DuplicateName(e.name.to_owned().try_into()?)) + } + Ordering::Greater => Ok(e.name.as_ref()), } - .into_name_and_node()?; + })?; - dir.add(name, node)?; + let mut elems: Vec<(PathComponent, crate::Node)> = + Vec::with_capacity(value.directories.len() + value.files.len() + value.symlinks.len()); + + for e in value.directories { + elems.push( + Node { + node: Some(node::Node::Directory(e)), + } + .into_name_and_node()?, + ); } - let mut last_symlink_name: &[u8] = b""; - for symlink in directory.symlinks.iter().map(move |symlink| { - update_if_lt_prev(&mut last_symlink_name, &symlink.name).map(|()| symlink.clone()) - }) { - let symlink = symlink?; - let (name, node) = Node { - node: Some(node::Node::Symlink(symlink)), - } - .into_name_and_node()?; + for e in value.files { + elems.push( + Node { + node: Some(node::Node::File(e)), + } + .into_name_and_node()?, + ) + } - dir.add(name, node)?; + for e in value.symlinks { + elems.push( + Node { + node: Some(node::Node::Symlink(e)), + } + .into_name_and_node()?, + ) } - Ok(dir) + crate::Directory::try_from_iter(elems) } } diff --git a/tvix/castore/src/proto/tests/directory.rs b/tvix/castore/src/proto/tests/directory.rs index 1e7cdb89c118..26c434ab46c0 100644 --- a/tvix/castore/src/proto/tests/directory.rs +++ b/tvix/castore/src/proto/tests/directory.rs @@ -155,7 +155,7 @@ fn validate_invalid_names() { { let d = Directory { directories: vec![DirectoryNode { - name: "".into(), + name: b"\0"[..].into(), digest: DUMMY_DIGEST.to_vec().into(), size: 42, }], @@ -163,7 +163,7 @@ fn validate_invalid_names() { }; match crate::Directory::try_from(d).expect_err("must fail") { DirectoryError::InvalidName(n) => { - assert_eq!(n.as_ref(), b"") + assert_eq!(n.as_ref(), b"\0") } _ => panic!("unexpected error"), }; @@ -282,7 +282,7 @@ fn validate_sorting() { } } - // "a" exists twice, bad. + // "a" exists twice (same types), bad. { let d = Directory { directories: vec![ @@ -307,6 +307,28 @@ fn validate_sorting() { } } + // "a" exists twice (different types), bad. + { + let d = Directory { + directories: vec![DirectoryNode { + name: "a".into(), + digest: DUMMY_DIGEST.to_vec().into(), + size: 42, + }], + symlinks: vec![SymlinkNode { + name: "a".into(), + target: "b".into(), + }], + ..Default::default() + }; + match crate::Directory::try_from(d).expect_err("must fail") { + DirectoryError::DuplicateName(s) => { + assert_eq!(s.as_ref(), b"a"); + } + _ => panic!("unexpected error"), + } + } + // "a" comes before "b", all good. { let d = Directory { -- cgit 1.4.1