From 4033d4c50f833f56c07da519827b331a521f8d0e Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Wed, 1 May 2024 23:10:12 +0300 Subject: feat(tvix/castore/path): implement PathBuf::from_host_path This allows converting from std::path::Path to castore PathBufs. A flag is present to control .. canonicalization, and the usual caveats about platform-specific differences apply. Currently only added for unix, we'll carefully consider other platforms on a case-by-case basis. Change-Id: If289a92f75a2e5c3eec132b6a91a28d225fc1989 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11577 Reviewed-by: edef Tested-by: BuildkiteCI Autosubmit: flokli --- tvix/castore/src/path.rs | 106 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 105 insertions(+), 1 deletion(-) (limited to 'tvix/castore/src') diff --git a/tvix/castore/src/path.rs b/tvix/castore/src/path.rs index 68e4b75ec812..fcc2bd01fbd6 100644 --- a/tvix/castore/src/path.rs +++ b/tvix/castore/src/path.rs @@ -227,6 +227,71 @@ impl PathBuf { PathBuf { inner: bytes } } + /// Convert from a [&std::path::Path] to [Self]. + /// + /// - Self uses `/` as path separator. + /// - Absolute paths are always rejected, as are those with custom prefixes. + /// - Repeated separators are deduplicated. + /// - Occurrences of `.` are normalized away. + /// - A trailing slash is normalized away. + /// + /// A `canonicalize_dotdot` boolean controls whether `..` will get + /// canonicalized if possible, or should return an error. + /// + /// For more exotic paths, this conversion might produce different results + /// on different platforms, due to different underlying byte + /// representations, which is why it's restricted to unix for now. 
+    #[cfg(unix)]
+    pub fn from_host_path(
+        host_path: &std::path::Path,
+        canonicalize_dotdot: bool,
+    ) -> Result<Self, std::io::Error> {
+        let mut p = PathBuf::with_capacity(host_path.as_os_str().len());
+
+        for component in host_path.components() {
+            match component {
+                std::path::Component::Prefix(_) | std::path::Component::RootDir => {
+                    return Err(std::io::Error::new(
+                        std::io::ErrorKind::InvalidData,
+                        "found disallowed prefix or rootdir",
+                    ))
+                }
+                std::path::Component::CurDir => continue, // `.` adds nothing to the path, skip it
+                std::path::Component::ParentDir => {
+                    if canonicalize_dotdot {
+                        // Try popping the last element from the path being constructed.
+                        // FUTUREWORK: pop method?
+                        p = p
+                            .parent()
+                            .ok_or_else(|| {
+                                std::io::Error::new(
+                                    std::io::ErrorKind::InvalidData,
+                                    "found .. going too far up",
+                                )
+                            })?
+                            .to_owned();
+                    } else {
+                        return Err(std::io::Error::new(
+                            std::io::ErrorKind::InvalidData,
+                            "found disallowed ..",
+                        ));
+                    }
+                }
+                std::path::Component::Normal(s) => {
+                    // append the new component to the path being constructed.
+                    p.try_push(s.as_encoded_bytes()).map_err(|_| {
+                        std::io::Error::new(
+                            std::io::ErrorKind::InvalidData,
+                            "encountered invalid node in sub_path component",
+                        )
+                    })?
+                }
+            }
+        }
+
+        Ok(p)
+    }
+
     pub fn into_boxed_path(self) -> Box {
         // SAFETY: Box<[u8]> and Box have the same representation,
         // and PathBuf always contains a valid Path.
@@ -257,7 +322,7 @@ mod test {
     // but maybe we want to disallow constructing paths like this as it's a
     // bad idea.
     #[case::cursed("C:\\a/b", 2)]
-    #[case::cursed("\\tvix-store", 1)]
+    #[case::cursed("\\\\tvix-store", 1)]
     pub fn from_str(#[case] s: &str, #[case] num_components: usize) {
         let p: PathBuf = s.parse().expect("must parse");
 
@@ -339,4 +404,43 @@ mod test {
                 .collect::>()
         );
     }
+
+    #[rstest]
+    #[case::empty("", "", false)]
+    #[case::path("a", "a", false)]
+    #[case::path2("a/b", "a/b", false)]
+    #[case::double_slash_middle("a//b", "a/b", false)]
+    #[case::dot(".", "", false)]
+    #[case::dot_start("./a/b", "a/b", false)]
+    #[case::dot_middle("a/./b", "a/b", false)]
+    #[case::dot_end("a/b/.", "a/b", false)]
+    #[case::trailing_slash("a/b/", "a/b", false)]
+    #[case::dotdot_canonicalize("a/..", "", true)]
+    #[case::dotdot_canonicalize2("a/../b", "b", true)]
+    #[cfg_attr(unix, case::faux_prefix("\\\\nix-store", "\\\\nix-store", false))]
+    #[cfg_attr(unix, case::faux_letter("C:\\foo.txt", "C:\\foo.txt", false))]
+    pub fn from_host_path(
+        #[case] host_path: std::path::PathBuf,
+        #[case] exp_path: PathBuf,
+        #[case] canonicalize_dotdot: bool,
+    ) {
+        let p = PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect("must succeed");
+
+        assert_eq!(exp_path, p);
+    }
+
+    #[rstest]
+    #[case::absolute("/", false)]
+    #[case::dotdot_root("..", false)]
+    #[case::dotdot_root_canonicalize("..", true)]
+    #[case::dotdot_root_no_canonicalize("a/..", false)]
+    #[case::invalid_name("foo/bar\0", false)]
+    // Disabled while from_host_path is unix-only: #[cfg_attr(windows, case::prefix("\\\\nix-store", false))]
+    // Disabled while from_host_path is unix-only: #[cfg_attr(windows, case::letter("C:\\foo.txt", false))]
+    pub fn from_host_path_fail(
+        #[case] host_path: std::path::PathBuf,
+        #[case] canonicalize_dotdot: bool,
+    ) {
+        PathBuf::from_host_path(&host_path, canonicalize_dotdot).expect_err("must fail");
+    }
 }
-- 
cgit 1.4.1