From 8181817e53f4c0da5d7dfa1a56e296fefd929bcb Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Mon, 22 Apr 2024 20:05:49 +0300 Subject: feat(tvix/glue/fetchers): support file:// URLs Nix supports file:// - URLs for `fetchurl` and `fetchTarball`. Convert the enums and function arguments to hold a URL type. reqwest::Url is a re-export of the url crate, but they don't re-export the parsing errors, and as we want to hold these in our Error types, add it to Cargo.toml explicitly. The Fetcher::download function now checks on the scheme, and either opens the file locally, or does do a HTTP request as before. Fetch gets its custom debug impl, removing potentially sensitive username and password out of URLs. Change-Id: I777db1fe487370e822cbfec4624034aca5e08045 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11504 Autosubmit: flokli Reviewed-by: raitobezarius Tested-by: BuildkiteCI --- tvix/Cargo.lock | 1 + tvix/Cargo.nix | 4 + tvix/glue/Cargo.toml | 1 + tvix/glue/src/builtins/errors.rs | 6 +- tvix/glue/src/builtins/fetchers.rs | 36 +++++--- tvix/glue/src/fetchers.rs | 100 ++++++++++++++++----- tvix/glue/src/known_paths.rs | 5 +- .../src/tests/tvix_tests/eval-okay-fetchurl.exp | 2 +- .../src/tests/tvix_tests/eval-okay-fetchurl.nix | 6 +- 9 files changed, 124 insertions(+), 37 deletions(-) diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock index b2be20a897b6..334b69b7f580 100644 --- a/tvix/Cargo.lock +++ b/tvix/Cargo.lock @@ -4415,6 +4415,7 @@ dependencies = [ "tvix-castore", "tvix-eval", "tvix-store", + "url", "walkdir", "wu-manber", ] diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix index 25e5dccfd7cf..51d47b05e34f 100644 --- a/tvix/Cargo.nix +++ b/tvix/Cargo.nix @@ -14174,6 +14174,10 @@ rec { packageId = "tvix-store"; usesDefaultFeatures = false; } + { + name = "url"; + packageId = "url"; + } { name = "walkdir"; packageId = "walkdir"; diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml index f28011767419..f929d720a0e7 100644 --- a/tvix/glue/Cargo.toml +++ b/tvix/glue/Cargo.toml @@ -27,6 +27,7 @@ serde_json = "1.0" sha2 = "0.10.8" sha1 = "0.10.6" md-5 = "0.10.6" +url = "2.4.0" walkdir = "2.4.0" [dependencies.async-compression] diff --git a/tvix/glue/src/builtins/errors.rs b/tvix/glue/src/builtins/errors.rs index 5aced2bde43b..5e36bc1a243f 100644 --- a/tvix/glue/src/builtins/errors.rs +++ b/tvix/glue/src/builtins/errors.rs @@ -3,6 +3,7 @@ use nix_compat::{ nixhash::{self, NixHash}, store_path::BuildStorePathError, }; +use reqwest::Url; use std::rc::Rc; use thiserror::Error; @@ -33,7 +34,7 @@ impl From for tvix_eval::ErrorKind { pub enum FetcherError { #[error("hash mismatch in file downloaded from {url}:\n wanted: {wanted}\n got: {got}")] HashMismatch { - url: String, + url: Url, wanted: NixHash, got: NixHash, }, @@ -41,6 +42,9 @@ pub enum FetcherError { #[error("Invalid hash type '{0}' for fetcher")] InvalidHashType(&'static str), + #[error("Unable to parse URL: {0}")] + InvalidUrl(#[from] url::ParseError), + #[error(transparent)] Http(#[from] reqwest::Error), diff --git a/tvix/glue/src/builtins/fetchers.rs b/tvix/glue/src/builtins/fetchers.rs index 0de3d5462561..c7602c03e81f 100644 --- a/tvix/glue/src/builtins/fetchers.rs +++ b/tvix/glue/src/builtins/fetchers.rs @@ -1,4 +1,4 @@ -//! Contains builtins that fetch paths from the Internet +//! Contains builtins that fetch paths from the Internet, or local filesystem. use super::utils::select_string; use crate::{ @@ -15,7 +15,7 @@ use tvix_eval::generators::GenCo; use tvix_eval::{CatchableErrorKind, ErrorKind, Value}; struct NixFetchArgs { - url: String, + url_str: String, name: Option, sha256: Option<[u8; 32]>, } @@ -26,11 +26,12 @@ async fn extract_fetch_args( co: &GenCo, args: Value, ) -> Result, ErrorKind> { - if let Ok(url) = args.to_str() { + if let Ok(url_str) = args.to_str() { // Get the raw bytes, not the ToString repr. - let url = String::from_utf8(url.as_bytes().to_vec()).map_err(|_| ErrorKind::Utf8)?; + let url_str = + String::from_utf8(url_str.as_bytes().to_vec()).map_err(|_| ErrorKind::Utf8)?; return Ok(Ok(NixFetchArgs { - url, + url_str, name: None, sha256: None, })); @@ -41,7 +42,7 @@ async fn extract_fetch_args( actual: args.type_of(), })?; - let url = match select_string(co, &attrs, "url").await? { + let url_str = match select_string(co, &attrs, "url").await? { Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?, Err(cek) => return Ok(Err(cek)), }; @@ -68,12 +69,19 @@ async fn extract_fetch_args( None => None, }; - Ok(Ok(NixFetchArgs { url, name, sha256 })) + Ok(Ok(NixFetchArgs { + url_str, + name, + sha256, + })) } #[allow(unused_variables)] // for the `state` arg, for now #[builtins(state = "Rc")] pub(crate) mod fetcher_builtins { + use crate::builtins::FetcherError; + use url::Url; + use super::*; /// Consumes a fetch. @@ -130,12 +138,16 @@ pub(crate) mod fetcher_builtins { // Derive the name from the URL basename if not set explicitly. let name = args .name - .unwrap_or_else(|| url_basename(&args.url).to_owned()); + .unwrap_or_else(|| url_basename(&args.url_str).to_owned()); + + // Parse the URL. + let url = Url::parse(&args.url_str) + .map_err(|e| ErrorKind::TvixError(Rc::new(FetcherError::InvalidUrl(e))))?; fetch_lazy( state, name, - Fetch::URL(args.url, args.sha256.map(NixHash::Sha256)), + Fetch::URL(url, args.sha256.map(NixHash::Sha256)), ) } @@ -156,7 +168,11 @@ pub(crate) mod fetcher_builtins { .name .unwrap_or_else(|| DEFAULT_NAME_FETCH_TARBALL.to_owned()); - fetch_lazy(state, name, Fetch::Tarball(args.url, args.sha256)) + // Parse the URL. + let url = Url::parse(&args.url_str) + .map_err(|e| ErrorKind::TvixError(Rc::new(FetcherError::InvalidUrl(e))))?; + + fetch_lazy(state, name, Fetch::Tarball(url, args.sha256)) } #[builtin("fetchGit")] diff --git a/tvix/glue/src/fetchers.rs b/tvix/glue/src/fetchers.rs index ee0111c88f4f..0328f007e006 100644 --- a/tvix/glue/src/fetchers.rs +++ b/tvix/glue/src/fetchers.rs @@ -15,16 +15,17 @@ use tvix_castore::{ proto::{node::Node, FileNode}, }; use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo}; +use url::Url; use crate::{builtins::FetcherError, decompression::DecompressedReader}; /// Representing options for doing a fetch. -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Eq, PartialEq)] pub enum Fetch { /// Fetch a literal file from the given URL, with an optional expected /// NixHash of it. /// TODO: check if this is *always* sha256, and if so, make it [u8; 32]. - URL(String, Option), + URL(Url, Option), /// Fetch a tarball from the given URL and unpack. /// The file must be a tape archive (.tar) compressed with gzip, bzip2 or xz. @@ -32,12 +33,55 @@ pub enum Fetch { /// so it is best if the tarball contains a single directory at top level. /// Optionally, a sha256 digest can be provided to verify the unpacked /// contents against. - Tarball(String, Option<[u8; 32]>), + Tarball(Url, Option<[u8; 32]>), /// TODO Git(), } +// Drops potentially sensitive username and password from a URL. +fn redact_url(url: &Url) -> Url { + let mut url = url.to_owned(); + if !url.username().is_empty() { + let _ = url.set_username("redacted"); + } + + if url.password().is_some() { + let _ = url.set_password(Some("redacted")); + } + + url +} + +impl std::fmt::Debug for Fetch { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Fetch::URL(url, nixhash) => { + let url = redact_url(url); + if let Some(nixhash) = nixhash { + write!(f, "URL [url: {}, exp_hash: Some({})]", &url, nixhash) + } else { + write!(f, "URL [url: {}, exp_hash: None]", &url) + } + } + Fetch::Tarball(url, exp_digest) => { + let url = redact_url(url); + if let Some(exp_digest) = exp_digest { + write!( + f, + "Tarball [url: {}, exp_hash: Some({})]", + url, + NixHash::Sha256(*exp_digest) + ) + } else { + write!(f, "Tarball [url: {}, exp_hash: None]", url) + } + } + Fetch::Git() => todo!(), + } + } +} + impl Fetch { /// If the [Fetch] contains an expected hash upfront, returns the resulting /// store path. @@ -76,18 +120,32 @@ impl Fetcher { } /// Constructs a HTTP request to the passed URL, and returns a AsyncReadBuf to it. - async fn download<'a>( - &self, - url: &str, - ) -> Result { - let resp = self.http_client.get(url).send().await?; - Ok(tokio_util::io::StreamReader::new( - resp.bytes_stream().map_err(|e| { - let e = e.without_url(); - warn!(%e, "failed to get response body"); - std::io::Error::new(std::io::ErrorKind::BrokenPipe, e) - }), - )) + /// In case the URI uses the file:// scheme, use tokio::fs to open it. + async fn download(&self, url: Url) -> Result, FetcherError> { + match url.scheme() { + "file" => { + let f = tokio::fs::File::open(url.to_file_path().map_err(|_| { + // "Returns Err if the host is neither empty nor "localhost" + // (except on Windows, where file: URLs may have a non-local host)" + FetcherError::Io(std::io::Error::new( + std::io::ErrorKind::Other, + "invalid host for file:// scheme", + )) + })?) + .await?; + Ok(Box::new(tokio::io::BufReader::new(f))) + } + _ => { + let resp = self.http_client.get(url).send().await?; + Ok(Box::new(tokio_util::io::StreamReader::new( + resp.bytes_stream().map_err(|e| { + let e = e.without_url(); + warn!(%e, "failed to get response body"); + std::io::Error::new(std::io::ErrorKind::BrokenPipe, e) + }), + ))) + } + } } } @@ -122,7 +180,7 @@ where match fetch { Fetch::URL(url, exp_nixhash) => { // Construct a AsyncRead reading from the data as its downloaded. - let mut r = self.download(&url).await?; + let mut r = self.download(url.clone()).await?; // Construct a AsyncWrite to write into the BlobService. let mut blob_writer = self.blob_service.open_write().await; @@ -175,7 +233,7 @@ where } Fetch::Tarball(url, exp_nar_sha256) => { // Construct a AsyncRead reading from the data as its downloaded. - let r = self.download(&url).await?; + let r = self.download(url.clone()).await?; // Pop compression. let r = DecompressedReader::new(r); @@ -324,13 +382,13 @@ mod tests { #[test] fn fetchurl_store_path() { - let url = "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch"; + let url = Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap(); let exp_nixhash = NixHash::Sha256( nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax") .unwrap(), ); - let fetch = Fetch::URL(url.into(), Some(exp_nixhash)); + let fetch = Fetch::URL(url, Some(exp_nixhash)); assert_eq!( "06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch", &fetch @@ -343,11 +401,11 @@ mod tests { #[test] fn fetch_tarball_store_path() { - let url = "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz"; + let url = Url::parse("https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz").unwrap(); let exp_nixbase32 = nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm") .unwrap(); - let fetch = Fetch::Tarball(url.into(), Some(exp_nixbase32)); + let fetch = Fetch::Tarball(url, Some(exp_nixbase32)); assert_eq!( "7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source", diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs index 873efdcf8a06..c95065592bcf 100644 --- a/tvix/glue/src/known_paths.rs +++ b/tvix/glue/src/known_paths.rs @@ -134,6 +134,7 @@ impl KnownPaths { #[cfg(test)] mod tests { use nix_compat::{derivation::Derivation, nixbase32, nixhash::NixHash, store_path::StorePath}; + use url::Url; use crate::fetchers::Fetch; @@ -160,13 +161,13 @@ mod tests { StorePath::from_bytes(b"fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo").expect("must parse"); static ref FETCH_URL : Fetch = Fetch::URL( - "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch".into(), + Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap(), Some(NixHash::Sha256(nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax").unwrap())) ); static ref FETCH_URL_OUT_PATH: StorePath = StorePath::from_bytes(b"06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap(); static ref FETCH_TARBALL : Fetch = Fetch::Tarball( - "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz".into(), + Url::parse("https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz").unwrap(), Some(nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm").unwrap()) ); static ref FETCH_TARBALL_OUT_PATH: StorePath = StorePath::from_bytes(b"7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap(); diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp b/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp index a2fd0f1fec84..37a04d577c59 100644 --- a/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp +++ b/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp @@ -1 +1 @@ -[ /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch ] +[ /nix/store/y0r1p1cqmlvm0yqkz3gxvkc1p8kg2sz8-null /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch ] diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix b/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix index 80fe808c21bd..8a3910152554 100644 --- a/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix +++ b/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix @@ -1,6 +1,8 @@ [ - # (fetchurl "url") cannot be tested, as that one has to fetch from the - # internet to calculate the path. + # (fetchurl "url") needs to immediately fetch, but our options without + # internet access are fairly limited. + # TODO: populate some fixtures at a known location instead. + (builtins.fetchurl "file:///dev/null") # fetchurl with url and sha256 (builtins.fetchurl { -- cgit 1.4.1