about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-04-22T17·05+0300
committerflokli <flokli@flokli.de>2024-04-23T12·41+0000
commit8181817e53f4c0da5d7dfa1a56e296fefd929bcb (patch)
tree9fe6ede59171f79d37ff954deb2bc8cfff9cc8ba
parentdfef3d18d177bbeadcc3000cc39ef0a16a566a1f (diff)
feat(tvix/glue/fetchers): support file:// URLs r/7996
Nix supports file:// - URLs for `fetchurl` and `fetchTarball`.

Convert the enums and function arguments to hold a URL type.
reqwest::Url is a re-export of the url crate, but they don't re-export
the parsing errors, and as we want to hold these in our Error types, add
it to Cargo.toml explicitly.

The Fetcher::download function now checks on the scheme, and either
opens the file locally, or does do a HTTP request as before.

Fetch gets its custom debug impl, removing potentially sensitive
username and password out of URLs.

Change-Id: I777db1fe487370e822cbfec4624034aca5e08045
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11504
Autosubmit: flokli <flokli@flokli.de>
Reviewed-by: raitobezarius <tvl@lahfa.xyz>
Tested-by: BuildkiteCI
-rw-r--r--tvix/Cargo.lock1
-rw-r--r--tvix/Cargo.nix4
-rw-r--r--tvix/glue/Cargo.toml1
-rw-r--r--tvix/glue/src/builtins/errors.rs6
-rw-r--r--tvix/glue/src/builtins/fetchers.rs36
-rw-r--r--tvix/glue/src/fetchers.rs100
-rw-r--r--tvix/glue/src/known_paths.rs5
-rw-r--r--tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp2
-rw-r--r--tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix6
9 files changed, 124 insertions, 37 deletions
diff --git a/tvix/Cargo.lock b/tvix/Cargo.lock
index b2be20a897b6..334b69b7f580 100644
--- a/tvix/Cargo.lock
+++ b/tvix/Cargo.lock
@@ -4415,6 +4415,7 @@ dependencies = [
  "tvix-castore",
  "tvix-eval",
  "tvix-store",
+ "url",
  "walkdir",
  "wu-manber",
 ]
diff --git a/tvix/Cargo.nix b/tvix/Cargo.nix
index 25e5dccfd7cf..51d47b05e34f 100644
--- a/tvix/Cargo.nix
+++ b/tvix/Cargo.nix
@@ -14175,6 +14175,10 @@ rec {
             usesDefaultFeatures = false;
           }
           {
+            name = "url";
+            packageId = "url";
+          }
+          {
             name = "walkdir";
             packageId = "walkdir";
           }
diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml
index f28011767419..f929d720a0e7 100644
--- a/tvix/glue/Cargo.toml
+++ b/tvix/glue/Cargo.toml
@@ -27,6 +27,7 @@ serde_json = "1.0"
 sha2 = "0.10.8"
 sha1 = "0.10.6"
 md-5 = "0.10.6"
+url = "2.4.0"
 walkdir = "2.4.0"
 
 [dependencies.async-compression]
diff --git a/tvix/glue/src/builtins/errors.rs b/tvix/glue/src/builtins/errors.rs
index 5aced2bde43b..5e36bc1a243f 100644
--- a/tvix/glue/src/builtins/errors.rs
+++ b/tvix/glue/src/builtins/errors.rs
@@ -3,6 +3,7 @@ use nix_compat::{
     nixhash::{self, NixHash},
     store_path::BuildStorePathError,
 };
+use reqwest::Url;
 use std::rc::Rc;
 use thiserror::Error;
 
@@ -33,7 +34,7 @@ impl From<DerivationError> for tvix_eval::ErrorKind {
 pub enum FetcherError {
     #[error("hash mismatch in file downloaded from {url}:\n  wanted: {wanted}\n     got: {got}")]
     HashMismatch {
-        url: String,
+        url: Url,
         wanted: NixHash,
         got: NixHash,
     },
@@ -41,6 +42,9 @@ pub enum FetcherError {
     #[error("Invalid hash type '{0}' for fetcher")]
     InvalidHashType(&'static str),
 
+    #[error("Unable to parse URL: {0}")]
+    InvalidUrl(#[from] url::ParseError),
+
     #[error(transparent)]
     Http(#[from] reqwest::Error),
 
diff --git a/tvix/glue/src/builtins/fetchers.rs b/tvix/glue/src/builtins/fetchers.rs
index 0de3d5462561..c7602c03e81f 100644
--- a/tvix/glue/src/builtins/fetchers.rs
+++ b/tvix/glue/src/builtins/fetchers.rs
@@ -1,4 +1,4 @@
-//! Contains builtins that fetch paths from the Internet
+//! Contains builtins that fetch paths from the Internet, or local filesystem.
 
 use super::utils::select_string;
 use crate::{
@@ -15,7 +15,7 @@ use tvix_eval::generators::GenCo;
 use tvix_eval::{CatchableErrorKind, ErrorKind, Value};
 
 struct NixFetchArgs {
-    url: String,
+    url_str: String,
     name: Option<String>,
     sha256: Option<[u8; 32]>,
 }
@@ -26,11 +26,12 @@ async fn extract_fetch_args(
     co: &GenCo,
     args: Value,
 ) -> Result<Result<NixFetchArgs, CatchableErrorKind>, ErrorKind> {
-    if let Ok(url) = args.to_str() {
+    if let Ok(url_str) = args.to_str() {
         // Get the raw bytes, not the ToString repr.
-        let url = String::from_utf8(url.as_bytes().to_vec()).map_err(|_| ErrorKind::Utf8)?;
+        let url_str =
+            String::from_utf8(url_str.as_bytes().to_vec()).map_err(|_| ErrorKind::Utf8)?;
         return Ok(Ok(NixFetchArgs {
-            url,
+            url_str,
             name: None,
             sha256: None,
         }));
@@ -41,7 +42,7 @@ async fn extract_fetch_args(
         actual: args.type_of(),
     })?;
 
-    let url = match select_string(co, &attrs, "url").await? {
+    let url_str = match select_string(co, &attrs, "url").await? {
         Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?,
         Err(cek) => return Ok(Err(cek)),
     };
@@ -68,12 +69,19 @@ async fn extract_fetch_args(
         None => None,
     };
 
-    Ok(Ok(NixFetchArgs { url, name, sha256 }))
+    Ok(Ok(NixFetchArgs {
+        url_str,
+        name,
+        sha256,
+    }))
 }
 
 #[allow(unused_variables)] // for the `state` arg, for now
 #[builtins(state = "Rc<TvixStoreIO>")]
 pub(crate) mod fetcher_builtins {
+    use crate::builtins::FetcherError;
+    use url::Url;
+
     use super::*;
 
     /// Consumes a fetch.
@@ -130,12 +138,16 @@ pub(crate) mod fetcher_builtins {
         // Derive the name from the URL basename if not set explicitly.
         let name = args
             .name
-            .unwrap_or_else(|| url_basename(&args.url).to_owned());
+            .unwrap_or_else(|| url_basename(&args.url_str).to_owned());
+
+        // Parse the URL.
+        let url = Url::parse(&args.url_str)
+            .map_err(|e| ErrorKind::TvixError(Rc::new(FetcherError::InvalidUrl(e))))?;
 
         fetch_lazy(
             state,
             name,
-            Fetch::URL(args.url, args.sha256.map(NixHash::Sha256)),
+            Fetch::URL(url, args.sha256.map(NixHash::Sha256)),
         )
     }
 
@@ -156,7 +168,11 @@ pub(crate) mod fetcher_builtins {
             .name
             .unwrap_or_else(|| DEFAULT_NAME_FETCH_TARBALL.to_owned());
 
-        fetch_lazy(state, name, Fetch::Tarball(args.url, args.sha256))
+        // Parse the URL.
+        let url = Url::parse(&args.url_str)
+            .map_err(|e| ErrorKind::TvixError(Rc::new(FetcherError::InvalidUrl(e))))?;
+
+        fetch_lazy(state, name, Fetch::Tarball(url, args.sha256))
     }
 
     #[builtin("fetchGit")]
diff --git a/tvix/glue/src/fetchers.rs b/tvix/glue/src/fetchers.rs
index ee0111c88f4f..0328f007e006 100644
--- a/tvix/glue/src/fetchers.rs
+++ b/tvix/glue/src/fetchers.rs
@@ -15,16 +15,17 @@ use tvix_castore::{
     proto::{node::Node, FileNode},
 };
 use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo};
+use url::Url;
 
 use crate::{builtins::FetcherError, decompression::DecompressedReader};
 
 /// Representing options for doing a fetch.
-#[derive(Clone, Debug, Eq, PartialEq)]
+#[derive(Clone, Eq, PartialEq)]
 pub enum Fetch {
     /// Fetch a literal file from the given URL, with an optional expected
     /// NixHash of it.
     /// TODO: check if this is *always* sha256, and if so, make it [u8; 32].
-    URL(String, Option<NixHash>),
+    URL(Url, Option<NixHash>),
 
     /// Fetch a tarball from the given URL and unpack.
     /// The file must be a tape archive (.tar) compressed with gzip, bzip2 or xz.
@@ -32,12 +33,55 @@ pub enum Fetch {
     /// so it is best if the tarball contains a single directory at top level.
     /// Optionally, a sha256 digest can be provided to verify the unpacked
     /// contents against.
-    Tarball(String, Option<[u8; 32]>),
+    Tarball(Url, Option<[u8; 32]>),
 
     /// TODO
     Git(),
 }
 
+// Drops potentially sensitive username and password from a URL.
+fn redact_url(url: &Url) -> Url {
+    let mut url = url.to_owned();
+    if !url.username().is_empty() {
+        let _ = url.set_username("redacted");
+    }
+
+    if url.password().is_some() {
+        let _ = url.set_password(Some("redacted"));
+    }
+
+    url
+}
+
+impl std::fmt::Debug for Fetch {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Fetch::URL(url, nixhash) => {
+                let url = redact_url(url);
+                if let Some(nixhash) = nixhash {
+                    write!(f, "URL [url: {}, exp_hash: Some({})]", &url, nixhash)
+                } else {
+                    write!(f, "URL [url: {}, exp_hash: None]", &url)
+                }
+            }
+            Fetch::Tarball(url, exp_digest) => {
+                let url = redact_url(url);
+                if let Some(exp_digest) = exp_digest {
+                    write!(
+                        f,
+                        "Tarball [url: {}, exp_hash: Some({})]",
+                        url,
+                        NixHash::Sha256(*exp_digest)
+                    )
+                } else {
+                    write!(f, "Tarball [url: {}, exp_hash: None]", url)
+                }
+            }
+            Fetch::Git() => todo!(),
+        }
+    }
+}
+
 impl Fetch {
     /// If the [Fetch] contains an expected hash upfront, returns the resulting
     /// store path.
@@ -76,18 +120,32 @@ impl<BS, DS, PS> Fetcher<BS, DS, PS> {
     }
 
     /// Constructs a HTTP request to the passed URL, and returns a AsyncReadBuf to it.
-    async fn download<'a>(
-        &self,
-        url: &str,
-    ) -> Result<impl AsyncBufRead + Unpin + 'a, reqwest::Error> {
-        let resp = self.http_client.get(url).send().await?;
-        Ok(tokio_util::io::StreamReader::new(
-            resp.bytes_stream().map_err(|e| {
-                let e = e.without_url();
-                warn!(%e, "failed to get response body");
-                std::io::Error::new(std::io::ErrorKind::BrokenPipe, e)
-            }),
-        ))
+    /// In case the URI uses the file:// scheme, use tokio::fs to open it.
+    async fn download(&self, url: Url) -> Result<Box<dyn AsyncBufRead + Unpin>, FetcherError> {
+        match url.scheme() {
+            "file" => {
+                let f = tokio::fs::File::open(url.to_file_path().map_err(|_| {
+                    // "Returns Err if the host is neither empty nor "localhost"
+                    // (except on Windows, where file: URLs may have a non-local host)"
+                    FetcherError::Io(std::io::Error::new(
+                        std::io::ErrorKind::Other,
+                        "invalid host for file:// scheme",
+                    ))
+                })?)
+                .await?;
+                Ok(Box::new(tokio::io::BufReader::new(f)))
+            }
+            _ => {
+                let resp = self.http_client.get(url).send().await?;
+                Ok(Box::new(tokio_util::io::StreamReader::new(
+                    resp.bytes_stream().map_err(|e| {
+                        let e = e.without_url();
+                        warn!(%e, "failed to get response body");
+                        std::io::Error::new(std::io::ErrorKind::BrokenPipe, e)
+                    }),
+                )))
+            }
+        }
     }
 }
 
@@ -122,7 +180,7 @@ where
         match fetch {
             Fetch::URL(url, exp_nixhash) => {
                 // Construct a AsyncRead reading from the data as its downloaded.
-                let mut r = self.download(&url).await?;
+                let mut r = self.download(url.clone()).await?;
 
                 // Construct a AsyncWrite to write into the BlobService.
                 let mut blob_writer = self.blob_service.open_write().await;
@@ -175,7 +233,7 @@ where
             }
             Fetch::Tarball(url, exp_nar_sha256) => {
                 // Construct a AsyncRead reading from the data as its downloaded.
-                let r = self.download(&url).await?;
+                let r = self.download(url.clone()).await?;
 
                 // Pop compression.
                 let r = DecompressedReader::new(r);
@@ -324,13 +382,13 @@ mod tests {
 
         #[test]
         fn fetchurl_store_path() {
-            let url = "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch";
+            let url = Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap();
             let exp_nixhash = NixHash::Sha256(
                 nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax")
                     .unwrap(),
             );
 
-            let fetch = Fetch::URL(url.into(), Some(exp_nixhash));
+            let fetch = Fetch::URL(url, Some(exp_nixhash));
             assert_eq!(
                 "06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch",
                 &fetch
@@ -343,11 +401,11 @@ mod tests {
 
         #[test]
         fn fetch_tarball_store_path() {
-            let url = "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz";
+            let url = Url::parse("https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz").unwrap();
             let exp_nixbase32 =
                 nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm")
                     .unwrap();
-            let fetch = Fetch::Tarball(url.into(), Some(exp_nixbase32));
+            let fetch = Fetch::Tarball(url, Some(exp_nixbase32));
 
             assert_eq!(
                 "7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source",
diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs
index 873efdcf8a06..c95065592bcf 100644
--- a/tvix/glue/src/known_paths.rs
+++ b/tvix/glue/src/known_paths.rs
@@ -134,6 +134,7 @@ impl KnownPaths {
 #[cfg(test)]
 mod tests {
     use nix_compat::{derivation::Derivation, nixbase32, nixhash::NixHash, store_path::StorePath};
+    use url::Url;
 
     use crate::fetchers::Fetch;
 
@@ -160,13 +161,13 @@ mod tests {
             StorePath::from_bytes(b"fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo").expect("must parse");
 
         static ref FETCH_URL : Fetch = Fetch::URL(
-            "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch".into(),
+            Url::parse("https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch").unwrap(),
             Some(NixHash::Sha256(nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax").unwrap()))
         );
         static ref FETCH_URL_OUT_PATH: StorePath = StorePath::from_bytes(b"06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap();
 
         static ref FETCH_TARBALL : Fetch = Fetch::Tarball(
-            "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz".into(),
+            Url::parse("https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz").unwrap(),
             Some(nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm").unwrap())
         );
         static ref FETCH_TARBALL_OUT_PATH: StorePath = StorePath::from_bytes(b"7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap();
diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp b/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp
index a2fd0f1fec84..37a04d577c59 100644
--- a/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp
+++ b/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.exp
@@ -1 +1 @@
-[ /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch ]
+[ /nix/store/y0r1p1cqmlvm0yqkz3gxvkc1p8kg2sz8-null /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch /nix/store/06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch ]
diff --git a/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix b/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix
index 80fe808c21bd..8a3910152554 100644
--- a/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix
+++ b/tvix/glue/src/tests/tvix_tests/eval-okay-fetchurl.nix
@@ -1,6 +1,8 @@
 [
-  # (fetchurl "url") cannot be tested, as that one has to fetch from the
-  # internet to calculate the path.
+  # (fetchurl "url") needs to immediately fetch, but our options without
+  # internet access are fairly limited.
+  # TODO: populate some fixtures at a known location instead.
+  (builtins.fetchurl "file:///dev/null")
 
   # fetchurl with url and sha256
   (builtins.fetchurl {