about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-02-19T15·20+0700
committerflokli <flokli@flokli.de>2024-06-12T22·31+0000
commit842d6816bfa5a8f393719e4cb1e8bfc4d8c14174 (patch)
tree577fd81f9c838a16c1df173cd3da0b10b13423d0
parentb08379096f51f746276ef8810ef18c14a12d6e84 (diff)
feat(tvix/glue): support builtin:fetchurl r/8254
nixpkgs calls <nix/fetchurl.nix> during nixpkgs bootstrap.

This produces a fake derivation with system = builtin
and builder = builtin:fetchurl, and needs to download files from the
internet.

At the end of the Derivation construction, if we have such a derivation,
also synthesize a `Fetch` struct, which we add to the known fetch paths.

This will then cause these fetches to be picked up like all other
fetches in TvixStoreIO.

Change-Id: I72cbca4f85da106b25eda97693a6a6e59911cd57
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10975
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
-rw-r--r--tvix/glue/src/builtins/derivation.rs12
-rw-r--r--tvix/glue/src/fetchers/mod.rs106
-rw-r--r--tvix/glue/src/fetchurl.rs82
-rw-r--r--tvix/glue/src/lib.rs2
-rw-r--r--tvix/glue/src/tvix_store_io.rs10
5 files changed, 197 insertions, 15 deletions
diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs
index 7878ce0ed9..f266141cb6 100644
--- a/tvix/glue/src/builtins/derivation.rs
+++ b/tvix/glue/src/builtins/derivation.rs
@@ -170,6 +170,7 @@ pub(crate) mod derivation_builtins {
     use std::collections::BTreeMap;
 
     use crate::builtins::utils::{select_string, strong_importing_coerce_to_string};
+    use crate::fetchurl::fetchurl_derivation_to_fetch;
 
     use super::*;
     use bstr::ByteSlice;
@@ -506,6 +507,17 @@ pub(crate) mod derivation_builtins {
                 ))),
         )));
 
+        // If the derivation is a fake derivation (builtin:fetchurl),
+        // synthesize a [Fetch] and add it to known_paths, too.
+        if drv.builder == "builtin:fetchurl" {
+            let (name, fetch) =
+                fetchurl_derivation_to_fetch(&drv).map_err(|e| ErrorKind::TvixError(Rc::new(e)))?;
+
+            known_paths
+                .add_fetch(fetch, &name)
+                .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?;
+        }
+
         // Register the Derivation in known_paths.
         known_paths.add_derivation(drv_path, drv);
 
diff --git a/tvix/glue/src/fetchers/mod.rs b/tvix/glue/src/fetchers/mod.rs
index 0ebc5fd3a6..376a4cca63 100644
--- a/tvix/glue/src/fetchers/mod.rs
+++ b/tvix/glue/src/fetchers/mod.rs
@@ -6,8 +6,8 @@ use nix_compat::{
 };
 use sha1::Sha1;
 use sha2::{digest::Output, Digest, Sha256, Sha512};
-use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite};
-use tokio_util::io::InspectReader;
+use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader};
+use tokio_util::io::{InspectReader, InspectWriter};
 use tracing::warn;
 use tvix_castore::{
     blobservice::BlobService,
@@ -381,7 +381,7 @@ where
                 });
 
                 // Ingest the NAR, get the root node.
-                let (root_node, actual_nar_sha256, actual_nar_size) =
+                let (root_node, _actual_nar_sha256, actual_nar_size) =
                     tvix_store::nar::ingest_nar_and_hash(
                         self.blob_service.clone(),
                         self.directory_service.clone(),
@@ -416,14 +416,99 @@ where
                         got: actual_hash,
                     });
                 }
-
                 Ok((
                     root_node,
-                    CAHash::Nar(NixHash::Sha256(actual_nar_sha256)),
+                    // use a CAHash::Nar with the algo from the input.
+                    CAHash::Nar(exp_hash),
                     actual_nar_size,
                 ))
             }
-            Fetch::Executable { url: _, hash: _ } => todo!(),
+            Fetch::Executable {
+                url,
+                hash: exp_hash,
+            } => {
+                // Construct an AsyncRead reading from the data as it's downloaded.
+                let mut r = self.download(url.clone()).await?;
+
+                // Construct an AsyncWrite to write into the BlobService.
+                let mut blob_writer = self.blob_service.open_write().await;
+
+                // Copy the contents from the download reader to the blob writer.
+                let file_size = tokio::io::copy(&mut r, &mut blob_writer).await?;
+                let blob_digest = blob_writer.close().await?;
+
+                // Render the NAR representation on-the-fly into a hash function with
+                // the same algo as our expected hash.
+                // We cannot do this upfront, as we don't know the actual size.
+                // FUTUREWORK: make opportunistic use of Content-Length header?
+
+                let w = tokio::io::sink();
+                // Construct the hash function.
+                let mut hasher: Box<dyn DynDigest + Send> = match exp_hash.algo() {
+                    HashAlgo::Md5 => Box::new(Md5::new()),
+                    HashAlgo::Sha1 => Box::new(Sha1::new()),
+                    HashAlgo::Sha256 => Box::new(Sha256::new()),
+                    HashAlgo::Sha512 => Box::new(Sha512::new()),
+                };
+
+                let mut nar_size: u64 = 0;
+                let mut w = InspectWriter::new(w, |d| {
+                    hasher.update(d);
+                    nar_size += d.len() as u64;
+                });
+
+                {
+                    let node = nix_compat::nar::writer::r#async::open(&mut w).await?;
+
+                    let blob_reader = self
+                        .blob_service
+                        .open_read(&blob_digest)
+                        .await?
+                        .expect("Tvix bug: just-uploaded blob not found");
+
+                    node.file(true, file_size, &mut BufReader::new(blob_reader))
+                        .await?;
+
+                    w.flush().await?;
+                }
+
+                // finalize the hasher.
+                let actual_hash = {
+                    match exp_hash.algo() {
+                        HashAlgo::Md5 => {
+                            NixHash::Md5(hasher.finalize().to_vec().try_into().unwrap())
+                        }
+                        HashAlgo::Sha1 => {
+                            NixHash::Sha1(hasher.finalize().to_vec().try_into().unwrap())
+                        }
+                        HashAlgo::Sha256 => {
+                            NixHash::Sha256(hasher.finalize().to_vec().try_into().unwrap())
+                        }
+                        HashAlgo::Sha512 => {
+                            NixHash::Sha512(hasher.finalize().to_vec().try_into().unwrap())
+                        }
+                    }
+                };
+
+                if exp_hash != actual_hash {
+                    return Err(FetcherError::HashMismatch {
+                        url,
+                        wanted: exp_hash,
+                        got: actual_hash,
+                    });
+                }
+
+                // Construct and return the FileNode describing the downloaded contents,
+                // make it executable.
+                let root_node = Node::File(FileNode {
+                    name: vec![].into(),
+                    digest: blob_digest.into(),
+                    size: file_size,
+                    executable: true,
+                });
+
+                Ok((root_node, CAHash::Nar(actual_hash), file_size))
+            }
             Fetch::Git() => todo!(),
         }
     }
@@ -441,7 +526,7 @@ where
         // Fetch file, return the (unnamed) (File)Node of its contents, ca hash and filesize.
         let (node, ca_hash, size) = self.ingest(fetch).await?;
 
-        // Calculate the store path to return later, which is done with the ca_hash.
+        // Calculate the store path to return, deriving it from the ca_hash.
         let store_path = build_ca_path(name, &ca_hash, Vec::<String>::new(), false)?;
 
         // Rename the node name to match the Store Path.
@@ -450,14 +535,15 @@ where
         // If the resulting hash is not a CAHash::Nar, we also need to invoke
         // `calculate_nar` to calculate this representation, as it's required in
         // the [PathInfo].
+        // FUTUREWORK: allow ingest() to return multiple hashes, or have it feed
+        // nar_calculation_service too?
         let (nar_size, nar_sha256) = match &ca_hash {
-            CAHash::Flat(_nix_hash) => self
+            CAHash::Nar(NixHash::Sha256(nar_sha256)) => (size, *nar_sha256),
+            CAHash::Nar(_) | CAHash::Flat(_) => self
                 .nar_calculation_service
                 .calculate_nar(&node)
                 .await
                 .map_err(|e| FetcherError::Io(e.into()))?,
-            CAHash::Nar(NixHash::Sha256(nar_sha256)) => (size, *nar_sha256),
-            CAHash::Nar(_) => unreachable!("Tvix bug: fetch returned non-sha256 CAHash::Nar"),
             CAHash::Text(_) => unreachable!("Tvix bug: fetch returned CAHash::Text"),
         };
 
diff --git a/tvix/glue/src/fetchurl.rs b/tvix/glue/src/fetchurl.rs
new file mode 100644
index 0000000000..9f57868b19
--- /dev/null
+++ b/tvix/glue/src/fetchurl.rs
@@ -0,0 +1,82 @@
+//! This contains the code translating from a `builtin:fetchurl` [Derivation]
+//! to a [Fetch].
+use crate::fetchers::Fetch;
+use nix_compat::{derivation::Derivation, nixhash::CAHash};
+use tracing::instrument;
+use url::Url;
+
+/// Translates a fake `builtin:fetchurl` derivation into the [Fetch] it stands
+/// for, returning it together with the derivation's `name`.
+///
+/// The fetch kind is chosen from the `out` output's content-address hash:
+/// a flat hash yields [Fetch::URL]; a NAR hash yields [Fetch::Executable] if
+/// the environment has `executable` set to `1`, and [Fetch::NAR] otherwise.
+///
+/// # Errors
+///
+/// Returns an [Error] if the builder is not `builtin:fetchurl`, arguments are
+/// present, the system is not `builtin`, the derivation does not have exactly
+/// one content-addressed `out` output, or `name` / `url` are missing from the
+/// environment or cannot be converted.
+///
+/// # Panics
+///
+/// Panics if the `out` output carries a [CAHash::Text].
+#[instrument]
+pub(crate) fn fetchurl_derivation_to_fetch(drv: &Derivation) -> Result<(String, Fetch), Error> {
+    if drv.builder != "builtin:fetchurl" {
+        return Err(Error::BuilderInvalid);
+    }
+    if !drv.arguments.is_empty() {
+        return Err(Error::ArgumentsInvalud);
+    }
+    if drv.system != "builtin" {
+        return Err(Error::SystemInvalid);
+    }
+
+    // Ensure this is a fixed-output derivation: exactly one output, named
+    // `out`, carrying a content-address hash.
+    if drv.outputs.len() != 1 {
+        return Err(Error::NoFOD);
+    }
+    let out_output = drv.outputs.get("out").ok_or(Error::NoFOD)?;
+    let ca_hash = out_output.ca_hash.clone().ok_or(Error::NoFOD)?;
+
+    // The name is stored as bytes in the environment and has to be converted
+    // into a String.
+    let name: String = drv
+        .environment
+        .get("name")
+        .ok_or(Error::NameMissing)?
+        .to_owned()
+        .try_into()
+        .map_err(|_| Error::NameInvalid)?;
+
+    // The URL has to be valid UTF-8 before it can be parsed.
+    let url: Url = std::str::from_utf8(drv.environment.get("url").ok_or(Error::URLMissing)?)
+        .map_err(|_| Error::URLInvalid)?
+        .parse()
+        .map_err(|_| Error::URLInvalid)?;
+
+    match ca_hash {
+        CAHash::Flat(hash) => Ok((
+            name,
+            Fetch::URL {
+                url,
+                exp_hash: Some(hash),
+            },
+        )),
+        CAHash::Nar(hash) => {
+            // `executable = "1"` in the environment selects the executable
+            // flavour of the fetch.
+            if drv.environment.get("executable").map(|v| v.as_slice()) == Some(b"1") {
+                Ok((name, Fetch::Executable { url, hash }))
+            } else {
+                Ok((name, Fetch::NAR { url, hash }))
+            }
+        }
+        // You can't construct derivations containing this.
+        CAHash::Text(_) => panic!("Tvix bug: got CaHash::Text in drv"),
+    }
+}
+
+/// Errors returned by [fetchurl_derivation_to_fetch] when a derivation cannot
+/// be translated into a [Fetch].
+#[derive(Debug, thiserror::Error)]
+pub(crate) enum Error {
+    /// The derivation's builder is not `builtin:fetchurl`.
+    #[error("Invalid builder")]
+    BuilderInvalid,
+    /// The derivation has builder arguments, but none are accepted.
+    // NOTE(review): the variant name is misspelled ("Invalud"); kept as-is
+    // because renaming it would change the type's interface.
+    #[error("invalid arguments")]
+    ArgumentsInvalud,
+    /// The derivation's system is not `builtin`.
+    #[error("Invalid system")]
+    SystemInvalid,
+    /// The derivation does not have exactly one output named `out` carrying a
+    /// content-address hash.
+    #[error("Derivation is not fixed-output")]
+    NoFOD,
+    /// The environment has no `url` entry.
+    #[error("Missing URL")]
+    URLMissing,
+    /// The `url` entry is not valid UTF-8 or does not parse as a URL.
+    #[error("Invalid URL")]
+    URLInvalid,
+    /// The environment has no `name` entry.
+    #[error("Missing Name")]
+    NameMissing,
+    /// The `name` entry could not be converted into a `String`.
+    #[error("Name invalid")]
+    NameInvalid,
+}
diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs
index 2e5a3be103..a5dbdb8742 100644
--- a/tvix/glue/src/lib.rs
+++ b/tvix/glue/src/lib.rs
@@ -6,6 +6,8 @@ pub mod tvix_build;
 pub mod tvix_io;
 pub mod tvix_store_io;
 
+mod fetchurl;
+
 #[cfg(test)]
 mod tests;
 
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
index f32dea512e..697b5a1dde 100644
--- a/tvix/glue/src/tvix_store_io.rs
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -1,5 +1,4 @@
 //! This module provides an implementation of EvalIO talking to tvix-store.
-
 use bytes::Bytes;
 use futures::{StreamExt, TryStreamExt};
 use nix_compat::nixhash::NixHash;
@@ -138,7 +137,8 @@ impl TvixStoreIO {
                 // The store path doesn't exist yet, so we need to fetch or build it.
                 // We check for fetches first, as we might have both native
                 // fetchers and FODs in KnownPaths, and prefer the former.
-
+                // This will also find [Fetch] synthesized from
+                // `builtin:fetchurl` Derivations.
                 let maybe_fetch = self
                     .known_paths
                     .borrow()
@@ -156,9 +156,9 @@ impl TvixStoreIO {
                         })?;
 
                         debug_assert_eq!(
-                            sp.to_string(),
-                            store_path.to_string(),
-                            "store path returned from fetcher should match"
+                            sp.to_absolute_path(),
+                            store_path.as_ref().to_absolute_path(),
+                            "store path returned from fetcher must match store path we have in fetchers"
                         );
 
                         root_node