diff options
author | Florian Klink <flokli@flokli.de> | 2024-02-19T15·20+0700 |
---|---|---|
committer | flokli <flokli@flokli.de> | 2024-06-12T22·31+0000 |
commit | 842d6816bfa5a8f393719e4cb1e8bfc4d8c14174 (patch) | |
tree | 577fd81f9c838a16c1df173cd3da0b10b13423d0 | |
parent | b08379096f51f746276ef8810ef18c14a12d6e84 (diff) |
feat(tvix/glue): support builtin:fetchurl r/8254
nixpkgs calls `<nix/fetchurl.nix>` during the nixpkgs bootstrap. This produces a fake derivation with `system = builtin` and `builder = builtin:fetchurl`, which needs to download files from the internet.

At the end of the Derivation construction, if we have such a derivation, we also synthesize a `Fetch` struct and add it to the known fetch paths. This causes these fetches to be picked up like all other fetches in TvixStoreIO.

Change-Id: I72cbca4f85da106b25eda97693a6a6e59911cd57
Reviewed-on: https://cl.tvl.fyi/c/depot/+/10975
Reviewed-by: Connor Brewster <cbrewster@hey.com>
Tested-by: BuildkiteCI
-rw-r--r-- | tvix/glue/src/builtins/derivation.rs | 12 | ||||
-rw-r--r-- | tvix/glue/src/fetchers/mod.rs | 106 | ||||
-rw-r--r-- | tvix/glue/src/fetchurl.rs | 82 | ||||
-rw-r--r-- | tvix/glue/src/lib.rs | 2 | ||||
-rw-r--r-- | tvix/glue/src/tvix_store_io.rs | 10 |
5 files changed, 197 insertions, 15 deletions
diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index 7878ce0ed9ad..f266141cb6e7 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -170,6 +170,7 @@ pub(crate) mod derivation_builtins { use std::collections::BTreeMap; use crate::builtins::utils::{select_string, strong_importing_coerce_to_string}; + use crate::fetchurl::fetchurl_derivation_to_fetch; use super::*; use bstr::ByteSlice; @@ -506,6 +507,17 @@ pub(crate) mod derivation_builtins { ))), ))); + // If the derivation is a fake derivation (builtins:fetchurl), + // synthesize a [Fetch] and add it there, too. + if drv.builder == "builtin:fetchurl" { + let (name, fetch) = + fetchurl_derivation_to_fetch(&drv).map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; + + known_paths + .add_fetch(fetch, &name) + .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; + } + // Register the Derivation in known_paths. known_paths.add_derivation(drv_path, drv); diff --git a/tvix/glue/src/fetchers/mod.rs b/tvix/glue/src/fetchers/mod.rs index 0ebc5fd3a638..376a4cca634c 100644 --- a/tvix/glue/src/fetchers/mod.rs +++ b/tvix/glue/src/fetchers/mod.rs @@ -6,8 +6,8 @@ use nix_compat::{ }; use sha1::Sha1; use sha2::{digest::Output, Digest, Sha256, Sha512}; -use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite}; -use tokio_util::io::InspectReader; +use tokio::io::{AsyncBufRead, AsyncRead, AsyncWrite, AsyncWriteExt, BufReader}; +use tokio_util::io::{InspectReader, InspectWriter}; use tracing::warn; use tvix_castore::{ blobservice::BlobService, @@ -381,7 +381,7 @@ where }); // Ingest the NAR, get the root node. 
- let (root_node, actual_nar_sha256, actual_nar_size) = + let (root_node, _actual_nar_sha256, actual_nar_size) = tvix_store::nar::ingest_nar_and_hash( self.blob_service.clone(), self.directory_service.clone(), @@ -416,14 +416,99 @@ where got: actual_hash, }); } - Ok(( root_node, - CAHash::Nar(NixHash::Sha256(actual_nar_sha256)), + // use a CAHash::Nar with the algo from the input. + CAHash::Nar(exp_hash), actual_nar_size, )) } - Fetch::Executable { url: _, hash: _ } => todo!(), + Fetch::Executable { + url, + hash: exp_hash, + } => { + // Construct a AsyncRead reading from the data as its downloaded. + let mut r = self.download(url.clone()).await?; + + // Construct a AsyncWrite to write into the BlobService. + let mut blob_writer = self.blob_service.open_write().await; + + // Copy the contents from the download reader to the blob writer. + let file_size = tokio::io::copy(&mut r, &mut blob_writer).await?; + let blob_digest = blob_writer.close().await?; + + // Render the NAR representation on-the-fly into a hash function with + // the same algo as our expected hash. + // We cannot do this upfront, as we don't know the actual size. + // FUTUREWORK: make opportunistic use of Content-Length header? + + let w = tokio::io::sink(); + // Construct the hash function. + let mut hasher: Box<dyn DynDigest + Send> = match exp_hash.algo() { + HashAlgo::Md5 => Box::new(Md5::new()), + HashAlgo::Sha1 => Box::new(Sha1::new()), + HashAlgo::Sha256 => Box::new(Sha256::new()), + HashAlgo::Sha512 => Box::new(Sha512::new()), + }; + + let mut nar_size: u64 = 0; + let mut w = InspectWriter::new(w, |d| { + hasher.update(d); + nar_size += d.len() as u64; + }); + + { + let node = nix_compat::nar::writer::r#async::open(&mut w).await?; + + let blob_reader = self + .blob_service + .open_read(&blob_digest) + .await? + .expect("Tvix bug: just-uploaded blob not found"); + + node.file(true, file_size, &mut BufReader::new(blob_reader)) + .await?; + + w.flush().await?; + } + + // finalize the hasher. 
+ let actual_hash = { + match exp_hash.algo() { + HashAlgo::Md5 => { + NixHash::Md5(hasher.finalize().to_vec().try_into().unwrap()) + } + HashAlgo::Sha1 => { + NixHash::Sha1(hasher.finalize().to_vec().try_into().unwrap()) + } + HashAlgo::Sha256 => { + NixHash::Sha256(hasher.finalize().to_vec().try_into().unwrap()) + } + HashAlgo::Sha512 => { + NixHash::Sha512(hasher.finalize().to_vec().try_into().unwrap()) + } + } + }; + + if exp_hash != actual_hash { + return Err(FetcherError::HashMismatch { + url, + wanted: exp_hash, + got: actual_hash, + }); + } + + // Construct and return the FileNode describing the downloaded contents, + // make it executable. + let root_node = Node::File(FileNode { + name: vec![].into(), + digest: blob_digest.into(), + size: file_size, + executable: true, + }); + + Ok((root_node, CAHash::Nar(actual_hash), file_size)) + } Fetch::Git() => todo!(), } } @@ -441,7 +526,7 @@ where // Fetch file, return the (unnamed) (File)Node of its contents, ca hash and filesize. let (node, ca_hash, size) = self.ingest(fetch).await?; - // Calculate the store path to return later, which is done with the ca_hash. + // Calculate the store path to return, by calculating from ca_hash. let store_path = build_ca_path(name, &ca_hash, Vec::<String>::new(), false)?; // Rename the node name to match the Store Path. @@ -450,14 +535,15 @@ where // If the resulting hash is not a CAHash::Nar, we also need to invoke // `calculate_nar` to calculate this representation, as it's required in // the [PathInfo]. + // FUTUREWORK: allow ingest() to return multiple hashes, or have it feed + // nar_calculation_service too? 
let (nar_size, nar_sha256) = match &ca_hash { - CAHash::Flat(_nix_hash) => self + CAHash::Nar(NixHash::Sha256(nar_sha256)) => (size, *nar_sha256), + CAHash::Nar(_) | CAHash::Flat(_) => self .nar_calculation_service .calculate_nar(&node) .await .map_err(|e| FetcherError::Io(e.into()))?, - CAHash::Nar(NixHash::Sha256(nar_sha256)) => (size, *nar_sha256), - CAHash::Nar(_) => unreachable!("Tvix bug: fetch returned non-sha256 CAHash::Nar"), CAHash::Text(_) => unreachable!("Tvix bug: fetch returned CAHash::Text"), }; diff --git a/tvix/glue/src/fetchurl.rs b/tvix/glue/src/fetchurl.rs new file mode 100644 index 000000000000..9f57868b1991 --- /dev/null +++ b/tvix/glue/src/fetchurl.rs @@ -0,0 +1,82 @@ +//! This contains the code translating from a `builtin:derivation` [Derivation] +//! to a [Fetch]. +use crate::fetchers::Fetch; +use nix_compat::{derivation::Derivation, nixhash::CAHash}; +use tracing::instrument; +use url::Url; + +/// Takes a derivation produced by a call to `builtin:fetchurl` and returns the +/// synthesized [Fetch] for it, as well as the name. +#[instrument] +pub(crate) fn fetchurl_derivation_to_fetch(drv: &Derivation) -> Result<(String, Fetch), Error> { + if drv.builder != "builtin:fetchurl" { + return Err(Error::BuilderInvalid); + } + if !drv.arguments.is_empty() { + return Err(Error::ArgumentsInvalud); + } + if drv.system != "builtin" { + return Err(Error::SystemInvalid); + } + + // ensure this is a fixed-output derivation + if drv.outputs.len() != 1 { + return Err(Error::NoFOD); + } + let out_output = &drv.outputs.get("out").ok_or(Error::NoFOD)?; + let ca_hash = out_output.ca_hash.clone().ok_or(Error::NoFOD)?; + + let name: String = drv + .environment + .get("name") + .ok_or(Error::NameMissing)? + .to_owned() + .try_into() + .map_err(|_| Error::NameInvalid)?; + + let url: Url = std::str::from_utf8(drv.environment.get("url").ok_or(Error::URLMissing)?) + .map_err(|_| Error::URLInvalid)? 
+ .parse() + .map_err(|_| Error::URLInvalid)?; + + match ca_hash { + CAHash::Flat(hash) => { + return Ok(( + name, + Fetch::URL { + url, + exp_hash: Some(hash), + }, + )) + } + CAHash::Nar(hash) => { + if drv.environment.get("executable").map(|v| v.as_slice()) == Some(b"1") { + Ok((name, Fetch::Executable { url, hash })) + } else { + Ok((name, Fetch::NAR { url, hash })) + } + } + // you can't construct derivations containing this + CAHash::Text(_) => panic!("Tvix bug: got CaHash::Text in drv"), + } +} + +#[derive(Debug, thiserror::Error)] +pub(crate) enum Error { + #[error("Invalid builder")] + BuilderInvalid, + #[error("invalid arguments")] + ArgumentsInvalud, + #[error("Invalid system")] + SystemInvalid, + #[error("Derivation is not fixed-output")] + NoFOD, + #[error("Missing URL")] + URLMissing, + #[error("Invalid URL")] + URLInvalid, + #[error("Missing Name")] + NameMissing, + #[error("Name invalid")] + NameInvalid, +} diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs index 2e5a3be103a1..a5dbdb8742fc 100644 --- a/tvix/glue/src/lib.rs +++ b/tvix/glue/src/lib.rs @@ -6,6 +6,8 @@ pub mod tvix_build; pub mod tvix_io; pub mod tvix_store_io; +mod fetchurl; + #[cfg(test)] mod tests; diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index f32dea512ee3..697b5a1dde8a 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -1,5 +1,4 @@ //! This module provides an implementation of EvalIO talking to tvix-store. - use bytes::Bytes; use futures::{StreamExt, TryStreamExt}; use nix_compat::nixhash::NixHash; @@ -138,7 +137,8 @@ impl TvixStoreIO { // The store path doesn't exist yet, so we need to fetch or build it. // We check for fetches first, as we might have both native // fetchers and FODs in KnownPaths, and prefer the former. - + // This will also find [Fetch] synthesized from + // `builtin:fetchurl` Derivations. 
let maybe_fetch = self .known_paths .borrow() @@ -156,9 +156,9 @@ impl TvixStoreIO { })?; debug_assert_eq!( - sp.to_string(), - store_path.to_string(), - "store path returned from fetcher should match" + sp.to_absolute_path(), + store_path.as_ref().to_absolute_path(), + "store path returned from fetcher must match store path we have in fetchers" ); root_node |