From de727bccf99a1dcce2bb335e56af02f80e462dbc Mon Sep 17 00:00:00 2001
From: Aspen Smith
Date: Fri, 23 Feb 2024 10:09:20 -0500
Subject: feat(tvix/glue): Implement builtins.fetchurl

Implement the fetchurl builtin, and lay the groundwork for implementing
the fetchTarball builtin (which works very similarly, and is implemented
using almost the same code in C++ nix).

An overview of how this works:

1. First, we check if the store path that *would* result from the
   download already exists in the store - if it does, we just return
   that.
2. If we need to download the URL, TvixStoreIO now has an
   `http_client: reqwest::Client` field, which we use to make the
   request.
3. As we're downloading the blob, we hash the data incrementally into a
   SHA256 hasher.
4. We compare the hash against the expected hash (if any) and bail out
   if it doesn't match.
5. Finally, we put the blob in the store and return the store path.

Since the logic is very similar, this commit also implements a *chunk*
of `fetchTarball` (though the actual implementation will likely include
a refactor to some of the code reuse here).

The main thing that's missing here is caching of downloaded blobs when
fetchurl is called without a hash - I've opened b/381 to track the TODO
there.

Adding the `SSL_CERT_FILE` here is necessary to teach reqwest how to
load it during tests - see 1c16dee20 (feat(tvix/store): use reqwests'
rustls-native-roots feature, 2024-03-03) for more info.

Change-Id: I83c4abbc7c0c3bfe92461917e23d6d3430fbf137 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11017 Tested-by: BuildkiteCI Reviewed-by: flokli Autosubmit: aspen --- tvix/glue/src/builtins/derivation.rs | 41 +----- tvix/glue/src/builtins/errors.rs | 30 +++- tvix/glue/src/builtins/fetchers.rs | 257 ++++++++++++++++++++++++++++++++++- tvix/glue/src/builtins/mod.rs | 3 +- tvix/glue/src/builtins/utils.rs | 36 +++++ tvix/glue/src/tvix_store_io.rs | 107 ++++++++++++++- 6 files changed, 425 insertions(+), 49 deletions(-) create mode 100644 tvix/glue/src/builtins/utils.rs (limited to 'tvix/glue/src') diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index 71249f1c7722..4ea9611a946c 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -10,8 +10,7 @@ use std::rc::Rc; use tvix_eval::builtin_macros::builtins; use tvix_eval::generators::{self, emit_warning_kind, GenCo}; use tvix_eval::{ - AddContext, CatchableErrorKind, CoercionKind, ErrorKind, NixAttrs, NixContext, - NixContextElement, Value, WarningKind, + AddContext, ErrorKind, NixAttrs, NixContext, NixContextElement, Value, WarningKind, }; // Constants used for strangely named fields in derivation inputs. 
@@ -144,6 +143,8 @@ fn handle_fixed_output( pub(crate) mod derivation_builtins { use std::collections::BTreeMap; + use crate::builtins::utils::{select_string, strong_importing_coerce_to_string}; + use super::*; use bstr::ByteSlice; use nix_compat::store_path::hash_placeholder; @@ -197,27 +198,6 @@ pub(crate) mod derivation_builtins { drv.outputs.insert("out".to_string(), Default::default()); let mut input_context = NixContext::new(); - #[inline] - async fn strong_importing_coerce_to_string( - co: &GenCo, - val: Value, - ) -> Result { - let val = generators::request_force(co, val).await; - match generators::request_string_coerce( - co, - val, - CoercionKind { - strong: true, - import_paths: true, - }, - ) - .await - { - Err(cek) => Err(cek), - Ok(val_str) => Ok(val_str), - } - } - /// Inserts a key and value into the drv.environment BTreeMap, and fails if the /// key did already exist before. fn insert_env( @@ -385,21 +365,6 @@ pub(crate) mod derivation_builtins { // Configure fixed-output derivations if required. { - async fn select_string( - co: &GenCo, - attrs: &NixAttrs, - key: &str, - ) -> Result, CatchableErrorKind>, ErrorKind> { - if let Some(attr) = attrs.select(key) { - match strong_importing_coerce_to_string(co, attr.clone()).await { - Err(cek) => return Ok(Err(cek)), - Ok(str) => return Ok(Ok(Some(str.to_str()?.to_owned()))), - } - } - - Ok(Ok(None)) - } - let output_hash = match select_string(&co, &input, "outputHash") .await .context("evaluating the `outputHash` parameter")? diff --git a/tvix/glue/src/builtins/errors.rs b/tvix/glue/src/builtins/errors.rs index b606aa6a7c48..c753a125e030 100644 --- a/tvix/glue/src/builtins/errors.rs +++ b/tvix/glue/src/builtins/errors.rs @@ -1,5 +1,8 @@ //! 
Contains errors that can occur during evaluation of builtins in this crate -use nix_compat::nixhash; +use nix_compat::{ + nixhash::{self, NixHash}, + store_path::BuildStorePathError, +}; use std::rc::Rc; use thiserror::Error; @@ -25,3 +28,28 @@ impl From for tvix_eval::ErrorKind { tvix_eval::ErrorKind::TvixError(Rc::new(err)) } } + +#[derive(Debug, Error)] +pub enum FetcherError { + #[error("hash mismatch in file downloaded from {url}:\n wanted: {wanted}\n got: {got}")] + HashMismatch { + url: String, + wanted: NixHash, + got: NixHash, + }, + + #[error("Invalid hash type '{0}' for fetcher")] + InvalidHashType(&'static str), + + #[error("Error in store path for fetcher output: {0}")] + StorePath(#[from] BuildStorePathError), + + #[error(transparent)] + Http(#[from] reqwest::Error), +} + +impl From for tvix_eval::ErrorKind { + fn from(err: FetcherError) -> Self { + tvix_eval::ErrorKind::TvixError(Rc::new(err)) + } +} diff --git a/tvix/glue/src/builtins/fetchers.rs b/tvix/glue/src/builtins/fetchers.rs index 7a5d49a725d1..07074d93de11 100644 --- a/tvix/glue/src/builtins/fetchers.rs +++ b/tvix/glue/src/builtins/fetchers.rs @@ -1,9 +1,189 @@ //! 
Contains builtins that fetch paths from the Internet use crate::tvix_store_io::TvixStoreIO; +use bstr::ByteSlice; +use nix_compat::nixhash::{self, CAHash}; +use nix_compat::store_path::{build_ca_path, StorePathRef}; use std::rc::Rc; use tvix_eval::builtin_macros::builtins; -use tvix_eval::Value; +use tvix_eval::generators::GenCo; +use tvix_eval::{CatchableErrorKind, ErrorKind, NixContextElement, NixString, Value}; + +use super::utils::select_string; +use super::{DerivationError, FetcherError}; + +/// Attempts to mimic `nix::libutil::baseNameOf` +fn url_basename(s: &str) -> &str { + if s.is_empty() { + return ""; + } + + let mut last = s.len() - 1; + if s.chars().nth(last).unwrap() == '/' && last > 0 { + last -= 1; + } + + if last == 0 { + return ""; + } + + let pos = match s[..=last].rfind('/') { + Some(pos) => { + if pos == last - 1 { + 0 + } else { + pos + } + } + None => 0, + }; + + &s[(pos + 1)..=last] +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum HashMode { + Flat, + Recursive, +} + +/// Struct representing the arguments passed to fetcher functions +#[derive(Debug, PartialEq, Eq)] +struct FetchArgs { + url: String, + name: String, + hash: Option, +} + +impl FetchArgs { + pub fn new( + url: String, + name: Option, + sha256: Option, + mode: HashMode, + ) -> nixhash::Result { + Ok(Self { + name: name.unwrap_or_else(|| url_basename(&url).to_owned()), + url, + hash: sha256 + .map(|h| { + let hash = nixhash::from_str(&h, Some("sha256"))?; + Ok(match mode { + HashMode::Flat => Some(nixhash::CAHash::Flat(hash)), + HashMode::Recursive => Some(nixhash::CAHash::Nar(hash)), + }) + }) + .transpose()? 
+ .flatten(), + }) + } + + fn store_path(&self) -> Result, ErrorKind> { + let Some(h) = &self.hash else { + return Ok(None); + }; + build_ca_path(&self.name, h, Vec::::new(), false) + .map(Some) + .map_err(|e| FetcherError::from(e).into()) + } + + async fn extract( + co: &GenCo, + args: Value, + default_name: Option<&str>, + mode: HashMode, + ) -> Result, ErrorKind> { + if let Ok(url) = args.to_str() { + return Ok(Ok(FetchArgs::new( + url.to_str()?.to_owned(), + None, + None, + mode, + ) + .map_err(DerivationError::InvalidOutputHash)?)); + } + + let attrs = args.to_attrs().map_err(|_| ErrorKind::TypeError { + expected: "attribute set or string", + actual: args.type_of(), + })?; + + let url = match select_string(co, &attrs, "url").await? { + Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?, + Err(cek) => return Ok(Err(cek)), + }; + let name = match select_string(co, &attrs, "name").await? { + Ok(s) => s.or_else(|| default_name.map(|s| s.to_owned())), + Err(cek) => return Ok(Err(cek)), + }; + let sha256 = match select_string(co, &attrs, "sha256").await? { + Ok(s) => s, + Err(cek) => return Ok(Err(cek)), + }; + + Ok(Ok( + FetchArgs::new(url, name, sha256, mode).map_err(DerivationError::InvalidOutputHash)? 
+ )) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum FetchMode { + Url, + Tarball, +} + +impl From for HashMode { + fn from(value: FetchMode) -> Self { + match value { + FetchMode::Url => HashMode::Flat, + FetchMode::Tarball => HashMode::Recursive, + } + } +} + +impl FetchMode { + fn default_name(self) -> Option<&'static str> { + match self { + FetchMode::Url => None, + FetchMode::Tarball => Some("source"), + } + } +} + +fn string_from_store_path(store_path: StorePathRef) -> NixString { + NixString::new_context_from( + NixContextElement::Plain(store_path.to_absolute_path()).into(), + store_path.to_absolute_path(), + ) +} + +async fn fetch( + state: Rc, + co: GenCo, + args: Value, + mode: FetchMode, +) -> Result { + let args = match FetchArgs::extract(&co, args, mode.default_name(), mode.into()).await? { + Ok(args) => args, + Err(cek) => return Ok(cek.into()), + }; + + if let Some(store_path) = args.store_path()? { + if state.store_path_exists(store_path).await? { + return Ok(string_from_store_path(store_path).into()); + } + } + + let hash = args.hash.as_ref().map(|h| h.hash()); + let store_path = Rc::clone(&state).tokio_handle.block_on(state.fetch_url( + &args.url, + &args.name, + hash.as_deref(), + ))?; + + Ok(string_from_store_path(store_path.as_ref()).into()) +} #[allow(unused_variables)] // for the `state` arg, for now #[builtins(state = "Rc")] @@ -11,15 +191,14 @@ pub(crate) mod fetcher_builtins { use super::*; use tvix_eval::generators::Gen; - use tvix_eval::{generators::GenCo, ErrorKind}; #[builtin("fetchurl")] async fn builtin_fetchurl( state: Rc, co: GenCo, - url: Value, + args: Value, ) -> Result { - Err(ErrorKind::NotImplemented("fetchurl")) + fetch(state, co, args, FetchMode::Url).await } #[builtin("fetchTarball")] @@ -28,7 +207,7 @@ pub(crate) mod fetcher_builtins { co: GenCo, args: Value, ) -> Result { - Err(ErrorKind::NotImplemented("fetchTarball")) + fetch(state, co, args, FetchMode::Tarball).await } #[builtin("fetchGit")] @@ -40,3 
+219,71 @@ pub(crate) mod fetcher_builtins { Err(ErrorKind::NotImplemented("fetchGit")) } } + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use nix_compat::store_path::StorePath; + + use super::*; + + #[test] + fn fetchurl_store_path() { + let url = "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch"; + let sha256 = "0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax"; + let args = FetchArgs::new(url.into(), None, Some(sha256.into()), HashMode::Flat).unwrap(); + + assert_eq!( + args.store_path().unwrap().unwrap().to_owned(), + StorePath::from_str("06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap() + ) + } + + #[test] + fn fetch_tarball_store_path() { + let url = "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz"; + let sha256 = "1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm"; + let args = FetchArgs::new( + url.into(), + Some("source".into()), + Some(sha256.into()), + HashMode::Recursive, + ) + .unwrap(); + + assert_eq!( + args.store_path().unwrap().unwrap().to_owned(), + StorePath::from_str("7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap() + ) + } + + mod url_basename { + use super::*; + + #[test] + fn empty_path() { + assert_eq!(url_basename(""), ""); + } + + #[test] + fn path_on_root() { + assert_eq!(url_basename("/dir"), "dir"); + } + + #[test] + fn relative_path() { + assert_eq!(url_basename("dir/foo"), "foo"); + } + + #[test] + fn root_with_trailing_slash() { + assert_eq!(url_basename("/"), ""); + } + + #[test] + fn trailing_slash() { + assert_eq!(url_basename("/dir/"), "dir"); + } + } +} diff --git a/tvix/glue/src/builtins/mod.rs b/tvix/glue/src/builtins/mod.rs index c528bd46424b..138a52633da8 100644 --- a/tvix/glue/src/builtins/mod.rs +++ b/tvix/glue/src/builtins/mod.rs @@ -8,8 +8,9 @@ mod derivation; mod errors; mod fetchers; mod import; +mod utils; -pub use errors::DerivationError; +pub use 
errors::{DerivationError, FetcherError}; /// Adds derivation-related builtins to the passed [tvix_eval::Evaluation]. /// diff --git a/tvix/glue/src/builtins/utils.rs b/tvix/glue/src/builtins/utils.rs new file mode 100644 index 000000000000..586169beeb69 --- /dev/null +++ b/tvix/glue/src/builtins/utils.rs @@ -0,0 +1,36 @@ +use bstr::ByteSlice; +use tvix_eval::{ + generators::{self, GenCo}, + CatchableErrorKind, CoercionKind, ErrorKind, NixAttrs, NixString, Value, +}; + +pub(super) async fn strong_importing_coerce_to_string( + co: &GenCo, + val: Value, +) -> Result { + let val = generators::request_force(co, val).await; + generators::request_string_coerce( + co, + val, + CoercionKind { + strong: true, + import_paths: true, + }, + ) + .await +} + +pub(super) async fn select_string( + co: &GenCo, + attrs: &NixAttrs, + key: &str, +) -> Result, CatchableErrorKind>, ErrorKind> { + if let Some(attr) = attrs.select(key) { + match strong_importing_coerce_to_string(co, attr.clone()).await { + Err(cek) => return Ok(Err(cek)), + Ok(str) => return Ok(Ok(Some(str.to_str()?.to_owned()))), + } + } + + Ok(Ok(None)) +} diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index c09f0098e43e..30ab97c0ca03 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -4,7 +4,12 @@ use async_recursion::async_recursion; use bytes::Bytes; use futures::Stream; use futures::{StreamExt, TryStreamExt}; +use nix_compat::nixhash::NixHash; +use nix_compat::store_path::{build_ca_path, StorePathRef}; use nix_compat::{nixhash::CAHash, store_path::StorePath}; +use sha2::{Digest, Sha256}; +use std::marker::Unpin; +use std::rc::Rc; use std::{ cell::RefCell, collections::BTreeSet, @@ -15,17 +20,18 @@ use std::{ use tokio::io::AsyncReadExt; use tracing::{error, instrument, warn, Level}; use tvix_build::buildservice::BuildService; -use tvix_eval::{EvalIO, FileType, StdIO}; +use tvix_eval::{ErrorKind, EvalIO, FileType, StdIO}; use walkdir::DirEntry; use 
tvix_castore::{ blobservice::BlobService, directoryservice::{self, DirectoryService}, - proto::{node::Node, NamedNode}, + proto::{node::Node, FileNode, NamedNode}, B3Digest, }; use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo}; +use crate::builtins::FetcherError; use crate::known_paths::KnownPaths; use crate::tvix_build::derivation_to_build_request; @@ -51,7 +57,8 @@ pub struct TvixStoreIO { std_io: StdIO, #[allow(dead_code)] build_service: Arc, - tokio_handle: tokio::runtime::Handle, + pub(crate) tokio_handle: tokio::runtime::Handle, + http_client: reqwest::Client, pub(crate) known_paths: RefCell, } @@ -70,6 +77,7 @@ impl TvixStoreIO { std_io: StdIO {}, build_service, tokio_handle, + http_client: reqwest::Client::new(), known_paths: Default::default(), } } @@ -278,7 +286,7 @@ impl TvixStoreIO { /// with a [`tokio::runtime::Handle::block_on`] call for synchronicity. pub(crate) fn ingest_entries_sync(&self, entries_stream: S) -> io::Result where - S: Stream + std::marker::Unpin, + S: Stream + Unpin, { self.tokio_handle.block_on(async move { tvix_castore::import::ingest_entries( @@ -346,6 +354,97 @@ impl TvixStoreIO { .await }) } + + pub async fn store_path_exists<'a>(&'a self, store_path: StorePathRef<'a>) -> io::Result { + Ok(self + .path_info_service + .as_ref() + .get(*store_path.digest()) + .await? 
+ .is_some()) + } + + pub async fn fetch_url( + &self, + url: &str, + name: &str, + hash: Option<&NixHash>, + ) -> Result { + let resp = self + .http_client + .get(url) + .send() + .await + .map_err(FetcherError::from)?; + let mut sha = Sha256::new(); + let mut data = tokio_util::io::StreamReader::new( + resp.bytes_stream() + .inspect_ok(|data| { + sha.update(data); + }) + .map_err(|e| { + let e = e.without_url(); + warn!(%e, "failed to get response body"); + io::Error::new(io::ErrorKind::BrokenPipe, e.to_string()) + }), + ); + + let mut blob = self.blob_service.open_write().await; + let size = tokio::io::copy(&mut data, blob.as_mut()).await?; + let blob_digest = blob.close().await?; + let got = NixHash::Sha256(sha.finalize().into()); + + let hash = CAHash::Flat(if let Some(wanted) = hash { + if *wanted != got { + return Err(FetcherError::HashMismatch { + url: url.to_owned(), + wanted: wanted.clone(), + got, + } + .into()); + } + wanted.clone() + } else { + got + }); + + let path = build_ca_path(name, &hash, Vec::::new(), false) + .map_err(|e| io::Error::new(io::ErrorKind::Other, e))?; + let node = Node::File(FileNode { + name: path.to_string().into(), + digest: blob_digest.into(), + size, + executable: false, + }); + + let (nar_size, nar_sha256) = self + .path_info_service + .calculate_nar(&node) + .await + .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; + + let path_info = PathInfo { + node: Some(tvix_castore::proto::Node { + node: Some(node.clone()), + }), + references: vec![], + narinfo: Some(tvix_store::proto::NarInfo { + nar_size, + nar_sha256: nar_sha256.to_vec().into(), + signatures: vec![], + reference_names: vec![], + deriver: None, /* ? */ + ca: Some((&hash).into()), + }), + }; + + self.path_info_service + .put(path_info) + .await + .map_err(|e| std::io::Error::new(io::ErrorKind::Other, e))?; + + Ok(path.to_owned()) + } } impl EvalIO for TvixStoreIO { -- cgit 1.4.1