From de727bccf99a1dcce2bb335e56af02f80e462dbc Mon Sep 17 00:00:00 2001
From: Aspen Smith
Date: Fri, 23 Feb 2024 10:09:20 -0500
Subject: feat(tvix/glue): Implement builtins.fetchurl

Implement the fetchurl builtin, and lay the groundwork for implementing
the fetchTarball builtin (which works very similarly, and is implemented
using almost the same code in C++ nix).

An overview of how this works:

1. First, we check if the store path that *would* result from the
   download already exists in the store - if it does, we just return
   that
2. If we need to download the URL, TvixStoreIO has an
   `http_client: reqwest::Client` field now which we use to make the
   request
3. As we're downloading the blob, we hash the data incrementally into a
   SHA256 hasher
4. We compare the hash against the expected hash (if any) and bail out
   if it doesn't match
5. Finally, we put the blob in the store and return the store path

Since the logic is very similar, this commit also implements a *chunk*
of `fetchTarball` (though the actual implementation will likely include
a refactor to some of the code reuse here).

The main thing that's missing here is caching of downloaded blobs when
fetchurl is called without a hash - I've opened b/381 to track the TODO
there.

Adding the `SSL_CERT_FILE` here is necessary to teach reqwest how to
load it during tests - see 1c16dee20 (feat(tvix/store): use reqwests'
rustls-native-roots feature, 2024-03-03) for more info.

Change-Id: I83c4abbc7c0c3bfe92461917e23d6d3430fbf137 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11017 Tested-by: BuildkiteCI Reviewed-by: flokli Autosubmit: aspen --- tvix/glue/src/builtins/fetchers.rs | 257 ++++++++++++++++++++++++++++++++++++- 1 file changed, 252 insertions(+), 5 deletions(-) (limited to 'tvix/glue/src/builtins/fetchers.rs') diff --git a/tvix/glue/src/builtins/fetchers.rs b/tvix/glue/src/builtins/fetchers.rs index 7a5d49a725d1..07074d93de11 100644 --- a/tvix/glue/src/builtins/fetchers.rs +++ b/tvix/glue/src/builtins/fetchers.rs @@ -1,9 +1,189 @@ //! Contains builtins that fetch paths from the Internet use crate::tvix_store_io::TvixStoreIO; +use bstr::ByteSlice; +use nix_compat::nixhash::{self, CAHash}; +use nix_compat::store_path::{build_ca_path, StorePathRef}; use std::rc::Rc; use tvix_eval::builtin_macros::builtins; -use tvix_eval::Value; +use tvix_eval::generators::GenCo; +use tvix_eval::{CatchableErrorKind, ErrorKind, NixContextElement, NixString, Value}; + +use super::utils::select_string; +use super::{DerivationError, FetcherError}; + +/// Attempts to mimic `nix::libutil::baseNameOf` +fn url_basename(s: &str) -> &str { + if s.is_empty() { + return ""; + } + + let mut last = s.len() - 1; + if s.chars().nth(last).unwrap() == '/' && last > 0 { + last -= 1; + } + + if last == 0 { + return ""; + } + + let pos = match s[..=last].rfind('/') { + Some(pos) => { + if pos == last - 1 { + 0 + } else { + pos + } + } + None => 0, + }; + + &s[(pos + 1)..=last] +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum HashMode { + Flat, + Recursive, +} + +/// Struct representing the arguments passed to fetcher functions +#[derive(Debug, PartialEq, Eq)] +struct FetchArgs { + url: String, + name: String, + hash: Option, +} + +impl FetchArgs { + pub fn new( + url: String, + name: Option, + sha256: Option, + mode: HashMode, + ) -> nixhash::Result { + Ok(Self { + name: name.unwrap_or_else(|| url_basename(&url).to_owned()), + url, + hash: sha256 + .map(|h| { + let 
hash = nixhash::from_str(&h, Some("sha256"))?; + Ok(match mode { + HashMode::Flat => Some(nixhash::CAHash::Flat(hash)), + HashMode::Recursive => Some(nixhash::CAHash::Nar(hash)), + }) + }) + .transpose()? + .flatten(), + }) + } + + fn store_path(&self) -> Result, ErrorKind> { + let Some(h) = &self.hash else { + return Ok(None); + }; + build_ca_path(&self.name, h, Vec::::new(), false) + .map(Some) + .map_err(|e| FetcherError::from(e).into()) + } + + async fn extract( + co: &GenCo, + args: Value, + default_name: Option<&str>, + mode: HashMode, + ) -> Result, ErrorKind> { + if let Ok(url) = args.to_str() { + return Ok(Ok(FetchArgs::new( + url.to_str()?.to_owned(), + None, + None, + mode, + ) + .map_err(DerivationError::InvalidOutputHash)?)); + } + + let attrs = args.to_attrs().map_err(|_| ErrorKind::TypeError { + expected: "attribute set or string", + actual: args.type_of(), + })?; + + let url = match select_string(co, &attrs, "url").await? { + Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?, + Err(cek) => return Ok(Err(cek)), + }; + let name = match select_string(co, &attrs, "name").await? { + Ok(s) => s.or_else(|| default_name.map(|s| s.to_owned())), + Err(cek) => return Ok(Err(cek)), + }; + let sha256 = match select_string(co, &attrs, "sha256").await? { + Ok(s) => s, + Err(cek) => return Ok(Err(cek)), + }; + + Ok(Ok( + FetchArgs::new(url, name, sha256, mode).map_err(DerivationError::InvalidOutputHash)? 
+ )) + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum FetchMode { + Url, + Tarball, +} + +impl From for HashMode { + fn from(value: FetchMode) -> Self { + match value { + FetchMode::Url => HashMode::Flat, + FetchMode::Tarball => HashMode::Recursive, + } + } +} + +impl FetchMode { + fn default_name(self) -> Option<&'static str> { + match self { + FetchMode::Url => None, + FetchMode::Tarball => Some("source"), + } + } +} + +fn string_from_store_path(store_path: StorePathRef) -> NixString { + NixString::new_context_from( + NixContextElement::Plain(store_path.to_absolute_path()).into(), + store_path.to_absolute_path(), + ) +} + +async fn fetch( + state: Rc, + co: GenCo, + args: Value, + mode: FetchMode, +) -> Result { + let args = match FetchArgs::extract(&co, args, mode.default_name(), mode.into()).await? { + Ok(args) => args, + Err(cek) => return Ok(cek.into()), + }; + + if let Some(store_path) = args.store_path()? { + if state.store_path_exists(store_path).await? { + return Ok(string_from_store_path(store_path).into()); + } + } + + let hash = args.hash.as_ref().map(|h| h.hash()); + let store_path = Rc::clone(&state).tokio_handle.block_on(state.fetch_url( + &args.url, + &args.name, + hash.as_deref(), + ))?; + + Ok(string_from_store_path(store_path.as_ref()).into()) +} #[allow(unused_variables)] // for the `state` arg, for now #[builtins(state = "Rc")] @@ -11,15 +191,14 @@ pub(crate) mod fetcher_builtins { use super::*; use tvix_eval::generators::Gen; - use tvix_eval::{generators::GenCo, ErrorKind}; #[builtin("fetchurl")] async fn builtin_fetchurl( state: Rc, co: GenCo, - url: Value, + args: Value, ) -> Result { - Err(ErrorKind::NotImplemented("fetchurl")) + fetch(state, co, args, FetchMode::Url).await } #[builtin("fetchTarball")] @@ -28,7 +207,7 @@ pub(crate) mod fetcher_builtins { co: GenCo, args: Value, ) -> Result { - Err(ErrorKind::NotImplemented("fetchTarball")) + fetch(state, co, args, FetchMode::Tarball).await } #[builtin("fetchGit")] @@ -40,3 
+219,71 @@ pub(crate) mod fetcher_builtins { Err(ErrorKind::NotImplemented("fetchGit")) } } + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use nix_compat::store_path::StorePath; + + use super::*; + + #[test] + fn fetchurl_store_path() { + let url = "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch"; + let sha256 = "0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax"; + let args = FetchArgs::new(url.into(), None, Some(sha256.into()), HashMode::Flat).unwrap(); + + assert_eq!( + args.store_path().unwrap().unwrap().to_owned(), + StorePath::from_str("06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap() + ) + } + + #[test] + fn fetch_tarball_store_path() { + let url = "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz"; + let sha256 = "1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm"; + let args = FetchArgs::new( + url.into(), + Some("source".into()), + Some(sha256.into()), + HashMode::Recursive, + ) + .unwrap(); + + assert_eq!( + args.store_path().unwrap().unwrap().to_owned(), + StorePath::from_str("7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap() + ) + } + + mod url_basename { + use super::*; + + #[test] + fn empty_path() { + assert_eq!(url_basename(""), ""); + } + + #[test] + fn path_on_root() { + assert_eq!(url_basename("/dir"), "dir"); + } + + #[test] + fn relative_path() { + assert_eq!(url_basename("dir/foo"), "foo"); + } + + #[test] + fn root_with_trailing_slash() { + assert_eq!(url_basename("/"), ""); + } + + #[test] + fn trailing_slash() { + assert_eq!(url_basename("/dir/"), "dir"); + } + } +} -- cgit 1.4.1