diff options
author | Florian Klink <flokli@flokli.de> | 2024-04-22T11·02+0300 |
---|---|---|
committer | clbot <clbot@tvl.fyi> | 2024-04-23T12·30+0000 |
commit | 091de12a9a735e71c119e543dab9f2999a36a5a1 (patch) | |
tree | 8df65d32934e026cec80256226da031954f715c1 /tvix/glue/src/builtins/fetchers.rs | |
parent | dc444e55dcb75a634bce94fef9e29d90ea90fe5f (diff) |
refactor(tvix/glue): move Fetch[er] into its own types, fetch lazily r/7993
We actually want to delay fetching until we actually need the file. A simple evaluation asking for `.outPath` or `.drvPath` should work even in a pure offline environment. Before this CL, the fetching logic was quite distributed between tvix_store_io, and builtins/fetchers.rs. Rather than having various functions and conversions between structs, describe a Fetch as an enum type, with the fields describing the fetch. Define a store_path() function on top of `Fetch` which can be used to ask for the calculated store path (if the digest has been provided upfront). Have a `Fetcher` struct, and give it a `fetch_and_persist` function, taking a `Fetch` as well as a desired name, and have it deal with all the logic of persisting the PathInfos. It also returns a StorePathRef, similar to the `.store_path()` method on a `Fetch` struct. In a followup CL, we can extend KnownPaths to track fetches AND derivations, and then use `Fetcher` when we need to do IO into that store path. Change-Id: Ib39a96baeb661750a8706b461f8ba4abb342e777 Reviewed-on: https://cl.tvl.fyi/c/depot/+/11500 Reviewed-by: raitobezarius <tvl@lahfa.xyz> Autosubmit: flokli <flokli@flokli.de> Tested-by: BuildkiteCI
Diffstat (limited to 'tvix/glue/src/builtins/fetchers.rs')
-rw-r--r-- | tvix/glue/src/builtins/fetchers.rs | 355 |
1 files changed, 109 insertions, 246 deletions
diff --git a/tvix/glue/src/builtins/fetchers.rs b/tvix/glue/src/builtins/fetchers.rs index d5735b7d09a7..ec5dd969bced 100644 --- a/tvix/glue/src/builtins/fetchers.rs +++ b/tvix/glue/src/builtins/fetchers.rs @@ -1,197 +1,74 @@ //! Contains builtins that fetch paths from the Internet -use crate::tvix_store_io::TvixStoreIO; -use bstr::ByteSlice; -use nix_compat::nixhash::{self, CAHash}; -use nix_compat::store_path::{build_ca_path, StorePathRef}; +use super::utils::select_string; +use crate::{ + fetchers::{url_basename, Fetch}, + tvix_store_io::TvixStoreIO, +}; +use nix_compat::nixhash; +use nix_compat::nixhash::NixHash; use std::rc::Rc; +use tracing::info; use tvix_eval::builtin_macros::builtins; +use tvix_eval::generators::Gen; use tvix_eval::generators::GenCo; -use tvix_eval::{CatchableErrorKind, ErrorKind, NixContextElement, NixString, Value}; - -use super::utils::select_string; -use super::{DerivationError, FetcherError}; - -/// Attempts to mimic `nix::libutil::baseNameOf` -fn url_basename(s: &str) -> &str { - if s.is_empty() { - return ""; - } - - let mut last = s.len() - 1; - if s.chars().nth(last).unwrap() == '/' && last > 0 { - last -= 1; - } - - if last == 0 { - return ""; - } +use tvix_eval::{CatchableErrorKind, ErrorKind, Value}; - let pos = match s[..=last].rfind('/') { - Some(pos) => { - if pos == last - 1 { - 0 - } else { - pos - } - } - None => 0, - }; - - &s[(pos + 1)..=last] -} - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum HashMode { - Flat, - Recursive, -} - -/// Struct representing the arguments passed to fetcher functions -#[derive(Debug, PartialEq, Eq)] -struct FetchArgs { +struct NixFetchArgs { url: String, - name: String, - hash: Option<CAHash>, + name: Option<String>, + sha256: Option<[u8; 32]>, } -impl FetchArgs { - pub fn new( - url: String, - name: Option<String>, - sha256: Option<String>, - mode: HashMode, - ) -> nixhash::NixHashResult<Self> { - Ok(Self { - name: name.unwrap_or_else(|| url_basename(&url).to_owned()), +// `fetchurl` and `fetchTarball` accept a single argument, which can either be the URL (as string), +// or an attrset, where `url`, `sha256` and `name` keys are allowed. +async fn extract_fetch_args( + co: &GenCo, + args: Value, +) -> Result<Result<NixFetchArgs, CatchableErrorKind>, ErrorKind> { + if let Ok(url) = args.to_str() { + // Get the raw bytes, not the ToString repr. + let url = String::from_utf8(url.as_bytes().to_vec()).map_err(|_| ErrorKind::Utf8)?; + return Ok(Ok(NixFetchArgs { url, - hash: sha256 - .map(|h| { - let hash = nixhash::from_str(&h, Some("sha256"))?; - Ok(match mode { - HashMode::Flat => Some(nixhash::CAHash::Flat(hash)), - HashMode::Recursive => Some(nixhash::CAHash::Nar(hash)), - }) - }) - .transpose()? - .flatten(), - }) + name: None, + sha256: None, + })); } - fn store_path(&self) -> Result<Option<StorePathRef>, ErrorKind> { - let Some(h) = &self.hash else { - return Ok(None); - }; - build_ca_path(&self.name, h, Vec::<String>::new(), false) - .map(Some) - .map_err(|e| FetcherError::from(e).into()) - } - - async fn extract( - co: &GenCo, - args: Value, - default_name: Option<&str>, - mode: HashMode, - ) -> Result<Result<Self, CatchableErrorKind>, ErrorKind> { - if let Ok(url) = args.to_str() { - return Ok(Ok(FetchArgs::new( - url.to_str()?.to_owned(), - None, - None, - mode, - ) - .map_err(DerivationError::InvalidOutputHash)?)); - } - - let attrs = args.to_attrs().map_err(|_| ErrorKind::TypeError { - expected: "attribute set or string", - actual: args.type_of(), - })?; - - let url = match select_string(co, &attrs, "url").await? { - Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?, - Err(cek) => return Ok(Err(cek)), - }; - let name = match select_string(co, &attrs, "name").await? { - Ok(s) => s.or_else(|| default_name.map(|s| s.to_owned())), - Err(cek) => return Ok(Err(cek)), - }; - let sha256 = match select_string(co, &attrs, "sha256").await? { - Ok(s) => s, - Err(cek) => return Ok(Err(cek)), - }; + let attrs = args.to_attrs().map_err(|_| ErrorKind::TypeError { + expected: "attribute set or contextless string", + actual: args.type_of(), + })?; - Ok(Ok( - FetchArgs::new(url, name, sha256, mode).map_err(DerivationError::InvalidOutputHash)? - )) - } -} + let url = match select_string(co, &attrs, "url").await? { + Ok(s) => s.ok_or_else(|| ErrorKind::AttributeNotFound { name: "url".into() })?, + Err(cek) => return Ok(Err(cek)), + }; + let name = match select_string(co, &attrs, "name").await? { + Ok(s) => s, + Err(cek) => return Ok(Err(cek)), + }; + let sha256_str = match select_string(co, &attrs, "sha256").await? { + Ok(s) => s, + Err(cek) => return Ok(Err(cek)), + }; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum FetchMode { - Url, - Tarball, -} + // TODO: disallow other attrset keys, to match Nix' behaviour. -impl From<FetchMode> for HashMode { - fn from(value: FetchMode) -> Self { - match value { - FetchMode::Url => HashMode::Flat, - FetchMode::Tarball => HashMode::Recursive, - } - } -} + // parse the sha256 string into a digest. + let sha256 = match sha256_str { + Some(sha256_str) => { + let nixhash = nixhash::from_str(&sha256_str, Some("sha256")) + // TODO: DerivationError::InvalidOutputHash should be moved to ErrorKind::InvalidHash and used here instead + .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; -impl FetchMode { - fn default_name(self) -> Option<&'static str> { - match self { - FetchMode::Url => None, - FetchMode::Tarball => Some("source"), + Some(nixhash.digest_as_bytes().try_into().expect("is sha256")) } - } -} - -fn string_from_store_path(store_path: StorePathRef) -> NixString { - NixString::new_context_from( - NixContextElement::Plain(store_path.to_absolute_path()).into(), - store_path.to_absolute_path(), - ) -} - -async fn fetch( - state: Rc<TvixStoreIO>, - co: GenCo, - args: Value, - mode: FetchMode, -) -> Result<Value, ErrorKind> { - let args = match FetchArgs::extract(&co, args, mode.default_name(), mode.into()).await? { - Ok(args) => args, - Err(cek) => return Ok(cek.into()), + None => None, }; - if let Some(store_path) = args.store_path()? { - if state.store_path_exists(store_path).await? { - return Ok(string_from_store_path(store_path).into()); - } - } - - let ca = args.hash; - let store_path = Rc::clone(&state).tokio_handle.block_on(async move { - match mode { - FetchMode::Url => { - state - .fetch_url( - &args.url, - &args.name, - ca.as_ref().map(|c| c.hash().into_owned()).as_ref(), - ) - .await - } - FetchMode::Tarball => state.fetch_tarball(&args.url, &args.name, ca).await, - } - })?; - - Ok(string_from_store_path(store_path.as_ref()).into()) + Ok(Ok(NixFetchArgs { url, name, sha256 })) } #[allow(unused_variables)] // for the `state` arg, for now @@ -199,7 +76,36 @@ async fn fetch( pub(crate) mod fetcher_builtins { use super::*; - use tvix_eval::generators::Gen; + /// Consumes a fetch. + /// If there is enough info to calculate the store path without fetching, + /// queue the fetch to be fetched lazily, and return the store path. + /// If there's not enough info to calculate it, do the fetch now, and then + /// return the store path. + fn fetch_lazy(state: Rc<TvixStoreIO>, name: String, fetch: Fetch) -> Result<Value, ErrorKind> { + match fetch + .store_path(&name) + .map_err(|e| ErrorKind::TvixError(Rc::new(e)))? + { + Some(store_path) => { + let path = store_path.to_absolute_path().into(); + // TODO: add fetch to fetcher + drop(fetch); + + Ok(Value::Path(Box::new(path))) + } + None => { + // If we don't have enough info, do the fetch now. + info!(?fetch, "triggering required fetch"); + + let (store_path, _root_node) = state + .tokio_handle + .block_on(async { state.fetcher.ingest_and_persist(&name, fetch).await }) + .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?; + + Ok(Value::Path(Box::new(store_path.to_absolute_path().into()))) + } + } + } #[builtin("fetchurl")] async fn builtin_fetchurl( @@ -207,7 +113,21 @@ pub(crate) mod fetcher_builtins { co: GenCo, args: Value, ) -> Result<Value, ErrorKind> { - fetch(state, co, args, FetchMode::Url).await + let args = match extract_fetch_args(&co, args).await? { + Ok(args) => args, + Err(cek) => return Ok(Value::from(cek)), + }; + + // Derive the name from the URL basename if not set explicitly. + let name = args + .name + .unwrap_or_else(|| url_basename(&args.url).to_owned()); + + fetch_lazy( + state, + name, + Fetch::URL(args.url, args.sha256.map(NixHash::Sha256)), + ) } #[builtin("fetchTarball")] @@ -216,7 +136,18 @@ pub(crate) mod fetcher_builtins { co: GenCo, args: Value, ) -> Result<Value, ErrorKind> { - fetch(state, co, args, FetchMode::Tarball).await + let args = match extract_fetch_args(&co, args).await? { + Ok(args) => args, + Err(cek) => return Ok(Value::from(cek)), + }; + + // Name defaults to "source" if not set explicitly. + const DEFAULT_NAME_FETCH_TARBALL: &str = "source"; + let name = args + .name + .unwrap_or_else(|| DEFAULT_NAME_FETCH_TARBALL.to_owned()); + + fetch_lazy(state, name, Fetch::Tarball(args.url, args.sha256)) } #[builtin("fetchGit")] @@ -228,71 +159,3 @@ pub(crate) mod fetcher_builtins { Err(ErrorKind::NotImplemented("fetchGit")) } } - -#[cfg(test)] -mod tests { - use std::str::FromStr; - - use nix_compat::store_path::StorePath; - - use super::*; - - #[test] - fn fetchurl_store_path() { - let url = "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch"; - let sha256 = "0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax"; - let args = FetchArgs::new(url.into(), None, Some(sha256.into()), HashMode::Flat).unwrap(); - - assert_eq!( - args.store_path().unwrap().unwrap().to_owned(), - StorePath::from_str("06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap() - ) - } - - #[test] - fn fetch_tarball_store_path() { - let url = "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz"; - let sha256 = "1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm"; - let args = FetchArgs::new( - url.into(), - Some("source".into()), - Some(sha256.into()), - HashMode::Recursive, - ) - .unwrap(); - - assert_eq!( - args.store_path().unwrap().unwrap().to_owned(), - StorePath::from_str("7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap() - ) - } - - mod url_basename { - use super::*; - - #[test] - fn empty_path() { - assert_eq!(url_basename(""), ""); - } - - #[test] - fn path_on_root() { - assert_eq!(url_basename("/dir"), "dir"); - } - - #[test] - fn relative_path() { - assert_eq!(url_basename("dir/foo"), "foo"); - } - - #[test] - fn root_with_trailing_slash() { - assert_eq!(url_basename("/"), ""); - } - - #[test] - fn trailing_slash() { - assert_eq!(url_basename("/dir/"), "dir"); - } - } -} |