From 30950833c943c6b6b48d204ab0027f38af356f5c Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Mon, 22 Apr 2024 15:18:14 +0300 Subject: feat(tvix/glue/store_io): have KnownPaths track fetches too Have fetcher builtins call queue_fetch() whenever they don't need to fetch something immediately, and teach TvixStoreIO::store_path_to_node on how to look up (and call ingest_and persist on our Fetcher). Change-Id: Id4bd9d639fac9e4bee20c0b1c584148740b15c2f Reviewed-on: https://cl.tvl.fyi/c/depot/+/11501 Reviewed-by: raitobezarius Tested-by: BuildkiteCI Autosubmit: flokli --- tvix/glue/src/builtins/derivation.rs | 2 +- tvix/glue/src/builtins/fetchers.rs | 19 ++++-- tvix/glue/src/fetchers.rs | 2 +- tvix/glue/src/known_paths.rs | 111 ++++++++++++++++++++++++++++++++--- tvix/glue/src/tvix_store_io.rs | 32 +++++++++- 5 files changed, 147 insertions(+), 19 deletions(-) (limited to 'tvix') diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs index 1a8d18943e..8c7df96f91 100644 --- a/tvix/glue/src/builtins/derivation.rs +++ b/tvix/glue/src/builtins/derivation.rs @@ -475,7 +475,7 @@ pub(crate) mod derivation_builtins { .map_err(DerivationError::InvalidDerivation)?; // TODO: avoid cloning - known_paths.add(drv_path.clone(), drv.clone()); + known_paths.add_derivation(drv_path.clone(), drv.clone()); let mut new_attrs: Vec<(String, NixString)> = drv .outputs diff --git a/tvix/glue/src/builtins/fetchers.rs b/tvix/glue/src/builtins/fetchers.rs index ec5dd969bc..0de3d54625 100644 --- a/tvix/glue/src/builtins/fetchers.rs +++ b/tvix/glue/src/builtins/fetchers.rs @@ -87,11 +87,20 @@ pub(crate) mod fetcher_builtins { .map_err(|e| ErrorKind::TvixError(Rc::new(e)))? { Some(store_path) => { - let path = store_path.to_absolute_path().into(); - // TODO: add fetch to fetcher - drop(fetch); - - Ok(Value::Path(Box::new(path))) + // Move the fetch to KnownPaths, so it can be actually fetched later. + let sp = state + .known_paths + .borrow_mut() + .add_fetch(fetch, &name) + .expect("Tvix bug: should only fail if the store path cannot be calculated"); + + debug_assert_eq!( + sp, store_path, + "calculated store path by KnownPaths should match" + ); + + // Emit the calculated Store Path. + Ok(Value::Path(Box::new(store_path.to_absolute_path().into()))) } None => { // If we don't have enough info, do the fetch now. diff --git a/tvix/glue/src/fetchers.rs b/tvix/glue/src/fetchers.rs index 977be4a203..ee0111c88f 100644 --- a/tvix/glue/src/fetchers.rs +++ b/tvix/glue/src/fetchers.rs @@ -19,7 +19,7 @@ use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo}; use crate::{builtins::FetcherError, decompression::DecompressedReader}; /// Representing options for doing a fetch. -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub enum Fetch { /// Fetch a literal file from the given URL, with an optional expected /// NixHash of it. diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs index 9cd9470fa9..873efdcf8a 100644 --- a/tvix/glue/src/known_paths.rs +++ b/tvix/glue/src/known_paths.rs @@ -8,9 +8,14 @@ //! This data is required to find the derivation needed to actually trigger the //! build, if necessary. -use nix_compat::{derivation::Derivation, store_path::StorePath}; +use nix_compat::{ + derivation::Derivation, + store_path::{BuildStorePathError, StorePath, StorePathRef}, +}; use std::collections::HashMap; +use crate::fetchers::Fetch; + /// Struct keeping track of all known Derivations in the current evaluation. /// This keeps both the Derivation struct, as well as the "Hash derivation /// modulo". @@ -26,6 +31,9 @@ pub struct KnownPaths { /// Note that in the case of FODs, multiple drvs can produce the same output /// path. We use one of them. outputs_to_drvpath: HashMap, + + /// A map from output path to fetches (and their names). + outputs_to_fetches: HashMap, } impl KnownPaths { @@ -50,12 +58,12 @@ impl KnownPaths { self.outputs_to_drvpath.get(output_path) } - /// Insert a new Derivation into this struct. + /// Insert a new [Derivation] into this struct. /// The Derivation struct must pass validation, and its output paths need to /// be fully calculated. /// All input derivations this refers to must also be inserted to this /// struct. - pub fn add(&mut self, drv_path: StorePath, drv: Derivation) { + pub fn add_derivation(&mut self, drv_path: StorePath, drv: Derivation) { // check input derivations to have been inserted. #[cfg(debug_assertions)] { @@ -95,11 +103,39 @@ impl KnownPaths { } } } + + /// Insert a new [Fetch] into this struct, which *must* have an expected + /// hash (otherwise we wouldn't be able to calculate the store path). + /// Fetches without a known hash need to be fetched inside builtins. + pub fn add_fetch<'a>( + &mut self, + fetch: Fetch, + name: &'a str, + ) -> Result, BuildStorePathError> { + let store_path = fetch + .store_path(name)? + .expect("Tvix bug: fetch must have an expected hash"); + // insert the fetch. + self.outputs_to_fetches + .insert(store_path.to_owned(), (name.to_owned(), fetch)); + + Ok(store_path) + } + + /// Return the name and fetch producing the passed output path. + /// Note there can also be (multiple) Derivations producing the same output path. + pub fn get_fetch_for_output_path(&self, output_path: &StorePath) -> Option<(String, Fetch)> { + self.outputs_to_fetches + .get(output_path) + .map(|(name, fetch)| (name.to_owned(), fetch.to_owned())) + } } #[cfg(test)] mod tests { - use nix_compat::{derivation::Derivation, store_path::StorePath}; + use nix_compat::{derivation::Derivation, nixbase32, nixhash::NixHash, store_path::StorePath}; + + use crate::fetchers::Fetch; use super::KnownPaths; use hex_literal::hex; @@ -122,21 +158,33 @@ mod tests { StorePath::from_bytes(b"mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar").expect("must parse"); static ref FOO_OUT_PATH: StorePath = StorePath::from_bytes(b"fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo").expect("must parse"); + + static ref FETCH_URL : Fetch = Fetch::URL( + "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch".into(), + Some(NixHash::Sha256(nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax").unwrap())) + ); + static ref FETCH_URL_OUT_PATH: StorePath = StorePath::from_bytes(b"06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap(); + + static ref FETCH_TARBALL : Fetch = Fetch::Tarball( + "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz".into(), + Some(nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm").unwrap()) + ); + static ref FETCH_TARBALL_OUT_PATH: StorePath = StorePath::from_bytes(b"7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap(); } /// ensure we don't allow acdding a Derivation that depends on another, /// not-yet-added Derivation. #[test] #[should_panic] - fn reject_if_missing_input_drv() { + fn drv_reject_if_missing_input_drv() { let mut known_paths = KnownPaths::default(); // FOO_DRV depends on BAR_DRV, which wasn't added. - known_paths.add(FOO_DRV_PATH.clone(), FOO_DRV.clone()); + known_paths.add_derivation(FOO_DRV_PATH.clone(), FOO_DRV.clone()); } #[test] - fn happy_path() { + fn drv_happy_path() { let mut known_paths = KnownPaths::default(); // get_drv_by_drvpath should return None for non-existing Derivations, @@ -149,7 +197,7 @@ mod tests { ); // Add BAR_DRV - known_paths.add(BAR_DRV_PATH.clone(), BAR_DRV.clone()); + known_paths.add_derivation(BAR_DRV_PATH.clone(), BAR_DRV.clone()); // We should get it back assert_eq!( @@ -173,7 +221,7 @@ mod tests { // Now insert FOO_DRV too. It shouldn't panic, as BAR_DRV is already // added. - known_paths.add(FOO_DRV_PATH.clone(), FOO_DRV.clone()); + known_paths.add_derivation(FOO_DRV_PATH.clone(), FOO_DRV.clone()); assert_eq!( Some(&FOO_DRV.clone()), @@ -192,4 +240,49 @@ mod tests { known_paths.get_drv_path_for_output_path(&FOO_OUT_PATH) ); } + + #[test] + fn fetch_happy_path() { + let mut known_paths = KnownPaths::default(); + + // get_fetch_for_output_path should return None for new fetches. + assert!(known_paths + .get_fetch_for_output_path(&FETCH_TARBALL_OUT_PATH) + .is_none()); + + // add_fetch should return the properly calculated store paths. + assert_eq!( + *FETCH_TARBALL_OUT_PATH, + known_paths + .add_fetch(FETCH_TARBALL.clone(), "source") + .unwrap() + .to_owned() + ); + + assert_eq!( + *FETCH_URL_OUT_PATH, + known_paths + .add_fetch(FETCH_URL.clone(), "notmuch-extract-patch") + .unwrap() + .to_owned() + ); + + // We should be able to get these fetches out, when asking for their out path. + let (got_name, got_fetch) = known_paths + .get_fetch_for_output_path(&FETCH_URL_OUT_PATH) + .expect("must be some"); + + assert_eq!("notmuch-extract-patch", got_name); + assert_eq!(FETCH_URL.clone(), got_fetch); + + // … multiple times. + let (got_name, got_fetch) = known_paths + .get_fetch_for_output_path(&FETCH_URL_OUT_PATH) + .expect("must be some"); + + assert_eq!("notmuch-extract-patch", got_name); + assert_eq!(FETCH_URL.clone(), got_fetch); + } + + // TODO: add test panicking about missing digest } diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs index 0994c44dfa..c7133e1d10 100644 --- a/tvix/glue/src/tvix_store_io.rs +++ b/tvix/glue/src/tvix_store_io.rs @@ -15,7 +15,7 @@ use std::{ sync::Arc, }; use tokio_util::io::SyncIoBridge; -use tracing::{error, instrument, warn, Level}; +use tracing::{error, info, instrument, warn, Level}; use tvix_build::buildservice::BuildService; use tvix_castore::proto::node::Node; use tvix_eval::{EvalIO, FileType, StdIO}; @@ -61,6 +61,7 @@ pub struct TvixStoreIO { pub(crate) fetcher: Fetcher, Arc, Arc>, + // Paths known how to produce, by building or fetching. pub(crate) known_paths: RefCell, } @@ -121,8 +122,31 @@ impl TvixStoreIO { // it for things like pointing to a store path. // In the future, these things will (need to) have PathInfo. None => { - // The store path doesn't exist yet, so we need to build it. - warn!("triggering build"); + // The store path doesn't exist yet, so we need to fetch or build it. + // We check for fetches first, as we might have both native + // fetchers and FODs in KnownPaths, and prefer the former. + + let maybe_fetch = self + .known_paths + .borrow() + .get_fetch_for_output_path(store_path); + + if let Some((name, fetch)) = maybe_fetch { + info!(?fetch, "triggering lazy fetch"); + let (sp, root_node) = self + .fetcher + .ingest_and_persist(&name, fetch) + .await + .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?; + + debug_assert_eq!( + sp.to_string(), + store_path.to_string(), + "store path returned from fetcher should match" + ); + + return Ok(Some(root_node)); + } // Look up the derivation for this output path. let (drv_path, drv) = { @@ -140,6 +164,8 @@ impl TvixStoreIO { } }; + warn!("triggering build"); + // derivation_to_build_request needs castore nodes for all inputs. // Provide them, which means, here is where we recursively build // all dependencies. -- cgit 1.4.1