about summary refs log tree commit diff
diff options
context:
space:
mode:
authorFlorian Klink <flokli@flokli.de>2024-04-22T12·18+0300
committerclbot <clbot@tvl.fyi>2024-04-23T12·40+0000
commit30950833c943c6b6b48d204ab0027f38af356f5c (patch)
treeda2cd1c09a8568cb2b2916a4b2f0a294634c0c9c
parent091de12a9a735e71c119e543dab9f2999a36a5a1 (diff)
feat(tvix/glue/store_io): have KnownPaths track fetches too r/7994
Have fetcher builtins call queue_fetch() whenever they don't need to
fetch something immediately, and teach TvixStoreIO::store_path_to_node
on how to look up (and call ingest_and persist on our Fetcher).

Change-Id: Id4bd9d639fac9e4bee20c0b1c584148740b15c2f
Reviewed-on: https://cl.tvl.fyi/c/depot/+/11501
Reviewed-by: raitobezarius <tvl@lahfa.xyz>
Tested-by: BuildkiteCI
Autosubmit: flokli <flokli@flokli.de>
-rw-r--r--tvix/glue/src/builtins/derivation.rs2
-rw-r--r--tvix/glue/src/builtins/fetchers.rs19
-rw-r--r--tvix/glue/src/fetchers.rs2
-rw-r--r--tvix/glue/src/known_paths.rs111
-rw-r--r--tvix/glue/src/tvix_store_io.rs32
5 files changed, 147 insertions, 19 deletions
diff --git a/tvix/glue/src/builtins/derivation.rs b/tvix/glue/src/builtins/derivation.rs
index 1a8d18943ebe..8c7df96f91c1 100644
--- a/tvix/glue/src/builtins/derivation.rs
+++ b/tvix/glue/src/builtins/derivation.rs
@@ -475,7 +475,7 @@ pub(crate) mod derivation_builtins {
             .map_err(DerivationError::InvalidDerivation)?;
 
         // TODO: avoid cloning
-        known_paths.add(drv_path.clone(), drv.clone());
+        known_paths.add_derivation(drv_path.clone(), drv.clone());
 
         let mut new_attrs: Vec<(String, NixString)> = drv
             .outputs
diff --git a/tvix/glue/src/builtins/fetchers.rs b/tvix/glue/src/builtins/fetchers.rs
index ec5dd969bced..0de3d5462561 100644
--- a/tvix/glue/src/builtins/fetchers.rs
+++ b/tvix/glue/src/builtins/fetchers.rs
@@ -87,11 +87,20 @@ pub(crate) mod fetcher_builtins {
             .map_err(|e| ErrorKind::TvixError(Rc::new(e)))?
         {
             Some(store_path) => {
-                let path = store_path.to_absolute_path().into();
-                // TODO: add fetch to fetcher
-                drop(fetch);
-
-                Ok(Value::Path(Box::new(path)))
+                // Move the fetch to KnownPaths, so it can be actually fetched later.
+                let sp = state
+                    .known_paths
+                    .borrow_mut()
+                    .add_fetch(fetch, &name)
+                    .expect("Tvix bug: should only fail if the store path cannot be calculated");
+
+                debug_assert_eq!(
+                    sp, store_path,
+                    "calculated store path by KnownPaths should match"
+                );
+
+                // Emit the calculated Store Path.
+                Ok(Value::Path(Box::new(store_path.to_absolute_path().into())))
             }
             None => {
                 // If we don't have enough info, do the fetch now.
diff --git a/tvix/glue/src/fetchers.rs b/tvix/glue/src/fetchers.rs
index 977be4a203af..ee0111c88f4f 100644
--- a/tvix/glue/src/fetchers.rs
+++ b/tvix/glue/src/fetchers.rs
@@ -19,7 +19,7 @@ use tvix_store::{pathinfoservice::PathInfoService, proto::PathInfo};
 use crate::{builtins::FetcherError, decompression::DecompressedReader};
 
 /// Representing options for doing a fetch.
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, Eq, PartialEq)]
 pub enum Fetch {
     /// Fetch a literal file from the given URL, with an optional expected
     /// NixHash of it.
diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs
index 9cd9470fa949..873efdcf8a06 100644
--- a/tvix/glue/src/known_paths.rs
+++ b/tvix/glue/src/known_paths.rs
@@ -8,9 +8,14 @@
 //! This data is required to find the derivation needed to actually trigger the
 //! build, if necessary.
 
-use nix_compat::{derivation::Derivation, store_path::StorePath};
+use nix_compat::{
+    derivation::Derivation,
+    store_path::{BuildStorePathError, StorePath, StorePathRef},
+};
 use std::collections::HashMap;
 
+use crate::fetchers::Fetch;
+
 /// Struct keeping track of all known Derivations in the current evaluation.
 /// This keeps both the Derivation struct, as well as the "Hash derivation
 /// modulo".
@@ -26,6 +31,9 @@ pub struct KnownPaths {
     /// Note that in the case of FODs, multiple drvs can produce the same output
     /// path. We use one of them.
     outputs_to_drvpath: HashMap<StorePath, StorePath>,
+
+    /// A map from output path to fetches (and their names).
+    outputs_to_fetches: HashMap<StorePath, (String, Fetch)>,
 }
 
 impl KnownPaths {
@@ -50,12 +58,12 @@ impl KnownPaths {
         self.outputs_to_drvpath.get(output_path)
     }
 
-    /// Insert a new Derivation into this struct.
+    /// Insert a new [Derivation] into this struct.
     /// The Derivation struct must pass validation, and its output paths need to
     /// be fully calculated.
     /// All input derivations this refers to must also be inserted to this
     /// struct.
-    pub fn add(&mut self, drv_path: StorePath, drv: Derivation) {
+    pub fn add_derivation(&mut self, drv_path: StorePath, drv: Derivation) {
         // check input derivations to have been inserted.
         #[cfg(debug_assertions)]
         {
@@ -95,11 +103,39 @@ impl KnownPaths {
             }
         }
     }
+
+    /// Insert a new [Fetch] into this struct, which *must* have an expected
+    /// hash (otherwise we wouldn't be able to calculate the store path).
+    /// Fetches without a known hash need to be fetched inside builtins.
+    pub fn add_fetch<'a>(
+        &mut self,
+        fetch: Fetch,
+        name: &'a str,
+    ) -> Result<StorePathRef<'a>, BuildStorePathError> {
+        let store_path = fetch
+            .store_path(name)?
+            .expect("Tvix bug: fetch must have an expected hash");
+        // insert the fetch.
+        self.outputs_to_fetches
+            .insert(store_path.to_owned(), (name.to_owned(), fetch));
+
+        Ok(store_path)
+    }
+
+    /// Return the name and fetch producing the passed output path.
+    /// Note there can also be (multiple) Derivations producing the same output path.
+    pub fn get_fetch_for_output_path(&self, output_path: &StorePath) -> Option<(String, Fetch)> {
+        self.outputs_to_fetches
+            .get(output_path)
+            .map(|(name, fetch)| (name.to_owned(), fetch.to_owned()))
+    }
 }
 
 #[cfg(test)]
 mod tests {
-    use nix_compat::{derivation::Derivation, store_path::StorePath};
+    use nix_compat::{derivation::Derivation, nixbase32, nixhash::NixHash, store_path::StorePath};
+
+    use crate::fetchers::Fetch;
 
     use super::KnownPaths;
     use hex_literal::hex;
@@ -122,21 +158,33 @@ mod tests {
             StorePath::from_bytes(b"mp57d33657rf34lzvlbpfa1gjfv5gmpg-bar").expect("must parse");
         static ref FOO_OUT_PATH: StorePath =
             StorePath::from_bytes(b"fhaj6gmwns62s6ypkcldbaj2ybvkhx3p-foo").expect("must parse");
+
+        static ref FETCH_URL : Fetch = Fetch::URL(
+            "https://raw.githubusercontent.com/aaptel/notmuch-extract-patch/f732a53e12a7c91a06755ebfab2007adc9b3063b/notmuch-extract-patch".into(),
+            Some(NixHash::Sha256(nixbase32::decode_fixed("0nawkl04sj7psw6ikzay7kydj3dhd0fkwghcsf5rzaw4bmp4kbax").unwrap()))
+        );
+        static ref FETCH_URL_OUT_PATH: StorePath = StorePath::from_bytes(b"06qi00hylriyfm0nl827crgjvbax84mz-notmuch-extract-patch").unwrap();
+
+        static ref FETCH_TARBALL : Fetch = Fetch::Tarball(
+            "https://github.com/NixOS/nixpkgs/archive/91050ea1e57e50388fa87a3302ba12d188ef723a.tar.gz".into(),
+            Some(nixbase32::decode_fixed("1hf6cgaci1n186kkkjq106ryf8mmlq9vnwgfwh625wa8hfgdn4dm").unwrap())
+        );
+        static ref FETCH_TARBALL_OUT_PATH: StorePath = StorePath::from_bytes(b"7adgvk5zdfq4pwrhsm3n9lzypb12gw0g-source").unwrap();
     }
 
     /// ensure we don't allow acdding a Derivation that depends on another,
     /// not-yet-added Derivation.
     #[test]
     #[should_panic]
-    fn reject_if_missing_input_drv() {
+    fn drv_reject_if_missing_input_drv() {
         let mut known_paths = KnownPaths::default();
 
         // FOO_DRV depends on BAR_DRV, which wasn't added.
-        known_paths.add(FOO_DRV_PATH.clone(), FOO_DRV.clone());
+        known_paths.add_derivation(FOO_DRV_PATH.clone(), FOO_DRV.clone());
     }
 
     #[test]
-    fn happy_path() {
+    fn drv_happy_path() {
         let mut known_paths = KnownPaths::default();
 
         // get_drv_by_drvpath should return None for non-existing Derivations,
@@ -149,7 +197,7 @@ mod tests {
         );
 
         // Add BAR_DRV
-        known_paths.add(BAR_DRV_PATH.clone(), BAR_DRV.clone());
+        known_paths.add_derivation(BAR_DRV_PATH.clone(), BAR_DRV.clone());
 
         // We should get it back
         assert_eq!(
@@ -173,7 +221,7 @@ mod tests {
 
         // Now insert FOO_DRV too. It shouldn't panic, as BAR_DRV is already
         // added.
-        known_paths.add(FOO_DRV_PATH.clone(), FOO_DRV.clone());
+        known_paths.add_derivation(FOO_DRV_PATH.clone(), FOO_DRV.clone());
 
         assert_eq!(
             Some(&FOO_DRV.clone()),
@@ -192,4 +240,49 @@ mod tests {
             known_paths.get_drv_path_for_output_path(&FOO_OUT_PATH)
         );
     }
+
+    #[test]
+    fn fetch_happy_path() {
+        let mut known_paths = KnownPaths::default();
+
+        // get_fetch_for_output_path should return None for new fetches.
+        assert!(known_paths
+            .get_fetch_for_output_path(&FETCH_TARBALL_OUT_PATH)
+            .is_none());
+
+        // add_fetch should return the properly calculated store paths.
+        assert_eq!(
+            *FETCH_TARBALL_OUT_PATH,
+            known_paths
+                .add_fetch(FETCH_TARBALL.clone(), "source")
+                .unwrap()
+                .to_owned()
+        );
+
+        assert_eq!(
+            *FETCH_URL_OUT_PATH,
+            known_paths
+                .add_fetch(FETCH_URL.clone(), "notmuch-extract-patch")
+                .unwrap()
+                .to_owned()
+        );
+
+        // We should be able to get these fetches out, when asking for their out path.
+        let (got_name, got_fetch) = known_paths
+            .get_fetch_for_output_path(&FETCH_URL_OUT_PATH)
+            .expect("must be some");
+
+        assert_eq!("notmuch-extract-patch", got_name);
+        assert_eq!(FETCH_URL.clone(), got_fetch);
+
+        // … multiple times.
+        let (got_name, got_fetch) = known_paths
+            .get_fetch_for_output_path(&FETCH_URL_OUT_PATH)
+            .expect("must be some");
+
+        assert_eq!("notmuch-extract-patch", got_name);
+        assert_eq!(FETCH_URL.clone(), got_fetch);
+    }
+
+    // TODO: add test panicking about missing digest
 }
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
index 0994c44dfaff..c7133e1d1086 100644
--- a/tvix/glue/src/tvix_store_io.rs
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -15,7 +15,7 @@ use std::{
     sync::Arc,
 };
 use tokio_util::io::SyncIoBridge;
-use tracing::{error, instrument, warn, Level};
+use tracing::{error, info, instrument, warn, Level};
 use tvix_build::buildservice::BuildService;
 use tvix_castore::proto::node::Node;
 use tvix_eval::{EvalIO, FileType, StdIO};
@@ -61,6 +61,7 @@ pub struct TvixStoreIO {
     pub(crate) fetcher:
         Fetcher<Arc<dyn BlobService>, Arc<dyn DirectoryService>, Arc<dyn PathInfoService>>,
 
+    // Paths known how to produce, by building or fetching.
     pub(crate) known_paths: RefCell<KnownPaths>,
 }
 
@@ -121,8 +122,31 @@ impl TvixStoreIO {
             // it for things like <nixpkgs> pointing to a store path.
             // In the future, these things will (need to) have PathInfo.
             None => {
-                // The store path doesn't exist yet, so we need to build it.
-                warn!("triggering build");
+                // The store path doesn't exist yet, so we need to fetch or build it.
+                // We check for fetches first, as we might have both native
+                // fetchers and FODs in KnownPaths, and prefer the former.
+
+                let maybe_fetch = self
+                    .known_paths
+                    .borrow()
+                    .get_fetch_for_output_path(store_path);
+
+                if let Some((name, fetch)) = maybe_fetch {
+                    info!(?fetch, "triggering lazy fetch");
+                    let (sp, root_node) = self
+                        .fetcher
+                        .ingest_and_persist(&name, fetch)
+                        .await
+                        .map_err(|e| std::io::Error::new(std::io::ErrorKind::InvalidData, e))?;
+
+                    debug_assert_eq!(
+                        sp.to_string(),
+                        store_path.to_string(),
+                        "store path returned from fetcher should match"
+                    );
+
+                    return Ok(Some(root_node));
+                }
 
                 // Look up the derivation for this output path.
                 let (drv_path, drv) = {
@@ -140,6 +164,8 @@ impl TvixStoreIO {
                     }
                 };
 
+                warn!("triggering build");
+
                 // derivation_to_build_request needs castore nodes for all inputs.
                 // Provide them, which means, here is where we recursively build
                 // all dependencies.