about summary refs log tree commit diff
path: root/tvix/glue
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/glue')
-rw-r--r--tvix/glue/Cargo.toml20
-rw-r--r--tvix/glue/default.nix5
-rw-r--r--tvix/glue/src/.skip-subtree1
-rw-r--r--tvix/glue/src/derivation.nix36
-rw-r--r--tvix/glue/src/derivation.rs601
-rw-r--r--tvix/glue/src/errors.rs26
-rw-r--r--tvix/glue/src/fetchurl.nix53
-rw-r--r--tvix/glue/src/known_paths.rs186
-rw-r--r--tvix/glue/src/lib.rs28
-rw-r--r--tvix/glue/src/refscan.rs115
-rw-r--r--tvix/glue/src/tvix_io.rs80
-rw-r--r--tvix/glue/src/tvix_store_io.rs359
12 files changed, 1510 insertions, 0 deletions
diff --git a/tvix/glue/Cargo.toml b/tvix/glue/Cargo.toml
new file mode 100644
index 000000000000..4ebfda870399
--- /dev/null
+++ b/tvix/glue/Cargo.toml
@@ -0,0 +1,20 @@
+[package]
+name = "tvix-glue"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+nix-compat = { path = "../nix-compat" }
+tvix-eval = { path = "../eval" }
+tvix-castore = { path = "../castore" }
+tvix-store = { path = "../store", default-features = false, features = []}
+bytes = "1.4.0"
+tracing = "0.1.37"
+tokio = "1.28.0"
+thiserror = "1.0.38"
+
+[dependencies.wu-manber]
+git = "https://github.com/tvlfyi/wu-manber.git"
+
+[dev-dependencies]
+test-case = "2.2.2"
diff --git a/tvix/glue/default.nix b/tvix/glue/default.nix
new file mode 100644
index 000000000000..975b0a5a7007
--- /dev/null
+++ b/tvix/glue/default.nix
@@ -0,0 +1,5 @@
+{ depot, ... }:
+
+(depot.tvix.crates.workspaceMembers.tvix-glue.build.override {
+  runTests = true;
+})
diff --git a/tvix/glue/src/.skip-subtree b/tvix/glue/src/.skip-subtree
new file mode 100644
index 000000000000..a16a2afe1f1e
--- /dev/null
+++ b/tvix/glue/src/.skip-subtree
@@ -0,0 +1 @@
+Because of the derivation.nix file ...
diff --git a/tvix/glue/src/derivation.nix b/tvix/glue/src/derivation.nix
new file mode 100644
index 000000000000..9355cc3a96f0
--- /dev/null
+++ b/tvix/glue/src/derivation.nix
@@ -0,0 +1,36 @@
+# LGPL-2.1-or-later
+#
+# taken from: https://github.com/NixOS/nix/blob/master/src/libexpr/primops/derivation.nix
+#
+# TODO: rewrite in native Rust code
+
+/* This is the implementation of the ‘derivation’ builtin function.
+   It's actually a wrapper around the ‘derivationStrict’ primop. */
+
+drvAttrs @ { outputs ? [ "out" ], ... }:
+
+let
+
+  strict = derivationStrict drvAttrs;
+
+  commonAttrs = drvAttrs // (builtins.listToAttrs outputsList) //
+    {
+      all = map (x: x.value) outputsList;
+      inherit drvAttrs;
+    };
+
+  outputToAttrListElement = outputName:
+    {
+      name = outputName;
+      value = commonAttrs // {
+        outPath = builtins.getAttr outputName strict;
+        drvPath = strict.drvPath;
+        type = "derivation";
+        inherit outputName;
+      };
+    };
+
+  outputsList = map outputToAttrListElement outputs;
+
+in
+(builtins.head outputsList).value
diff --git a/tvix/glue/src/derivation.rs b/tvix/glue/src/derivation.rs
new file mode 100644
index 000000000000..86a271a3966d
--- /dev/null
+++ b/tvix/glue/src/derivation.rs
@@ -0,0 +1,601 @@
+//! Implements `builtins.derivation`, the core of what makes Nix build packages.
+use nix_compat::derivation::{Derivation, Output};
+use nix_compat::nixhash;
+use std::cell::RefCell;
+use std::collections::{btree_map, BTreeSet};
+use std::rc::Rc;
+use tvix_eval::builtin_macros::builtins;
+use tvix_eval::generators::{self, emit_warning_kind, GenCo};
+use tvix_eval::{
+    AddContext, CatchableErrorKind, CoercionKind, ErrorKind, NixAttrs, NixList, Value, WarningKind,
+};
+
+use crate::errors::Error;
+use crate::known_paths::{KnownPaths, PathKind, PathName};
+
+// Constants used for strangely named fields in derivation inputs.
+const STRUCTURED_ATTRS: &str = "__structuredAttrs";
+const IGNORE_NULLS: &str = "__ignoreNulls";
+
+/// Helper function for populating the `drv.outputs` field from a
+/// manually specified set of outputs, instead of the default
+/// `outputs`.
+async fn populate_outputs(
+    co: &GenCo,
+    drv: &mut Derivation,
+    outputs: NixList,
+) -> Result<(), ErrorKind> {
+    // Remove the original default `out` output.
+    drv.outputs.clear();
+
+    for output in outputs {
+        let output_name = generators::request_force(co, output)
+            .await
+            .to_str()
+            .context("determining output name")?;
+
+        if drv
+            .outputs
+            .insert(output_name.as_str().into(), Default::default())
+            .is_some()
+        {
+            return Err(Error::DuplicateOutput(output_name.as_str().into()).into());
+        }
+    }
+
+    Ok(())
+}
+
+/// Populate the inputs of a derivation from the build references
+/// found when scanning the derivation's parameters.
+fn populate_inputs<I: IntoIterator<Item = PathName>>(
+    drv: &mut Derivation,
+    known_paths: &KnownPaths,
+    references: I,
+) {
+    for reference in references.into_iter() {
+        let reference = &known_paths[&reference];
+        match &reference.kind {
+            PathKind::Plain => {
+                drv.input_sources.insert(reference.path.clone());
+            }
+
+            PathKind::Output { name, derivation } => {
+                match drv.input_derivations.entry(derivation.clone()) {
+                    btree_map::Entry::Vacant(entry) => {
+                        entry.insert(BTreeSet::from([name.clone()]));
+                    }
+
+                    btree_map::Entry::Occupied(mut entry) => {
+                        entry.get_mut().insert(name.clone());
+                    }
+                }
+            }
+
+            PathKind::Derivation { output_names } => {
+                match drv.input_derivations.entry(reference.path.clone()) {
+                    btree_map::Entry::Vacant(entry) => {
+                        entry.insert(output_names.clone());
+                    }
+
+                    btree_map::Entry::Occupied(mut entry) => {
+                        entry.get_mut().extend(output_names.clone().into_iter());
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Populate the output configuration of a derivation based on the
+/// parameters passed to the call, configuring a fixed-output derivation output
+/// if necessary.
+///
+/// This function handles all possible combinations of the
+/// parameters, including invalid ones.
+///
+/// Due to the support for SRI hashes, and how these are passed along to
+/// builtins.derivation, outputHash and outputHashAlgo can have values which
+/// need to be further modified before constructing the Derivation struct.
+///
+/// If outputHashAlgo is an SRI hash, outputHashAlgo must either be an empty
+/// string, or the hash algorithm as specified in the (single) SRI (entry).
+/// SRI strings with multiple hash algorithms are not supported.
+///
+/// In case an SRI string was used, the (single) fixed output is populated
+/// with the hash algo name, and the hash digest is populated with the
+/// (lowercase) hex encoding of the digest.
+///
+/// These values are only rewritten for the outputs, not what's passed to env.
+fn handle_fixed_output(
+    drv: &mut Derivation,
+    hash_str: Option<String>,      // in nix: outputHash
+    hash_algo_str: Option<String>, // in nix: outputHashAlgo
+    hash_mode_str: Option<String>, // in nix: outputHashmode
+) -> Result<(), ErrorKind> {
+    // If outputHash is provided, ensure hash_algo_str is compatible.
+    // If outputHash is not provided, do nothing.
+    if let Some(hash_str) = hash_str {
+        // treat an empty algo as None
+        let hash_algo_str = match hash_algo_str {
+            Some(s) if s.is_empty() => None,
+            Some(s) => Some(s),
+            None => None,
+        };
+
+        // construct a NixHash.
+        let nixhash = nixhash::from_str(&hash_str, hash_algo_str.as_deref())
+            .map_err(Error::InvalidOutputHash)?;
+
+        // construct the fixed output.
+        drv.outputs.insert(
+            "out".to_string(),
+            Output {
+                path: "".to_string(),
+                ca_hash: match hash_mode_str.as_deref() {
+                    None | Some("flat") => Some(nixhash::CAHash::Flat(nixhash)),
+                    Some("recursive") => Some(nixhash::CAHash::Nar(nixhash)),
+                    Some(other) => return Err(Error::InvalidOutputHashMode(other.to_string()))?,
+                },
+            },
+        );
+    }
+    Ok(())
+}
+
+/// Handles derivation parameters which are not just forwarded to
+/// the environment. The return value indicates whether the
+/// parameter should be included in the environment.
+async fn handle_derivation_parameters(
+    drv: &mut Derivation,
+    co: &GenCo,
+    name: &str,
+    value: &Value,
+    val_str: &str,
+) -> Result<Result<bool, CatchableErrorKind>, ErrorKind> {
+    match name {
+        IGNORE_NULLS => return Ok(Ok(false)),
+
+        // Command line arguments to the builder.
+        "args" => {
+            let args = value.to_list()?;
+            for arg in args {
+                match strong_coerce_to_string(co, arg).await? {
+                    Err(cek) => return Ok(Err(cek)),
+                    Ok(s) => drv.arguments.push(s),
+                }
+            }
+
+            // The arguments do not appear in the environment.
+            return Ok(Ok(false));
+        }
+
+        // Explicitly specified drv outputs (instead of default [ "out" ])
+        "outputs" => {
+            let outputs = value
+                .to_list()
+                .context("looking at the `outputs` parameter of the derivation")?;
+
+            populate_outputs(co, drv, outputs).await?;
+        }
+
+        "builder" => {
+            drv.builder = val_str.to_string();
+        }
+
+        "system" => {
+            drv.system = val_str.to_string();
+        }
+
+        _ => {}
+    }
+
+    Ok(Ok(true))
+}
+
+async fn strong_coerce_to_string(
+    co: &GenCo,
+    val: Value,
+) -> Result<Result<String, CatchableErrorKind>, ErrorKind> {
+    let val = generators::request_force(co, val).await;
+    match generators::request_string_coerce(co, val, CoercionKind::Strong).await {
+        Err(cek) => Ok(Err(cek)),
+        Ok(val_str) => Ok(Ok(val_str.as_str().to_string())),
+    }
+}
+
+#[builtins(state = "Rc<RefCell<KnownPaths>>")]
+mod derivation_builtins {
+    use super::*;
+    use nix_compat::store_path::hash_placeholder;
+    use tvix_eval::generators::Gen;
+
+    #[builtin("placeholder")]
+    async fn builtin_placeholder(co: GenCo, input: Value) -> Result<Value, ErrorKind> {
+        let placeholder = hash_placeholder(
+            input
+                .to_str()
+                .context("looking at output name in builtins.placeholder")?
+                .as_str(),
+        );
+
+        Ok(placeholder.into())
+    }
+
+    /// Strictly construct a Nix derivation from the supplied arguments.
+    ///
+    /// This is considered an internal function, users usually want to
+    /// use the higher-level `builtins.derivation` instead.
+    #[builtin("derivationStrict")]
+    async fn builtin_derivation_strict(
+        state: Rc<RefCell<KnownPaths>>,
+        co: GenCo,
+        input: Value,
+    ) -> Result<Value, ErrorKind> {
+        let input = input.to_attrs()?;
+        let name = generators::request_force(&co, input.select_required("name")?.clone())
+            .await
+            .to_str()
+            .context("determining derivation name")?;
+
+        if name.is_empty() {
+            return Err(ErrorKind::Abort("derivation has empty name".to_string()));
+        }
+
+        // Check whether attributes should be passed as a JSON file.
+        // TODO: the JSON serialisation has to happen here.
+        if let Some(sa) = input.select(STRUCTURED_ATTRS) {
+            if generators::request_force(&co, sa.clone()).await.as_bool()? {
+                return Err(ErrorKind::NotImplemented(STRUCTURED_ATTRS));
+            }
+        }
+
+        // Check whether null attributes should be ignored or passed through.
+        let ignore_nulls = match input.select(IGNORE_NULLS) {
+            Some(b) => generators::request_force(&co, b.clone()).await.as_bool()?,
+            None => false,
+        };
+
+        let mut drv = Derivation::default();
+        drv.outputs.insert("out".to_string(), Default::default());
+
+        async fn select_string(
+            co: &GenCo,
+            attrs: &NixAttrs,
+            key: &str,
+        ) -> Result<Result<Option<String>, CatchableErrorKind>, ErrorKind> {
+            if let Some(attr) = attrs.select(key) {
+                match strong_coerce_to_string(co, attr.clone()).await? {
+                    Err(cek) => return Ok(Err(cek)),
+                    Ok(str) => return Ok(Ok(Some(str))),
+                }
+            }
+
+            Ok(Ok(None))
+        }
+
+        for (name, value) in input.clone().into_iter_sorted() {
+            let value = generators::request_force(&co, value).await;
+            if ignore_nulls && matches!(value, Value::Null) {
+                continue;
+            }
+
+            match strong_coerce_to_string(&co, value.clone()).await? {
+                Err(cek) => return Ok(Value::Catchable(cek)),
+                Ok(val_str) => {
+                    // handle_derivation_parameters tells us whether the
+                    // argument should be added to the environment; continue
+                    // to the next one otherwise
+                    match handle_derivation_parameters(
+                        &mut drv,
+                        &co,
+                        name.as_str(),
+                        &value,
+                        &val_str,
+                    )
+                    .await?
+                    {
+                        Err(cek) => return Ok(Value::Catchable(cek)),
+                        Ok(false) => continue,
+                        _ => (),
+                    }
+
+                    // Most of these are also added to the builder's environment in "raw" form.
+                    if drv
+                        .environment
+                        .insert(name.as_str().to_string(), val_str.into())
+                        .is_some()
+                    {
+                        return Err(Error::DuplicateEnvVar(name.as_str().to_string()).into());
+                    }
+                }
+            }
+        }
+
+        // Configure fixed-output derivations if required.
+        {
+            let output_hash = match select_string(&co, &input, "outputHash")
+                .await
+                .context("evaluating the `outputHash` parameter")?
+            {
+                Err(cek) => return Ok(Value::Catchable(cek)),
+                Ok(s) => s,
+            };
+            let output_hash_algo = match select_string(&co, &input, "outputHashAlgo")
+                .await
+                .context("evaluating the `outputHashAlgo` parameter")?
+            {
+                Err(cek) => return Ok(Value::Catchable(cek)),
+                Ok(s) => s,
+            };
+            let output_hash_mode = match select_string(&co, &input, "outputHashMode")
+                .await
+                .context("evaluating the `outputHashMode` parameter")?
+            {
+                Err(cek) => return Ok(Value::Catchable(cek)),
+                Ok(s) => s,
+            };
+            handle_fixed_output(&mut drv, output_hash, output_hash_algo, output_hash_mode)?;
+        }
+
+        // Scan references in relevant attributes to detect any build-references.
+        let references = {
+            let state = state.borrow();
+            if state.is_empty() {
+                // skip reference scanning, create an empty result
+                Default::default()
+            } else {
+                let mut refscan = state.reference_scanner();
+                drv.arguments.iter().for_each(|s| refscan.scan(s));
+                drv.environment.values().for_each(|s| refscan.scan(s));
+                refscan.scan(&drv.builder);
+                refscan.finalise()
+            }
+        };
+
+        // Each output name needs to exist in the environment, at this
+        // point initialised as an empty string because that is the
+        // way of Golang ;)
+        for output in drv.outputs.keys() {
+            if drv
+                .environment
+                .insert(output.to_string(), String::new().into())
+                .is_some()
+            {
+                emit_warning_kind(&co, WarningKind::ShadowedOutput(output.to_string())).await;
+            }
+        }
+
+        let mut known_paths = state.borrow_mut();
+        populate_inputs(&mut drv, &known_paths, references);
+
+        // At this point, derivation fields are fully populated from
+        // eval data structures.
+        drv.validate(false).map_err(Error::InvalidDerivation)?;
+
+        // Calculate the derivation_or_fod_hash for the current derivation.
+        // This one is still intermediate (so not added to known_paths)
+        let derivation_or_fod_hash_tmp =
+            drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv));
+
+        // Mutate the Derivation struct and set output paths
+        drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp)
+            .map_err(Error::InvalidDerivation)?;
+
+        let derivation_path = drv
+            .calculate_derivation_path(&name)
+            .map_err(Error::InvalidDerivation)?;
+
+        // recompute the hash derivation modulo and add to known_paths
+        let derivation_or_fod_hash_final =
+            drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv));
+
+        known_paths.add_hash_derivation_modulo(
+            derivation_path.to_absolute_path(),
+            &derivation_or_fod_hash_final,
+        );
+
+        // mark all the new paths as known
+        let output_names: Vec<String> = drv.outputs.keys().map(Clone::clone).collect();
+        known_paths.drv(derivation_path.to_absolute_path(), &output_names);
+
+        for (output_name, output) in &drv.outputs {
+            known_paths.output(
+                &output.path,
+                output_name,
+                derivation_path.to_absolute_path(),
+            );
+        }
+
+        let mut new_attrs: Vec<(String, String)> = drv
+            .outputs
+            .into_iter()
+            .map(|(name, output)| (name, output.path))
+            .collect();
+
+        new_attrs.push(("drvPath".to_string(), derivation_path.to_absolute_path()));
+
+        Ok(Value::Attrs(Box::new(NixAttrs::from_iter(
+            new_attrs.into_iter(),
+        ))))
+    }
+
+    #[builtin("toFile")]
+    async fn builtin_to_file(
+        state: Rc<RefCell<KnownPaths>>,
+        co: GenCo,
+        name: Value,
+        content: Value,
+    ) -> Result<Value, ErrorKind> {
+        let name = name
+            .to_str()
+            .context("evaluating the `name` parameter of builtins.toFile")?;
+        let content = content
+            .to_str()
+            .context("evaluating the `content` parameter of builtins.toFile")?;
+
+        let mut refscan = state.borrow().reference_scanner();
+        refscan.scan(content.as_str());
+        let refs = {
+            let paths = state.borrow();
+            refscan
+                .finalise()
+                .into_iter()
+                .map(|path| paths[&path].path.to_string())
+                .collect::<Vec<_>>()
+        };
+
+        // TODO: fail on derivation references (only "plain" is allowed here)
+
+        let path = nix_compat::store_path::build_text_path(name.as_str(), content.as_str(), refs)
+            .map_err(|_e| {
+                nix_compat::derivation::DerivationError::InvalidOutputName(
+                    name.as_str().to_string(),
+                )
+            })
+            .map_err(Error::InvalidDerivation)?
+            .to_absolute_path();
+
+        state.borrow_mut().plain(&path);
+
+        // TODO: actually persist the file in the store at that path ...
+
+        Ok(Value::String(path.into()))
+    }
+}
+
+pub use derivation_builtins::builtins as derivation_builtins;
+
+#[cfg(test)]
+mod tests {
+    use crate::known_paths::KnownPaths;
+    use nix_compat::store_path::hash_placeholder;
+    use std::{cell::RefCell, rc::Rc};
+    use test_case::test_case;
+    use tvix_eval::EvaluationResult;
+
+    /// evaluates a given nix expression and returns the result.
+    /// Takes care of setting up the evaluator so it knows about the
+    // `derivation` builtin.
+    fn eval(str: &str) -> EvaluationResult {
+        let mut eval = tvix_eval::Evaluation::new_impure(str, None);
+
+        let known_paths: Rc<RefCell<KnownPaths>> = Default::default();
+
+        eval.builtins
+            .extend(crate::derivation::derivation_builtins(known_paths));
+
+        // Add the actual `builtins.derivation` from compiled Nix code
+        eval.src_builtins
+            .push(("derivation", include_str!("derivation.nix")));
+
+        // run the evaluation itself.
+        eval.evaluate()
+    }
+
+    #[test]
+    fn derivation() {
+        let result = eval(
+            r#"(derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#,
+        );
+
+        assert!(result.errors.is_empty(), "expect evaluation to succeed");
+        let value = result.value.expect("must be some");
+
+        match value {
+            tvix_eval::Value::String(s) => {
+                assert_eq!(
+                    "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo",
+                    s.as_str()
+                );
+            }
+            _ => panic!("unexpected value type: {:?}", value),
+        }
+    }
+
+    /// a derivation with an empty name is an error.
+    #[test]
+    fn derivation_empty_name_fail() {
+        let result = eval(
+            r#"(derivation { name = ""; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#,
+        );
+
+        assert!(!result.errors.is_empty(), "expect evaluation to fail");
+    }
+
+    /// construct some calls to builtins.derivation and compare produced output
+    /// paths.
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256")]
+    #[test_case(r#"(builtins.derivation { name = "foo2"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/gi0p8vd635vpk1nq029cz3aa3jkhar5k-foo2"; "r:sha256 other name")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha1"; outputHash = "sha1-VUCRC+16gU5lcrLYHlPSUyx0Y/Q="; }).outPath"#, "/nix/store/p5sammmhpa84ama7ymkbgwwzrilva24x-foo"; "r:sha1")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "md5"; outputHash = "md5-07BzhNET7exJ6qYjitX/AA=="; }).outPath"#, "/nix/store/gmmxgpy1jrzs86r5y05wy6wiy2m15xgi-foo"; "r:md5")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha512"; outputHash = "sha512-DPkYCnZKuoY6Z7bXLwkYvBMcZ3JkLLLc5aNPCnAvlHDdwr8SXBIZixmVwjPDS0r9NGxUojNMNQqUilG26LTmtg=="; }).outPath"#, "/nix/store/lfi2bfyyap88y45mfdwi4j99gkaxaj19-foo"; "r:sha512")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "4374173a8cbe88de152b609f96f46e958bcf65762017474eec5a05ec2bd61530"; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 base16")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "0c0msqmyq1asxi74f5r0frjwz2wmdvs9d7v05caxx25yihx1fx23"; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 nixbase32")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 base64")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-fgIr3TyFGDAXP5+qoAaiMKDg/a1MlT6Fv/S/DaA24S8="; }).outPath"#, "/nix/store/xm1l9dx4zgycv9qdhcqqvji1z88z534b-foo"; "r:sha256 base64 nopad")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/q4pkwkxdib797fhk22p0k3g1q32jmxvf-foo"; "sha256")]
+    #[test_case(r#"(builtins.derivation { name = "foo2"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha256"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/znw17xlmx9r6gw8izjkqxkl6s28sza4l-foo2"; "sha256 other name")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha1"; outputHash = "sha1-VUCRC+16gU5lcrLYHlPSUyx0Y/Q="; }).outPath"#, "/nix/store/zgpnjjmga53d8srp8chh3m9fn7nnbdv6-foo"; "sha1")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "md5"; outputHash = "md5-07BzhNET7exJ6qYjitX/AA=="; }).outPath"#, "/nix/store/jfhcwnq1852ccy9ad9nakybp2wadngnd-foo"; "md5")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "flat"; outputHashAlgo = "sha512"; outputHash = "sha512-DPkYCnZKuoY6Z7bXLwkYvBMcZ3JkLLLc5aNPCnAvlHDdwr8SXBIZixmVwjPDS0r9NGxUojNMNQqUilG26LTmtg=="; }).outPath"#, "/nix/store/as736rr116ian9qzg457f96j52ki8bm3-foo"; "sha512")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/17wgs52s7kcamcyin4ja58njkf91ipq8-foo"; "r:sha256 outputHashAlgo omitted")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#, "/nix/store/q4pkwkxdib797fhk22p0k3g1q32jmxvf-foo"; "r:sha256 outputHashAlgo and outputHashMode omitted")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; }).outPath"#, "/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo"; "outputHash* omitted")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; outputs = ["foo" "bar"]; system = "x86_64-linux"; }).outPath"#, "/nix/store/hkwdinvz2jpzgnjy9lv34d2zxvclj4s3-foo-foo"; "multiple outputs")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; args = ["--foo" "42" "--bar"]; system = "x86_64-linux"; }).outPath"#, "/nix/store/365gi78n2z7vwc1bvgb98k0a9cqfp6as-foo"; "args")]
+    #[test_case(r#"
+                   let
+                     bar = builtins.derivation {
+                       name = "bar";
+                       builder = ":";
+                       system = ":";
+                       outputHash = "08813cbee9903c62be4c5027726a418a300da4500b2d369d3af9286f4815ceba";
+                       outputHashAlgo = "sha256";
+                       outputHashMode = "recursive";
+                     };
+                   in
+                   (builtins.derivation {
+                     name = "foo";
+                     builder = ":";
+                     system = ":";
+                     inherit bar;
+                   }).outPath
+        "#, "/nix/store/5vyvcwah9l9kf07d52rcgdk70g2f4y13-foo"; "full")]
+    fn test_outpath(code: &str, expected_path: &str) {
+        let value = eval(code).value.expect("must succeed");
+
+        match value {
+            tvix_eval::Value::String(s) => {
+                assert_eq!(expected_path, s.as_str());
+            }
+            _ => panic!("unexpected value type: {:?}", value),
+        }
+    }
+
+    /// construct some calls to builtins.derivation that should be rejected
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha256"; outputHash = "sha256-00"; }).outPath"#; "invalid outputhash")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux"; outputHashMode = "recursive"; outputHashAlgo = "sha1"; outputHash = "sha256-Q3QXOoy+iN4VK2CflvRulYvPZXYgF0dO7FoF7CvWFTA="; }).outPath"#; "sha1 and sha256")]
+    #[test_case(r#"(builtins.derivation { name = "foo"; builder = "/bin/sh"; outputs = ["foo" "foo"]; system = "x86_64-linux"; }).outPath"#; "duplicate output names")]
+    fn test_outpath_invalid(code: &str) {
+        let resp = eval(code);
+        assert!(resp.value.is_none(), "Value should be None");
+        assert!(
+            !resp.errors.is_empty(),
+            "There should have been some errors"
+        );
+    }
+
+    #[test]
+    fn builtins_placeholder_hashes() {
+        assert_eq!(
+            hash_placeholder("out").as_str(),
+            "/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9"
+        );
+
+        assert_eq!(
+            hash_placeholder("").as_str(),
+            "/171rf4jhx57xqz3p7swniwkig249cif71pa08p80mgaf0mqz5bmr"
+        );
+    }
+}
diff --git a/tvix/glue/src/errors.rs b/tvix/glue/src/errors.rs
new file mode 100644
index 000000000000..5cbddcbba811
--- /dev/null
+++ b/tvix/glue/src/errors.rs
@@ -0,0 +1,26 @@
+use nix_compat::{derivation::DerivationError, nixhash};
+use std::rc::Rc;
+use thiserror::Error;
+
+/// Errors related to derivation construction
+#[derive(Debug, Error)]
+pub enum Error {
+    #[error("an output with the name '{0}' is already defined")]
+    DuplicateOutput(String),
+    #[error("fixed-output derivations can only have the default `out`-output")]
+    ConflictingOutputTypes,
+    #[error("the environment variable '{0}' has already been set in this derivation")]
+    DuplicateEnvVar(String),
+    #[error("invalid derivation parameters: {0}")]
+    InvalidDerivation(DerivationError),
+    #[error("invalid output hash: {0}")]
+    InvalidOutputHash(nixhash::Error),
+    #[error("invalid output hash mode: '{0}', only 'recursive' and 'flat` are supported")]
+    InvalidOutputHashMode(String),
+}
+
+impl From<Error> for tvix_eval::ErrorKind {
+    fn from(err: Error) -> Self {
+        tvix_eval::ErrorKind::TvixError(Rc::new(err))
+    }
+}
diff --git a/tvix/glue/src/fetchurl.nix b/tvix/glue/src/fetchurl.nix
new file mode 100644
index 000000000000..3f182a5a319b
--- /dev/null
+++ b/tvix/glue/src/fetchurl.nix
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: LGPL-2.1
+#
+# This file is vendored from C++ Nix, as it needs to be bundled with
+# an evaluator to be able to evaluate nixpkgs.
+#
+# Source: https://github.com/NixOS/nix/blob/2.3.16/corepkgs/fetchurl.nix
+
+{ system ? "" # obsolete
+, url
+, hash ? "" # an SRI hash
+
+  # Legacy hash specification
+, md5 ? ""
+, sha1 ? ""
+, sha256 ? ""
+, sha512 ? ""
+, outputHash ? if hash != "" then hash else if sha512 != "" then sha512 else if sha1 != "" then sha1 else if md5 != "" then md5 else sha256
+, outputHashAlgo ? if hash != "" then "" else if sha512 != "" then "sha512" else if sha1 != "" then "sha1" else if md5 != "" then "md5" else "sha256"
+
+, executable ? false
+, unpack ? false
+, name ? baseNameOf (toString url)
+}:
+
+derivation {
+  builder = "builtin:fetchurl";
+
+  # New-style output content requirements.
+  inherit outputHashAlgo outputHash;
+  outputHashMode = if unpack || executable then "recursive" else "flat";
+
+  inherit name url executable unpack;
+
+  system = "builtin";
+
+  # No need to double the amount of network traffic
+  preferLocalBuild = true;
+
+  impureEnvVars = [
+    # We borrow these environment variables from the caller to allow
+    # easy proxy configuration.  This is impure, but a fixed-output
+    # derivation like fetchurl is allowed to do so since its result is
+    # by definition pure.
+    "http_proxy"
+    "https_proxy"
+    "ftp_proxy"
+    "all_proxy"
+    "no_proxy"
+  ];
+
+  # To make "nix-prefetch-url" work.
+  urls = [ url ];
+}
diff --git a/tvix/glue/src/known_paths.rs b/tvix/glue/src/known_paths.rs
new file mode 100644
index 000000000000..07373ef0da7a
--- /dev/null
+++ b/tvix/glue/src/known_paths.rs
@@ -0,0 +1,186 @@
+//! This module implements logic required for persisting known paths
+//! during an evaluation.
+//!
+//! Tvix needs to be able to keep track of each Nix store path that it
+//! knows about during the scope of a single evaluation and its
+//! related builds.
+//!
+//! This data is required to scan derivation inputs for the build
+//! references (the "build closure") that they make use of.
+//!
+//! Please see //tvix/eval/docs/build-references.md for more
+//! information.
+
+use crate::refscan::{ReferenceScanner, STORE_PATH_LEN};
+use nix_compat::nixhash::NixHash;
+use std::{
+    collections::{hash_map, BTreeSet, HashMap},
+    ops::Index,
+};
+
+#[derive(Debug, PartialEq)]
+pub enum PathKind {
+    /// A literal derivation (`.drv`-file), and the *names* of its outputs.
+    Derivation { output_names: BTreeSet<String> },
+
+    /// An output of a derivation, its name, and the path of its derivation.
+    Output { name: String, derivation: String },
+
+    /// A plain store path (e.g. source files copied to the store).
+    Plain,
+}
+
+#[derive(Debug, PartialEq)]
+pub struct KnownPath {
+    pub path: String,
+    pub kind: PathKind,
+}
+
+impl KnownPath {
+    fn new(path: String, kind: PathKind) -> Self {
+        KnownPath { path, kind }
+    }
+}
+
+/// Internal struct to prevent accidental leaks of the truncated path
+/// names.
+#[repr(transparent)]
+#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq, Hash)]
+pub struct PathName(String);
+
+impl From<&str> for PathName {
+    fn from(s: &str) -> Self {
+        PathName(s[..STORE_PATH_LEN].to_string())
+    }
+}
+
+/// This instance is required to pass PathName instances as needles to
+/// the reference scanner.
+impl AsRef<[u8]> for PathName {
+    fn as_ref(&self) -> &[u8] {
+        self.0.as_ref()
+    }
+}
+
+#[derive(Debug, Default)]
+pub struct KnownPaths {
+    /// All known paths, keyed by a truncated version of their store
+    /// path used for reference scanning.
+    paths: HashMap<PathName, KnownPath>,
+
+    /// All known derivation or FOD hashes.
+    ///
+    /// Keys are derivation paths, values is the NixHash.
+    derivation_or_fod_hashes: HashMap<String, NixHash>,
+}
+
+impl Index<&PathName> for KnownPaths {
+    type Output = KnownPath;
+
+    fn index(&self, index: &PathName) -> &Self::Output {
+        &self.paths[index]
+    }
+}
+
+impl KnownPaths {
+    fn insert_path(&mut self, path: String, path_kind: PathKind) {
+        match self.paths.entry(path.as_str().into()) {
+            hash_map::Entry::Vacant(entry) => {
+                entry.insert(KnownPath::new(path, path_kind));
+            }
+
+            hash_map::Entry::Occupied(mut entry) => {
+                match (path_kind, &mut entry.get_mut().kind) {
+                    // These variant combinations require no "merging action".
+                    (PathKind::Plain, PathKind::Plain) => (),
+                    (PathKind::Output { .. }, PathKind::Output { .. }) => (),
+
+                    (
+                        PathKind::Derivation { output_names: new },
+                        PathKind::Derivation {
+                            output_names: ref mut old,
+                        },
+                    ) => {
+                        old.extend(new);
+                    }
+
+                    _ => panic!(
+                        "path '{}' inserted twice with different types",
+                        entry.key().0
+                    ),
+                };
+            }
+        };
+    }
+
+    /// Mark a plain path as known.
+    pub fn plain<S: ToString>(&mut self, path: S) {
+        self.insert_path(path.to_string(), PathKind::Plain);
+    }
+
+    /// Mark a derivation as known.
+    pub fn drv<P: ToString, O: ToString>(&mut self, path: P, outputs: &[O]) {
+        self.insert_path(
+            path.to_string(),
+            PathKind::Derivation {
+                output_names: outputs.iter().map(ToString::to_string).collect(),
+            },
+        );
+    }
+
+    /// Mark a derivation output path as known.
+    pub fn output<P: ToString, N: ToString, D: ToString>(
+        &mut self,
+        output_path: P,
+        name: N,
+        drv_path: D,
+    ) {
+        self.insert_path(
+            output_path.to_string(),
+            PathKind::Output {
+                name: name.to_string(),
+                derivation: drv_path.to_string(),
+            },
+        );
+    }
+
+    /// Checks whether there are any known paths. If not, a reference
+    /// scanner can not be created.
+    pub fn is_empty(&self) -> bool {
+        self.paths.is_empty()
+    }
+
+    /// Create a reference scanner from the current set of known paths.
+    pub fn reference_scanner(&self) -> ReferenceScanner<PathName> {
+        let candidates = self.paths.keys().map(Clone::clone).collect();
+        ReferenceScanner::new(candidates)
+    }
+
+    /// Fetch the opaque "hash derivation modulo" for a given derivation path.
+    pub fn get_hash_derivation_modulo(&self, drv_path: &str) -> NixHash {
+        // TODO: we rely on an invariant that things *should* have
+        // been calculated if we get this far.
+        self.derivation_or_fod_hashes[drv_path].clone()
+    }
+
+    pub fn add_hash_derivation_modulo<D: ToString>(
+        &mut self,
+        drv: D,
+        hash_derivation_modulo: &NixHash,
+    ) {
+        #[allow(unused_variables)] // assertions on this only compiled in debug builds
+        let old = self
+            .derivation_or_fod_hashes
+            .insert(drv.to_string(), hash_derivation_modulo.to_owned());
+
+        #[cfg(debug_assertions)]
+        {
+            if let Some(old) = old {
+                debug_assert!(
+                    old == *hash_derivation_modulo,
+                    "hash derivation modulo for a given derivation should always be calculated the same"
+                );
+            }
+        }
+    }
+}
diff --git a/tvix/glue/src/lib.rs b/tvix/glue/src/lib.rs
new file mode 100644
index 000000000000..acb81d31445f
--- /dev/null
+++ b/tvix/glue/src/lib.rs
@@ -0,0 +1,28 @@
+use std::{cell::RefCell, rc::Rc};
+
+use known_paths::KnownPaths;
+
+pub mod derivation;
+pub mod errors;
+pub mod known_paths;
+pub mod refscan;
+pub mod tvix_io;
+pub mod tvix_store_io;
+
+/// Adds derivation-related builtins to the passed [tvix_eval::Evaluation].
+///
+/// These are `derivation` and `derivationStrict`.
+///
+/// As they need to interact with `known_paths`, we also need to pass in
+/// `known_paths`.
+pub fn add_derivation_builtins(
+    eval: &mut tvix_eval::Evaluation,
+    known_paths: Rc<RefCell<KnownPaths>>,
+) {
+    eval.builtins
+        .extend(derivation::derivation_builtins(known_paths));
+
+    // Add the actual `builtins.derivation` from compiled Nix code
+    eval.src_builtins
+        .push(("derivation", include_str!("derivation.nix")));
+}
diff --git a/tvix/glue/src/refscan.rs b/tvix/glue/src/refscan.rs
new file mode 100644
index 000000000000..0e0bb6c77828
--- /dev/null
+++ b/tvix/glue/src/refscan.rs
@@ -0,0 +1,115 @@
+//! Simple scanner for non-overlapping, known references of Nix store paths in a
+//! given string.
+//!
+//! This is used for determining build references (see
+//! //tvix/eval/docs/build-references.md for more details).
+//!
+//! The scanner itself is using the Wu-Manber string-matching algorithm, using
+//! our fork of the `wu-mamber` crate.
+
+use std::collections::BTreeSet;
+use wu_manber::TwoByteWM;
+
+pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len();
+
+/// Represents a "primed" reference scanner with an automaton that knows the set
+/// of store paths to scan for.
+pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> {
+    candidates: Vec<P>,
+    searcher: Option<TwoByteWM>,
+    matches: Vec<usize>,
+}
+
+impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> {
+    /// Construct a new `ReferenceScanner` that knows how to scan for the given
+    /// candidate store paths.
+    pub fn new(candidates: Vec<P>) -> Self {
+        let searcher = if candidates.is_empty() {
+            None
+        } else {
+            Some(TwoByteWM::new(&candidates))
+        };
+
+        ReferenceScanner {
+            searcher,
+            candidates,
+            matches: Default::default(),
+        }
+    }
+
+    /// Scan the given str for all non-overlapping matches and collect them
+    /// in the scanner.
+    pub fn scan<S: AsRef<[u8]>>(&mut self, haystack: S) {
+        if haystack.as_ref().len() < STORE_PATH_LEN {
+            return;
+        }
+
+        if let Some(searcher) = &self.searcher {
+            for m in searcher.find(haystack) {
+                self.matches.push(m.pat_idx);
+            }
+        }
+    }
+
+    /// Finalise the reference scanner and return the resulting matches.
+    pub fn finalise(self) -> BTreeSet<P> {
+        self.matches
+            .into_iter()
+            .map(|idx| self.candidates[idx].clone())
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // The actual derivation of `nixpkgs.hello`.
+    const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;
+
+    #[test]
+    fn test_no_patterns() {
+        let mut scanner: ReferenceScanner<String> = ReferenceScanner::new(vec![]);
+
+        scanner.scan(HELLO_DRV);
+
+        let result = scanner.finalise();
+
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_single_match() {
+        let mut scanner = ReferenceScanner::new(vec![
+            "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(),
+        ]);
+        scanner.scan(HELLO_DRV);
+
+        let result = scanner.finalise();
+
+        assert_eq!(result.len(), 1);
+        assert!(result.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16"));
+    }
+
+    #[test]
+    fn test_multiple_matches() {
+        let candidates = vec![
+            // these exist in the drv:
+            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(),
+            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(),
+            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(),
+            // this doesn't:
+            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(),
+        ];
+
+        let mut scanner = ReferenceScanner::new(candidates.clone());
+        scanner.scan(HELLO_DRV);
+
+        let result = scanner.finalise();
+        assert_eq!(result.len(), 3);
+
+        for c in candidates[..3].iter() {
+            assert!(result.contains(c));
+        }
+    }
+}
diff --git a/tvix/glue/src/tvix_io.rs b/tvix/glue/src/tvix_io.rs
new file mode 100644
index 000000000000..caadbeb5e663
--- /dev/null
+++ b/tvix/glue/src/tvix_io.rs
@@ -0,0 +1,80 @@
+//! This module implements a wrapper around tvix-eval's [EvalIO] type,
+//! adding functionality which is required by tvix-cli:
+//!
+//! 1. Marking plain paths known to the reference scanner.
+//! 2. Handling the C++ Nix `__corepkgs__`-hack for nixpkgs bootstrapping.
+//!
+//! All uses of [EvalIO] in tvix-cli must make use of this wrapper,
+//! otherwise fundamental features like nixpkgs bootstrapping and hash
+//! calculation will not work.
+
+use std::cell::RefCell;
+use std::io;
+use std::path::{Path, PathBuf};
+use std::rc::Rc;
+use tvix_eval::{EvalIO, FileType};
+
+use crate::known_paths::KnownPaths;
+
+// TODO: Merge this together with TvixStoreIO?
+pub struct TvixIO<T: EvalIO> {
+    /// Ingested paths must be reported to this known paths tracker
+    /// for accurate build reference scanning.
+    known_paths: Rc<RefCell<KnownPaths>>,
+
+    // Actual underlying [EvalIO] implementation.
+    actual: T,
+}
+
+impl<T: EvalIO> TvixIO<T> {
+    pub fn new(known_paths: Rc<RefCell<KnownPaths>>, actual: T) -> Self {
+        Self {
+            known_paths,
+            actual,
+        }
+    }
+}
+
+impl<T: EvalIO> EvalIO for TvixIO<T> {
+    fn store_dir(&self) -> Option<String> {
+        self.actual.store_dir()
+    }
+
+    fn import_path(&self, path: &Path) -> Result<PathBuf, io::Error> {
+        let imported_path = self.actual.import_path(path)?;
+        self.known_paths
+            .borrow_mut()
+            .plain(imported_path.to_string_lossy());
+
+        Ok(imported_path)
+    }
+
+    fn path_exists(&self, path: &Path) -> Result<bool, io::Error> {
+        if path.starts_with("/__corepkgs__") {
+            return Ok(true);
+        }
+
+        self.actual.path_exists(path)
+    }
+
+    fn read_to_string(&self, path: &Path) -> Result<String, io::Error> {
+        // Bundled version of corepkgs/fetchurl.nix. The counterpart
+        // of this happens in `main`, where the `nix_path` of the
+        // evaluation has `nix=/__corepkgs__` added to it.
+        //
+        // This workaround is similar to what cppnix does for passing
+        // the path through.
+        //
+        // TODO: this comparison is bad and allocates, we should use
+        // the sane path library.
+        if path.starts_with("/__corepkgs__/fetchurl.nix") {
+            return Ok(include_str!("fetchurl.nix").to_string());
+        }
+
+        self.actual.read_to_string(path)
+    }
+
+    fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> {
+        self.actual.read_dir(path)
+    }
+}
diff --git a/tvix/glue/src/tvix_store_io.rs b/tvix/glue/src/tvix_store_io.rs
new file mode 100644
index 000000000000..9be896ffc40f
--- /dev/null
+++ b/tvix/glue/src/tvix_store_io.rs
@@ -0,0 +1,359 @@
+//! This module provides an implementation of EvalIO talking to tvix-store.
+
+use nix_compat::store_path::{self, StorePath};
+use std::{io, path::Path, path::PathBuf, sync::Arc};
+use tokio::io::AsyncReadExt;
+use tracing::{error, instrument, warn};
+use tvix_eval::{EvalIO, FileType, StdIO};
+
+use tvix_castore::{
+    blobservice::BlobService,
+    directoryservice::{self, DirectoryService},
+    import,
+    proto::{node::Node, NamedNode},
+    B3Digest,
+};
+use tvix_store::{
+    nar::calculate_size_and_sha256,
+    pathinfoservice::PathInfoService,
+    proto::{NarInfo, PathInfo},
+};
+
+/// Implements [EvalIO], asking given [PathInfoService], [DirectoryService]
+/// and [BlobService].
+///
+/// In case the given path does not exist in these stores, we ask StdIO.
+/// This is to both cover cases of syntactically valid store paths, that exist
+/// on the filesystem (still managed by Nix), as well as being able to read
+/// files outside store paths.
+pub struct TvixStoreIO {
+    blob_service: Arc<dyn BlobService>,
+    directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+    std_io: StdIO,
+    tokio_handle: tokio::runtime::Handle,
+}
+
+impl TvixStoreIO {
+    pub fn new(
+        blob_service: Arc<dyn BlobService>,
+        directory_service: Arc<dyn DirectoryService>,
+        path_info_service: Arc<dyn PathInfoService>,
+        tokio_handle: tokio::runtime::Handle,
+    ) -> Self {
+        Self {
+            blob_service,
+            directory_service,
+            path_info_service,
+            std_io: StdIO {},
+            tokio_handle,
+        }
+    }
+
+    /// for a given [StorePath] and additional [Path] inside the store path,
+    /// look up the [PathInfo], and if it exists, and then use
+    /// [directoryservice::traverse_to] to return the
+    /// [Node] specified by `sub_path`.
+    #[instrument(skip(self), ret, err)]
+    fn store_path_to_root_node(
+        &self,
+        store_path: &StorePath,
+        sub_path: &Path,
+    ) -> Result<Option<Node>, io::Error> {
+        let path_info_service = self.path_info_service.clone();
+        let task = self.tokio_handle.spawn({
+            let digest = *store_path.digest();
+            async move { path_info_service.get(digest).await }
+        });
+        let path_info = match self.tokio_handle.block_on(task).unwrap()? {
+            // If there's no PathInfo found, early exit
+            None => return Ok(None),
+            Some(path_info) => path_info,
+        };
+
+        let root_node = {
+            match path_info.node {
+                None => {
+                    warn!(
+                        "returned PathInfo {:?} node is None, this shouldn't happen.",
+                        &path_info
+                    );
+                    return Ok(None);
+                }
+                Some(root_node) => match root_node.node {
+                    None => {
+                        warn!("node for {:?} is None, this shouldn't happen.", &root_node);
+                        return Ok(None);
+                    }
+                    Some(root_node) => root_node,
+                },
+            }
+        };
+
+        let directory_service = self.directory_service.clone();
+        let sub_path = sub_path.to_owned();
+        let task = self.tokio_handle.spawn(async move {
+            directoryservice::descend_to(directory_service, root_node, &sub_path).await
+        });
+
+        Ok(self.tokio_handle.block_on(task).unwrap()?)
+    }
+}
+
+impl EvalIO for TvixStoreIO {
+    #[instrument(skip(self), ret, err)]
+    fn path_exists(&self, path: &Path) -> Result<bool, io::Error> {
+        if let Ok((store_path, sub_path)) =
+            StorePath::from_absolute_path_full(&path.to_string_lossy())
+        {
+            if self
+                .store_path_to_root_node(&store_path, &sub_path)?
+                .is_some()
+            {
+                Ok(true)
+            } else {
+                // As tvix-store doesn't manage /nix/store on the filesystem,
+                // we still need to also ask self.std_io here.
+                self.std_io.path_exists(path)
+            }
+        } else {
+            // The store path is no store path, so do regular StdIO.
+            self.std_io.path_exists(path)
+        }
+    }
+
+    #[instrument(skip(self), ret, err)]
+    fn read_to_string(&self, path: &Path) -> Result<String, io::Error> {
+        if let Ok((store_path, sub_path)) =
+            StorePath::from_absolute_path_full(&path.to_string_lossy())
+        {
+            if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? {
+                // depending on the node type, treat read_to_string differently
+                match node {
+                    Node::Directory(_) => {
+                        // This would normally be a io::ErrorKind::IsADirectory (still unstable)
+                        Err(io::Error::new(
+                            io::ErrorKind::Unsupported,
+                            format!("tried to read directory at {:?} to string", path),
+                        ))
+                    }
+                    Node::File(file_node) => {
+                        let digest: B3Digest =
+                            file_node.digest.clone().try_into().map_err(|_e| {
+                                error!(
+                                    file_node = ?file_node,
+                                    "invalid digest"
+                                );
+                                io::Error::new(
+                                    io::ErrorKind::InvalidData,
+                                    format!("invalid digest length in file node: {:?}", file_node),
+                                )
+                            })?;
+
+                        let blob_service = self.blob_service.clone();
+
+                        let task = self.tokio_handle.spawn(async move {
+                            let mut reader = {
+                                let resp = blob_service.open_read(&digest).await?;
+                                match resp {
+                                    Some(blob_reader) => blob_reader,
+                                    None => {
+                                        error!(
+                                            blob.digest = %digest,
+                                            "blob not found",
+                                        );
+                                        Err(io::Error::new(
+                                            io::ErrorKind::NotFound,
+                                            format!("blob {} not found", &digest),
+                                        ))?
+                                    }
+                                }
+                            };
+
+                            let mut buf = String::new();
+
+                            reader.read_to_string(&mut buf).await?;
+                            Ok(buf)
+                        });
+
+                        self.tokio_handle.block_on(task).unwrap()
+                    }
+                    Node::Symlink(_symlink_node) => Err(io::Error::new(
+                        io::ErrorKind::Unsupported,
+                        "read_to_string for symlinks is unsupported",
+                    ))?,
+                }
+            } else {
+                // As tvix-store doesn't manage /nix/store on the filesystem,
+                // we still need to also ask self.std_io here.
+                self.std_io.read_to_string(path)
+            }
+        } else {
+            // The store path is no store path, so do regular StdIO.
+            self.std_io.read_to_string(path)
+        }
+    }
+
+    #[instrument(skip(self), ret, err)]
+    fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> {
+        if let Ok((store_path, sub_path)) =
+            StorePath::from_absolute_path_full(&path.to_string_lossy())
+        {
+            if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? {
+                match node {
+                    Node::Directory(directory_node) => {
+                        // fetch the Directory itself.
+                        let digest: B3Digest =
+                            directory_node.digest.clone().try_into().map_err(|_e| {
+                                io::Error::new(
+                                    io::ErrorKind::InvalidData,
+                                    format!(
+                                        "invalid digest length in directory node: {:?}",
+                                        directory_node
+                                    ),
+                                )
+                            })?;
+
+                        let directory_service = self.directory_service.clone();
+                        let digest_clone = digest.clone();
+                        let task = self
+                            .tokio_handle
+                            .spawn(async move { directory_service.get(&digest_clone).await });
+                        if let Some(directory) = self.tokio_handle.block_on(task).unwrap()? {
+                            let mut children: Vec<(bytes::Bytes, FileType)> = Vec::new();
+                            for node in directory.nodes() {
+                                children.push(match node {
+                                    Node::Directory(e) => (e.name, FileType::Directory),
+                                    Node::File(e) => (e.name, FileType::Regular),
+                                    Node::Symlink(e) => (e.name, FileType::Symlink),
+                                })
+                            }
+                            Ok(children)
+                        } else {
+                            // If we didn't get the directory node that's linked, that's a store inconsistency!
+                            error!(
+                                directory.digest = %digest,
+                                path = ?path,
+                                "directory not found",
+                            );
+                            Err(io::Error::new(
+                                io::ErrorKind::NotFound,
+                                format!("directory {digest} does not exist"),
+                            ))?
+                        }
+                    }
+                    Node::File(_file_node) => {
+                        // This would normally be a io::ErrorKind::NotADirectory (still unstable)
+                        Err(io::Error::new(
+                            io::ErrorKind::Unsupported,
+                            "tried to readdir path {:?}, which is a file",
+                        ))?
+                    }
+                    Node::Symlink(_symlink_node) => Err(io::Error::new(
+                        io::ErrorKind::Unsupported,
+                        "read_dir for symlinks is unsupported",
+                    ))?,
+                }
+            } else {
+                self.std_io.read_dir(path)
+            }
+        } else {
+            self.std_io.read_dir(path)
+        }
+    }
+
+    #[instrument(skip(self), ret, err)]
+    fn import_path(&self, path: &std::path::Path) -> Result<PathBuf, std::io::Error> {
+        let p = path.to_owned();
+        let blob_service = self.blob_service.clone();
+        let directory_service = self.directory_service.clone();
+        let path_info_service = self.path_info_service.clone();
+
+        let task = self.tokio_handle.spawn(async move {
+            import_path_with_pathinfo(blob_service, directory_service, path_info_service, &p).await
+        });
+
+        let path_info = self.tokio_handle.block_on(task).unwrap()?;
+
+        // from the [PathInfo], extract the store path (as string).
+        Ok({
+            let mut path = PathBuf::from(nix_compat::store_path::STORE_DIR_WITH_SLASH);
+
+            let root_node_name = path_info.node.unwrap().node.unwrap().get_name().to_vec();
+
+            // This must be a string, otherwise it would have failed validation.
+            let root_node_name = String::from_utf8(root_node_name).unwrap();
+
+            // append to the PathBuf
+            path.push(root_node_name);
+
+            // and return it
+            path
+        })
+    }
+
+    #[instrument(skip(self), ret)]
+    fn store_dir(&self) -> Option<String> {
+        Some("/nix/store".to_string())
+    }
+}
+
+/// Imports a given path on the filesystem into the store, and returns the
+/// [PathInfo] describing the path, that was sent to
+/// [PathInfoService].
+#[instrument(skip(blob_service, directory_service, path_info_service), ret, err)]
+async fn import_path_with_pathinfo(
+    blob_service: Arc<dyn BlobService>,
+    directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+    path: &std::path::Path,
+) -> Result<PathInfo, io::Error> {
+    // Call [import::ingest_path], which will walk over the given path and return a root_node.
+    let root_node = import::ingest_path(blob_service.clone(), directory_service.clone(), path)
+        .await
+        .expect("error during import_path");
+
+    // Render the NAR.
+    let (nar_size, nar_sha256) =
+        calculate_size_and_sha256(&root_node, blob_service.clone(), directory_service.clone())
+            .await
+            .expect("error during nar calculation"); // TODO: handle error
+
+    // TODO: make a path_to_name helper function?
+    let name = path
+        .file_name()
+        .expect("path must not be ..")
+        .to_str()
+        .expect("path must be valid unicode");
+
+    let output_path = store_path::build_nar_based_store_path(&nar_sha256, name);
+
+    // assemble a new root_node with a name that is derived from the nar hash.
+    let root_node = root_node.rename(output_path.to_string().into_bytes().into());
+
+    // assemble the [PathInfo] object.
+    let path_info = PathInfo {
+        node: Some(tvix_castore::proto::Node {
+            node: Some(root_node),
+        }),
+        // There's no reference scanning on path contents ingested like this.
+        references: vec![],
+        narinfo: Some(NarInfo {
+            nar_size,
+            nar_sha256: nar_sha256.to_vec().into(),
+            signatures: vec![],
+            reference_names: vec![],
+            deriver: None,
+            ca: Some(tvix_store::proto::nar_info::Ca {
+                r#type: tvix_store::proto::nar_info::ca::Hash::NarSha256.into(),
+                digest: nar_sha256.to_vec().into(),
+            }),
+        }),
+    };
+
+    // put into [PathInfoService], and return the [PathInfo] that we get
+    // back from there (it might contain additional signatures).
+    let path_info = path_info_service.put(path_info).await?;
+
+    Ok(path_info)
+}