about summary refs log tree commit diff
path: root/tvix/cli/src
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/cli/src')
-rw-r--r--tvix/cli/src/.skip-subtree1
-rw-r--r--tvix/cli/src/derivation.nix36
-rw-r--r--tvix/cli/src/derivation.rs783
-rw-r--r--tvix/cli/src/errors.rs26
-rw-r--r--tvix/cli/src/fetchurl.nix53
-rw-r--r--tvix/cli/src/known_paths.rs186
-rw-r--r--tvix/cli/src/main.rs287
-rw-r--r--tvix/cli/src/refscan.rs115
-rw-r--r--tvix/cli/src/tvix_io.rs79
-rw-r--r--tvix/cli/src/tvix_store_io.rs356
10 files changed, 1922 insertions, 0 deletions
diff --git a/tvix/cli/src/.skip-subtree b/tvix/cli/src/.skip-subtree
new file mode 100644
index 000000000000..a16a2afe1f1e
--- /dev/null
+++ b/tvix/cli/src/.skip-subtree
@@ -0,0 +1 @@
+Because of the derivation.nix file ...
diff --git a/tvix/cli/src/derivation.nix b/tvix/cli/src/derivation.nix
new file mode 100644
index 000000000000..9355cc3a96f0
--- /dev/null
+++ b/tvix/cli/src/derivation.nix
@@ -0,0 +1,36 @@
+# LGPL-2.1-or-later
+#
+# taken from: https://github.com/NixOS/nix/blob/master/src/libexpr/primops/derivation.nix
+#
+# TODO: rewrite in native Rust code
+
+/* This is the implementation of the ‘derivation’ builtin function.
+   It's actually a wrapper around the ‘derivationStrict’ primop. */
+
+drvAttrs @ { outputs ? [ "out" ], ... }:
+
+let
+
+  strict = derivationStrict drvAttrs;
+
+  commonAttrs = drvAttrs // (builtins.listToAttrs outputsList) //
+    {
+      all = map (x: x.value) outputsList;
+      inherit drvAttrs;
+    };
+
+  outputToAttrListElement = outputName:
+    {
+      name = outputName;
+      value = commonAttrs // {
+        outPath = builtins.getAttr outputName strict;
+        drvPath = strict.drvPath;
+        type = "derivation";
+        inherit outputName;
+      };
+    };
+
+  outputsList = map outputToAttrListElement outputs;
+
+in
+(builtins.head outputsList).value
diff --git a/tvix/cli/src/derivation.rs b/tvix/cli/src/derivation.rs
new file mode 100644
index 000000000000..76cc3f60f77e
--- /dev/null
+++ b/tvix/cli/src/derivation.rs
@@ -0,0 +1,783 @@
+//! Implements `builtins.derivation`, the core of what makes Nix build packages.
+use nix_compat::derivation::Derivation;
+use nix_compat::nixhash;
+use std::cell::RefCell;
+use std::collections::{btree_map, BTreeSet};
+use std::rc::Rc;
+use tvix_eval::builtin_macros::builtins;
+use tvix_eval::generators::{self, emit_warning_kind, GenCo};
+use tvix_eval::{
+    AddContext, CatchableErrorKind, CoercionKind, ErrorKind, NixAttrs, NixList, Value, WarningKind,
+};
+
+use crate::errors::Error;
+use crate::known_paths::{KnownPaths, PathKind, PathName};
+
+// Constants used for strangely named fields in derivation inputs.
+const STRUCTURED_ATTRS: &str = "__structuredAttrs";
+const IGNORE_NULLS: &str = "__ignoreNulls";
+
+/// Helper function for populating the `drv.outputs` field from a
+/// manually specified set of outputs, instead of the default
+/// `outputs`.
+async fn populate_outputs(
+    co: &GenCo,
+    drv: &mut Derivation,
+    outputs: NixList,
+) -> Result<(), ErrorKind> {
+    // Remove the original default `out` output.
+    drv.outputs.clear();
+
+    for output in outputs {
+        let output_name = generators::request_force(co, output)
+            .await
+            .to_str()
+            .context("determining output name")?;
+
+        if drv
+            .outputs
+            .insert(output_name.as_str().into(), Default::default())
+            .is_some()
+        {
+            return Err(Error::DuplicateOutput(output_name.as_str().into()).into());
+        }
+    }
+
+    Ok(())
+}
+
+/// Populate the inputs of a derivation from the build references
+/// found when scanning the derivation's parameters.
+fn populate_inputs<I: IntoIterator<Item = PathName>>(
+    drv: &mut Derivation,
+    known_paths: &KnownPaths,
+    references: I,
+) {
+    for reference in references.into_iter() {
+        let reference = &known_paths[&reference];
+        match &reference.kind {
+            PathKind::Plain => {
+                drv.input_sources.insert(reference.path.clone());
+            }
+
+            PathKind::Output { name, derivation } => {
+                match drv.input_derivations.entry(derivation.clone()) {
+                    btree_map::Entry::Vacant(entry) => {
+                        entry.insert(BTreeSet::from([name.clone()]));
+                    }
+
+                    btree_map::Entry::Occupied(mut entry) => {
+                        entry.get_mut().insert(name.clone());
+                    }
+                }
+            }
+
+            PathKind::Derivation { output_names } => {
+                match drv.input_derivations.entry(reference.path.clone()) {
+                    btree_map::Entry::Vacant(entry) => {
+                        entry.insert(output_names.clone());
+                    }
+
+                    btree_map::Entry::Occupied(mut entry) => {
+                        entry.get_mut().extend(output_names.clone().into_iter());
+                    }
+                }
+            }
+        }
+    }
+}
+
+/// Populate the output configuration of a derivation based on the
+/// parameters passed to the call, flipping the required
+/// parameters for a fixed-output derivation if necessary.
+///
+/// This function handles all possible combinations of the
+/// parameters, including invalid ones.
+///
+/// Due to the support for SRI hashes, and how these are passed along to
+/// builtins.derivation, outputHash and outputHashAlgo can have values which
+/// need to be further modified before constructing the Derivation struct.
+///
+/// If outputHashAlgo is an SRI hash, outputHashAlgo must either be an empty
+/// string, or the hash algorithm as specified in the (single) SRI (entry).
+/// SRI strings with multiple hash algorithms are not supported.
+///
+/// In case an SRI string was used, the (single) fixed output is populated
+/// with the hash algo name, and the hash digest is populated with the
+/// (lowercase) hex encoding of the digest.
+///
+/// These values are only rewritten for the outputs, not what's passed to env.
+fn populate_output_configuration(
+    drv: &mut Derivation,
+    hash: Option<String>,      // in nix: outputHash
+    hash_algo: Option<String>, // in nix: outputHashAlgo
+    hash_mode: Option<String>, // in nix: outputHashmode
+) -> Result<(), ErrorKind> {
+    // We only do something when `digest` and `algo` are `Some(_)``, and
+    // there's an `out` output.
+    if let (Some(hash), Some(algo), hash_mode) = (hash, hash_algo, hash_mode) {
+        match drv.outputs.get_mut("out") {
+            None => return Err(Error::ConflictingOutputTypes.into()),
+            Some(out) => {
+                // treat an empty algo as None
+                let a = if algo.is_empty() {
+                    None
+                } else {
+                    Some(algo.as_ref())
+                };
+
+                let output_hash = nixhash::from_str(&hash, a).map_err(Error::InvalidOutputHash)?;
+
+                // construct the NixHashWithMode.
+                out.hash_with_mode = match hash_mode.as_deref() {
+                    None | Some("flat") => Some(nixhash::NixHashWithMode::Flat(output_hash)),
+                    Some("recursive") => Some(nixhash::NixHashWithMode::Recursive(output_hash)),
+                    Some(other) => {
+                        return Err(Error::InvalidOutputHashMode(other.to_string()).into())
+                    }
+                }
+            }
+        }
+    }
+
+    Ok(())
+}
+
+/// Handles derivation parameters which are not just forwarded to
+/// the environment. The return value indicates whether the
+/// parameter should be included in the environment.
+async fn handle_derivation_parameters(
+    drv: &mut Derivation,
+    co: &GenCo,
+    name: &str,
+    value: &Value,
+    val_str: &str,
+) -> Result<Result<bool, CatchableErrorKind>, ErrorKind> {
+    match name {
+        IGNORE_NULLS => return Ok(Ok(false)),
+
+        // Command line arguments to the builder.
+        "args" => {
+            let args = value.to_list()?;
+            for arg in args {
+                match strong_coerce_to_string(co, arg).await? {
+                    Err(cek) => return Ok(Err(cek)),
+                    Ok(s) => drv.arguments.push(s),
+                }
+            }
+
+            // The arguments do not appear in the environment.
+            return Ok(Ok(false));
+        }
+
+        // Explicitly specified drv outputs (instead of default [ "out" ])
+        "outputs" => {
+            let outputs = value
+                .to_list()
+                .context("looking at the `outputs` parameter of the derivation")?;
+
+            populate_outputs(co, drv, outputs).await?;
+        }
+
+        "builder" => {
+            drv.builder = val_str.to_string();
+        }
+
+        "system" => {
+            drv.system = val_str.to_string();
+        }
+
+        _ => {}
+    }
+
+    Ok(Ok(true))
+}
+
+async fn strong_coerce_to_string(
+    co: &GenCo,
+    val: Value,
+) -> Result<Result<String, CatchableErrorKind>, ErrorKind> {
+    let val = generators::request_force(co, val).await;
+    match generators::request_string_coerce(co, val, CoercionKind::Strong).await {
+        Err(cek) => Ok(Err(cek)),
+        Ok(val_str) => Ok(Ok(val_str.as_str().to_string())),
+    }
+}
+
+#[builtins(state = "Rc<RefCell<KnownPaths>>")]
+mod derivation_builtins {
+    use super::*;
+    use nix_compat::store_path::hash_placeholder;
+    use tvix_eval::generators::Gen;
+
+    #[builtin("placeholder")]
+    async fn builtin_placeholder(co: GenCo, input: Value) -> Result<Value, ErrorKind> {
+        let placeholder = hash_placeholder(
+            input
+                .to_str()
+                .context("looking at output name in builtins.placeholder")?
+                .as_str(),
+        );
+
+        Ok(placeholder.into())
+    }
+
+    /// Strictly construct a Nix derivation from the supplied arguments.
+    ///
+    /// This is considered an internal function, users usually want to
+    /// use the higher-level `builtins.derivation` instead.
+    #[builtin("derivationStrict")]
+    async fn builtin_derivation_strict(
+        state: Rc<RefCell<KnownPaths>>,
+        co: GenCo,
+        input: Value,
+    ) -> Result<Value, ErrorKind> {
+        let input = input.to_attrs()?;
+        let name = generators::request_force(&co, input.select_required("name")?.clone())
+            .await
+            .to_str()
+            .context("determining derivation name")?;
+
+        if name.is_empty() {
+            return Err(ErrorKind::Abort("derivation has empty name".to_string()));
+        }
+
+        // Check whether attributes should be passed as a JSON file.
+        // TODO: the JSON serialisation has to happen here.
+        if let Some(sa) = input.select(STRUCTURED_ATTRS) {
+            if generators::request_force(&co, sa.clone()).await.as_bool()? {
+                return Err(ErrorKind::NotImplemented(STRUCTURED_ATTRS));
+            }
+        }
+
+        // Check whether null attributes should be ignored or passed through.
+        let ignore_nulls = match input.select(IGNORE_NULLS) {
+            Some(b) => generators::request_force(&co, b.clone()).await.as_bool()?,
+            None => false,
+        };
+
+        let mut drv = Derivation::default();
+        drv.outputs.insert("out".to_string(), Default::default());
+
+        // Configure fixed-output derivations if required.
+
+        async fn select_string(
+            co: &GenCo,
+            attrs: &NixAttrs,
+            key: &str,
+        ) -> Result<Result<Option<String>, CatchableErrorKind>, ErrorKind> {
+            if let Some(attr) = attrs.select(key) {
+                match strong_coerce_to_string(co, attr.clone()).await? {
+                    Err(cek) => return Ok(Err(cek)),
+                    Ok(str) => return Ok(Ok(Some(str))),
+                }
+            }
+
+            Ok(Ok(None))
+        }
+
+        for (name, value) in input.clone().into_iter_sorted() {
+            let value = generators::request_force(&co, value).await;
+            if ignore_nulls && matches!(value, Value::Null) {
+                continue;
+            }
+
+            match strong_coerce_to_string(&co, value.clone()).await? {
+                Err(cek) => return Ok(Value::Catchable(cek)),
+                Ok(val_str) => {
+                    // handle_derivation_parameters tells us whether the
+                    // argument should be added to the environment; continue
+                    // to the next one otherwise
+                    match handle_derivation_parameters(
+                        &mut drv,
+                        &co,
+                        name.as_str(),
+                        &value,
+                        &val_str,
+                    )
+                    .await?
+                    {
+                        Err(cek) => return Ok(Value::Catchable(cek)),
+                        Ok(false) => continue,
+                        _ => (),
+                    }
+
+                    // Most of these are also added to the builder's environment in "raw" form.
+                    if drv
+                        .environment
+                        .insert(name.as_str().to_string(), val_str.into())
+                        .is_some()
+                    {
+                        return Err(Error::DuplicateEnvVar(name.as_str().to_string()).into());
+                    }
+                }
+            }
+        }
+
+        let output_hash = match select_string(&co, &input, "outputHash")
+            .await
+            .context("evaluating the `outputHash` parameter")?
+        {
+            Err(cek) => return Ok(Value::Catchable(cek)),
+            Ok(s) => s,
+        };
+        let output_hash_algo = match select_string(&co, &input, "outputHashAlgo")
+            .await
+            .context("evaluating the `outputHashAlgo` parameter")?
+        {
+            Err(cek) => return Ok(Value::Catchable(cek)),
+            Ok(s) => s,
+        };
+        let output_hash_mode = match select_string(&co, &input, "outputHashMode")
+            .await
+            .context("evaluating the `outputHashMode` parameter")?
+        {
+            Err(cek) => return Ok(Value::Catchable(cek)),
+            Ok(s) => s,
+        };
+        populate_output_configuration(&mut drv, output_hash, output_hash_algo, output_hash_mode)?;
+
+        // Scan references in relevant attributes to detect any build-references.
+        let references = {
+            let state = state.borrow();
+            if state.is_empty() {
+                // skip reference scanning, create an empty result
+                Default::default()
+            } else {
+                let mut refscan = state.reference_scanner();
+                drv.arguments.iter().for_each(|s| refscan.scan(s));
+                drv.environment.values().for_each(|s| refscan.scan(s));
+                refscan.scan(&drv.builder);
+                refscan.finalise()
+            }
+        };
+
+        // Each output name needs to exist in the environment, at this
+        // point initialised as an empty string because that is the
+        // way of Golang ;)
+        for output in drv.outputs.keys() {
+            if drv
+                .environment
+                .insert(output.to_string(), String::new().into())
+                .is_some()
+            {
+                emit_warning_kind(&co, WarningKind::ShadowedOutput(output.to_string())).await;
+            }
+        }
+
+        let mut known_paths = state.borrow_mut();
+        populate_inputs(&mut drv, &known_paths, references);
+
+        // At this point, derivation fields are fully populated from
+        // eval data structures.
+        drv.validate(false).map_err(Error::InvalidDerivation)?;
+
+        // Calculate the derivation_or_fod_hash for the current derivation.
+        // This one is still intermediate (so not added to known_paths)
+        let derivation_or_fod_hash_tmp =
+            drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv));
+
+        // Mutate the Derivation struct and set output paths
+        drv.calculate_output_paths(&name, &derivation_or_fod_hash_tmp)
+            .map_err(Error::InvalidDerivation)?;
+
+        let derivation_path = drv
+            .calculate_derivation_path(&name)
+            .map_err(Error::InvalidDerivation)?;
+
+        // recompute the hash derivation modulo and add to known_paths
+        let derivation_or_fod_hash_final =
+            drv.derivation_or_fod_hash(|drv| known_paths.get_hash_derivation_modulo(drv));
+
+        known_paths.add_hash_derivation_modulo(
+            derivation_path.to_absolute_path(),
+            &derivation_or_fod_hash_final,
+        );
+
+        // mark all the new paths as known
+        let output_names: Vec<String> = drv.outputs.keys().map(Clone::clone).collect();
+        known_paths.drv(derivation_path.to_absolute_path(), &output_names);
+
+        for (output_name, output) in &drv.outputs {
+            known_paths.output(
+                &output.path,
+                output_name,
+                derivation_path.to_absolute_path(),
+            );
+        }
+
+        let mut new_attrs: Vec<(String, String)> = drv
+            .outputs
+            .into_iter()
+            .map(|(name, output)| (name, output.path))
+            .collect();
+
+        new_attrs.push(("drvPath".to_string(), derivation_path.to_absolute_path()));
+
+        Ok(Value::Attrs(Box::new(NixAttrs::from_iter(
+            new_attrs.into_iter(),
+        ))))
+    }
+
+    #[builtin("toFile")]
+    async fn builtin_to_file(
+        state: Rc<RefCell<KnownPaths>>,
+        co: GenCo,
+        name: Value,
+        content: Value,
+    ) -> Result<Value, ErrorKind> {
+        let name = name
+            .to_str()
+            .context("evaluating the `name` parameter of builtins.toFile")?;
+        let content = content
+            .to_str()
+            .context("evaluating the `content` parameter of builtins.toFile")?;
+
+        let mut refscan = state.borrow().reference_scanner();
+        refscan.scan(content.as_str());
+        let refs = {
+            let paths = state.borrow();
+            refscan
+                .finalise()
+                .into_iter()
+                .map(|path| paths[&path].path.to_string())
+                .collect::<Vec<_>>()
+        };
+
+        // TODO: fail on derivation references (only "plain" is allowed here)
+
+        let path = nix_compat::store_path::build_text_path(name.as_str(), content.as_str(), refs)
+            .map_err(|_e| {
+                nix_compat::derivation::DerivationError::InvalidOutputName(
+                    name.as_str().to_string(),
+                )
+            })
+            .map_err(Error::InvalidDerivation)?
+            .to_absolute_path();
+
+        state.borrow_mut().plain(&path);
+
+        // TODO: actually persist the file in the store at that path ...
+
+        Ok(Value::String(path.into()))
+    }
+}
+
+pub use derivation_builtins::builtins as derivation_builtins;
+
+#[cfg(test)]
+mod tests {
+    use crate::known_paths::KnownPaths;
+    use nix_compat::store_path::hash_placeholder;
+    use std::{cell::RefCell, rc::Rc};
+
+    #[test]
+    fn derivation() {
+        let mut eval = tvix_eval::Evaluation::new_impure(
+            r#"(derivation { name = "foo"; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#,
+            None,
+        );
+
+        let known_paths: Rc<RefCell<KnownPaths>> = Default::default();
+
+        eval.builtins
+            .extend(crate::derivation::derivation_builtins(known_paths));
+
+        // Add the actual `builtins.derivation` from compiled Nix code
+        // TODO: properly compose this
+        eval.src_builtins
+            .push(("derivation", include_str!("derivation.nix")));
+
+        let result = eval.evaluate();
+
+        assert!(result.errors.is_empty(), "expect evaluation to succeed");
+        let value = result.value.expect("must be some");
+        // TODO: test this more reliably, derive Eq?
+        assert_eq!(
+            "\"/nix/store/xpcvxsx5sw4rbq666blz6sxqlmsqphmr-foo\"",
+            value.to_string()
+        );
+    }
+
+    #[test]
+    fn derivation_empty_name() {
+        let mut eval = tvix_eval::Evaluation::new_impure(
+            r#"(derivation { name = ""; builder = "/bin/sh"; system = "x86_64-linux";}).outPath"#,
+            None,
+        );
+
+        let known_paths: Rc<RefCell<KnownPaths>> = Default::default();
+
+        eval.builtins
+            .extend(crate::derivation::derivation_builtins(known_paths));
+
+        // Add the actual `builtins.derivation` from compiled Nix code
+        // TODO: properly compose this
+        eval.src_builtins
+            .push(("derivation", include_str!("derivation.nix")));
+
+        assert!(
+            !eval.evaluate().errors.is_empty(),
+            "expect evaluation to fail"
+        );
+    }
+
+    // TODO: These tests are commented out because we do not have
+    // scaffolding to drive generators during testing at the moment.
+
+    // static mut OBSERVER: NoOpObserver = NoOpObserver {};
+
+    // // Creates a fake VM for tests, which can *not* actually be
+    // // used to force (most) values but can satisfy the type
+    // // parameter.
+    // fn fake_vm() -> VM<'static> {
+    //     // safe because accessing the observer doesn't actually do anything
+    //     unsafe {
+    //         VM::new(
+    //             Default::default(),
+    //             Box::new(tvix_eval::DummyIO),
+    //             &mut OBSERVER,
+    //             Default::default(),
+    //             todo!(),
+    //         )
+    //     }
+    // }
+
+    // #[test]
+    // fn populate_outputs_ok() {
+    //     let mut vm = fake_vm();
+    //     let mut drv = Derivation::default();
+    //     drv.outputs.insert("out".to_string(), Default::default());
+
+    //     let outputs = NixList::construct(
+    //         2,
+    //         vec![Value::String("foo".into()), Value::String("bar".into())],
+    //     );
+
+    //     populate_outputs(&mut vm, &mut drv, outputs).expect("populate_outputs should succeed");
+
+    //     assert_eq!(drv.outputs.len(), 2);
+    //     assert!(drv.outputs.contains_key("bar"));
+    //     assert!(drv.outputs.contains_key("foo"));
+    // }
+
+    // #[test]
+    // fn populate_outputs_duplicate() {
+    //     let mut vm = fake_vm();
+    //     let mut drv = Derivation::default();
+    //     drv.outputs.insert("out".to_string(), Default::default());
+
+    //     let outputs = NixList::construct(
+    //         2,
+    //         vec![Value::String("foo".into()), Value::String("foo".into())],
+    //     );
+
+    //     populate_outputs(&mut vm, &mut drv, outputs)
+    //         .expect_err("supplying duplicate outputs should fail");
+    // }
+
+    // #[test]
+    // fn populate_inputs_empty() {
+    //     let mut drv = Derivation::default();
+    //     let paths = KnownPaths::default();
+    //     let inputs = vec![];
+
+    //     populate_inputs(&mut drv, &paths, inputs);
+
+    //     assert!(drv.input_sources.is_empty());
+    //     assert!(drv.input_derivations.is_empty());
+    // }
+
+    // #[test]
+    // fn populate_inputs_all() {
+    //     let mut drv = Derivation::default();
+
+    //     let mut paths = KnownPaths::default();
+    //     paths.plain("/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo");
+    //     paths.drv(
+    //         "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv",
+    //         &["out"],
+    //     );
+    //     paths.output(
+    //         "/nix/store/zvpskvjwi72fjxg0vzq822sfvq20mq4l-bar",
+    //         "out",
+    //         "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv",
+    //     );
+
+    //     let inputs = vec![
+    //         "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo".into(),
+    //         "/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv".into(),
+    //         "/nix/store/zvpskvjwi72fjxg0vzq822sfvq20mq4l-bar".into(),
+    //     ];
+
+    //     populate_inputs(&mut drv, &paths, inputs);
+
+    //     assert_eq!(drv.input_sources.len(), 1);
+    //     assert!(drv
+    //         .input_sources
+    //         .contains("/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-foo"));
+
+    //     assert_eq!(drv.input_derivations.len(), 1);
+    //     assert!(drv
+    //         .input_derivations
+    //         .contains_key("/nix/store/aqffiyqx602lbam7n1zsaz3yrh6v08pc-bar.drv"));
+    // }
+
+    // #[test]
+    // fn populate_output_config_std() {
+    //     let mut drv = Derivation::default();
+
+    //     populate_output_configuration(&mut drv, None, None, None)
+    //         .expect("populate_output_configuration() should succeed");
+
+    //     assert_eq!(drv, Derivation::default(), "derivation should be unchanged");
+    // }
+
+    // #[test]
+    // fn populate_output_config_fod() {
+    //     let mut drv = Derivation::default();
+    //     drv.outputs.insert("out".to_string(), Default::default());
+
+    //     populate_output_configuration(
+    //         &mut drv,
+    //         Some("0000000000000000000000000000000000000000000000000000000000000000".into()),
+    //         Some("sha256".into()),
+    //         None,
+    //     )
+    //     .expect("populate_output_configuration() should succeed");
+
+    //     let expected = Hash {
+    //         algo: "sha256".into(),
+    //         digest: "0000000000000000000000000000000000000000000000000000000000000000".into(),
+    //     };
+
+    //     assert_eq!(drv.outputs["out"].hash, Some(expected));
+    // }
+
+    // #[test]
+    // fn populate_output_config_fod_recursive() {
+    //     let mut drv = Derivation::default();
+    //     drv.outputs.insert("out".to_string(), Default::default());
+
+    //     populate_output_configuration(
+    //         &mut drv,
+    //         Some("0000000000000000000000000000000000000000000000000000000000000000".into()),
+    //         Some("sha256".into()),
+    //         Some("recursive".into()),
+    //     )
+    //     .expect("populate_output_configuration() should succeed");
+
+    //     let expected = Hash {
+    //         algo: "r:sha256".into(),
+    //         digest: "0000000000000000000000000000000000000000000000000000000000000000".into(),
+    //     };
+
+    //     assert_eq!(drv.outputs["out"].hash, Some(expected));
+    // }
+
+    // #[test]
+    // /// hash_algo set to sha256, but SRI hash passed
+    // fn populate_output_config_flat_sri_sha256() {
+    //     let mut drv = Derivation::default();
+    //     drv.outputs.insert("out".to_string(), Default::default());
+
+    //     populate_output_configuration(
+    //         &mut drv,
+    //         Some("sha256-swapHA/ZO8QoDPwumMt6s5gf91oYe+oyk4EfRSyJqMg=".into()),
+    //         Some("sha256".into()),
+    //         Some("flat".into()),
+    //     )
+    //     .expect("populate_output_configuration() should succeed");
+
+    //     let expected = Hash {
+    //         algo: "sha256".into(),
+    //         digest: "b306a91c0fd93bc4280cfc2e98cb7ab3981ff75a187bea3293811f452c89a8c8".into(), // lower hex
+    //     };
+
+    //     assert_eq!(drv.outputs["out"].hash, Some(expected));
+    // }
+
+    // #[test]
+    // /// hash_algo set to empty string, SRI hash passed
+    // fn populate_output_config_flat_sri() {
+    //     let mut drv = Derivation::default();
+    //     drv.outputs.insert("out".to_string(), Default::default());
+
+    //     populate_output_configuration(
+    //         &mut drv,
+    //         Some("sha256-s6JN6XqP28g1uYMxaVAQMLiXcDG8tUs7OsE3QPhGqzA=".into()),
+    //         Some("".into()),
+    //         Some("flat".into()),
+    //     )
+    //     .expect("populate_output_configuration() should succeed");
+
+    //     let expected = Hash {
+    //         algo: "sha256".into(),
+    //         digest: "b3a24de97a8fdbc835b9833169501030b8977031bcb54b3b3ac13740f846ab30".into(), // lower hex
+    //     };
+
+    //     assert_eq!(drv.outputs["out"].hash, Some(expected));
+    // }
+
+    // #[test]
+    // fn handle_outputs_parameter() {
+    //     let mut vm = fake_vm();
+    //     let mut drv = Derivation::default();
+    //     drv.outputs.insert("out".to_string(), Default::default());
+
+    //     let outputs = Value::List(NixList::construct(
+    //         2,
+    //         vec![Value::String("foo".into()), Value::String("bar".into())],
+    //     ));
+    //     let outputs_str = outputs
+    //         .coerce_to_string(CoercionKind::Strong, &mut vm)
+    //         .unwrap();
+
+    //     handle_derivation_parameters(&mut drv, &mut vm, "outputs", &outputs, outputs_str.as_str())
+    //         .expect("handling 'outputs' parameter should succeed");
+
+    //     assert_eq!(drv.outputs.len(), 2);
+    //     assert!(drv.outputs.contains_key("bar"));
+    //     assert!(drv.outputs.contains_key("foo"));
+    // }
+
+    // #[test]
+    // fn handle_args_parameter() {
+    //     let mut vm = fake_vm();
+    //     let mut drv = Derivation::default();
+
+    //     let args = Value::List(NixList::construct(
+    //         3,
+    //         vec![
+    //             Value::String("--foo".into()),
+    //             Value::String("42".into()),
+    //             Value::String("--bar".into()),
+    //         ],
+    //     ));
+
+    //     let args_str = args
+    //         .coerce_to_string(CoercionKind::Strong, &mut vm)
+    //         .unwrap();
+
+    //     handle_derivation_parameters(&mut drv, &mut vm, "args", &args, args_str.as_str())
+    //         .expect("handling 'args' parameter should succeed");
+
+    //     assert_eq!(
+    //         drv.arguments,
+    //         vec!["--foo".to_string(), "42".to_string(), "--bar".to_string()]
+    //     );
+    // }
+
+    #[test]
+    fn builtins_placeholder_hashes() {
+        assert_eq!(
+            hash_placeholder("out").as_str(),
+            "/1rz4g4znpzjwh1xymhjpm42vipw92pr73vdgl6xs1hycac8kf2n9"
+        );
+
+        assert_eq!(
+            hash_placeholder("").as_str(),
+            "/171rf4jhx57xqz3p7swniwkig249cif71pa08p80mgaf0mqz5bmr"
+        );
+    }
+}
diff --git a/tvix/cli/src/errors.rs b/tvix/cli/src/errors.rs
new file mode 100644
index 000000000000..5cbddcbba811
--- /dev/null
+++ b/tvix/cli/src/errors.rs
@@ -0,0 +1,26 @@
+use nix_compat::{derivation::DerivationError, nixhash};
+use std::rc::Rc;
+use thiserror::Error;
+
+/// Errors related to derivation construction
+#[derive(Debug, Error)]
+pub enum Error {
+    #[error("an output with the name '{0}' is already defined")]
+    DuplicateOutput(String),
+    #[error("fixed-output derivations can only have the default `out`-output")]
+    ConflictingOutputTypes,
+    #[error("the environment variable '{0}' has already been set in this derivation")]
+    DuplicateEnvVar(String),
+    #[error("invalid derivation parameters: {0}")]
+    InvalidDerivation(DerivationError),
+    #[error("invalid output hash: {0}")]
+    InvalidOutputHash(nixhash::Error),
+    #[error("invalid output hash mode: '{0}', only 'recursive' and 'flat` are supported")]
+    InvalidOutputHashMode(String),
+}
+
+impl From<Error> for tvix_eval::ErrorKind {
+    fn from(err: Error) -> Self {
+        tvix_eval::ErrorKind::TvixError(Rc::new(err))
+    }
+}
diff --git a/tvix/cli/src/fetchurl.nix b/tvix/cli/src/fetchurl.nix
new file mode 100644
index 000000000000..3f182a5a319b
--- /dev/null
+++ b/tvix/cli/src/fetchurl.nix
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: LGPL-2.1
+#
+# This file is vendored from C++ Nix, as it needs to be bundled with
+# an evaluator to be able to evaluate nixpkgs.
+#
+# Source: https://github.com/NixOS/nix/blob/2.3.16/corepkgs/fetchurl.nix
+
+{ system ? "" # obsolete
+, url
+, hash ? "" # an SRI hash
+
+  # Legacy hash specification
+, md5 ? ""
+, sha1 ? ""
+, sha256 ? ""
+, sha512 ? ""
+, outputHash ? if hash != "" then hash else if sha512 != "" then sha512 else if sha1 != "" then sha1 else if md5 != "" then md5 else sha256
+, outputHashAlgo ? if hash != "" then "" else if sha512 != "" then "sha512" else if sha1 != "" then "sha1" else if md5 != "" then "md5" else "sha256"
+
+, executable ? false
+, unpack ? false
+, name ? baseNameOf (toString url)
+}:
+
+derivation {
+  builder = "builtin:fetchurl";
+
+  # New-style output content requirements.
+  inherit outputHashAlgo outputHash;
+  outputHashMode = if unpack || executable then "recursive" else "flat";
+
+  inherit name url executable unpack;
+
+  system = "builtin";
+
+  # No need to double the amount of network traffic
+  preferLocalBuild = true;
+
+  impureEnvVars = [
+    # We borrow these environment variables from the caller to allow
+    # easy proxy configuration.  This is impure, but a fixed-output
+    # derivation like fetchurl is allowed to do so since its result is
+    # by definition pure.
+    "http_proxy"
+    "https_proxy"
+    "ftp_proxy"
+    "all_proxy"
+    "no_proxy"
+  ];
+
+  # To make "nix-prefetch-url" work.
+  urls = [ url ];
+}
diff --git a/tvix/cli/src/known_paths.rs b/tvix/cli/src/known_paths.rs
new file mode 100644
index 000000000000..07373ef0da7a
--- /dev/null
+++ b/tvix/cli/src/known_paths.rs
@@ -0,0 +1,186 @@
+//! This module implements logic required for persisting known paths
+//! during an evaluation.
+//!
+//! Tvix needs to be able to keep track of each Nix store path that it
+//! knows about during the scope of a single evaluation and its
+//! related builds.
+//!
+//! This data is required to scan derivation inputs for the build
+//! references (the "build closure") that they make use of.
+//!
+//! Please see //tvix/eval/docs/build-references.md for more
+//! information.
+
+use crate::refscan::{ReferenceScanner, STORE_PATH_LEN};
+use nix_compat::nixhash::NixHash;
+use std::{
+    collections::{hash_map, BTreeSet, HashMap},
+    ops::Index,
+};
+
+#[derive(Debug, PartialEq)]
+pub enum PathKind {
+    /// A literal derivation (`.drv`-file), and the *names* of its outputs.
+    Derivation { output_names: BTreeSet<String> },
+
+    /// An output of a derivation, its name, and the path of its derivation.
+    Output { name: String, derivation: String },
+
+    /// A plain store path (e.g. source files copied to the store).
+    Plain,
+}
+
+#[derive(Debug, PartialEq)]
+pub struct KnownPath {
+    pub path: String,
+    pub kind: PathKind,
+}
+
+impl KnownPath {
+    fn new(path: String, kind: PathKind) -> Self {
+        KnownPath { path, kind }
+    }
+}
+
+/// Internal struct to prevent accidental leaks of the truncated path
+/// names.
+#[repr(transparent)]
+#[derive(Clone, Debug, Default, PartialEq, PartialOrd, Ord, Eq, Hash)]
+pub struct PathName(String);
+
+impl From<&str> for PathName {
+    fn from(s: &str) -> Self {
+        PathName(s[..STORE_PATH_LEN].to_string())
+    }
+}
+
+/// This instance is required to pass PathName instances as needles to
+/// the reference scanner.
+impl AsRef<[u8]> for PathName {
+    fn as_ref(&self) -> &[u8] {
+        self.0.as_ref()
+    }
+}
+
+#[derive(Debug, Default)]
+pub struct KnownPaths {
+    /// All known paths, keyed by a truncated version of their store
+    /// path used for reference scanning.
+    paths: HashMap<PathName, KnownPath>,
+
+    /// All known derivation or FOD hashes.
+    ///
+    /// Keys are derivation paths, values is the NixHash.
+    derivation_or_fod_hashes: HashMap<String, NixHash>,
+}
+
+impl Index<&PathName> for KnownPaths {
+    type Output = KnownPath;
+
+    fn index(&self, index: &PathName) -> &Self::Output {
+        &self.paths[index]
+    }
+}
+
+impl KnownPaths {
+    fn insert_path(&mut self, path: String, path_kind: PathKind) {
+        match self.paths.entry(path.as_str().into()) {
+            hash_map::Entry::Vacant(entry) => {
+                entry.insert(KnownPath::new(path, path_kind));
+            }
+
+            hash_map::Entry::Occupied(mut entry) => {
+                match (path_kind, &mut entry.get_mut().kind) {
+                    // These variant combinations require no "merging action".
+                    (PathKind::Plain, PathKind::Plain) => (),
+                    (PathKind::Output { .. }, PathKind::Output { .. }) => (),
+
+                    (
+                        PathKind::Derivation { output_names: new },
+                        PathKind::Derivation {
+                            output_names: ref mut old,
+                        },
+                    ) => {
+                        old.extend(new);
+                    }
+
+                    _ => panic!(
+                        "path '{}' inserted twice with different types",
+                        entry.key().0
+                    ),
+                };
+            }
+        };
+    }
+
+    /// Mark a plain path as known.
+    pub fn plain<S: ToString>(&mut self, path: S) {
+        self.insert_path(path.to_string(), PathKind::Plain);
+    }
+
+    /// Mark a derivation as known.
+    pub fn drv<P: ToString, O: ToString>(&mut self, path: P, outputs: &[O]) {
+        self.insert_path(
+            path.to_string(),
+            PathKind::Derivation {
+                output_names: outputs.iter().map(ToString::to_string).collect(),
+            },
+        );
+    }
+
+    /// Mark a derivation output path as known.
+    pub fn output<P: ToString, N: ToString, D: ToString>(
+        &mut self,
+        output_path: P,
+        name: N,
+        drv_path: D,
+    ) {
+        self.insert_path(
+            output_path.to_string(),
+            PathKind::Output {
+                name: name.to_string(),
+                derivation: drv_path.to_string(),
+            },
+        );
+    }
+
+    /// Checks whether there are any known paths. If not, a reference
+    /// scanner can not be created.
+    pub fn is_empty(&self) -> bool {
+        self.paths.is_empty()
+    }
+
+    /// Create a reference scanner from the current set of known paths.
+    pub fn reference_scanner(&self) -> ReferenceScanner<PathName> {
+        let candidates = self.paths.keys().map(Clone::clone).collect();
+        ReferenceScanner::new(candidates)
+    }
+
+    /// Fetch the opaque "hash derivation modulo" for a given derivation path.
+    pub fn get_hash_derivation_modulo(&self, drv_path: &str) -> NixHash {
+        // TODO: we rely on an invariant that things *should* have
+        // been calculated if we get this far.
+        self.derivation_or_fod_hashes[drv_path].clone()
+    }
+
+    pub fn add_hash_derivation_modulo<D: ToString>(
+        &mut self,
+        drv: D,
+        hash_derivation_modulo: &NixHash,
+    ) {
+        #[allow(unused_variables)] // assertions on this only compiled in debug builds
+        let old = self
+            .derivation_or_fod_hashes
+            .insert(drv.to_string(), hash_derivation_modulo.to_owned());
+
+        #[cfg(debug_assertions)]
+        {
+            if let Some(old) = old {
+                debug_assert!(
+                    old == *hash_derivation_modulo,
+                    "hash derivation modulo for a given derivation should always be calculated the same"
+                );
+            }
+        }
+    }
+}
diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs
new file mode 100644
index 000000000000..ebcfe4b800b8
--- /dev/null
+++ b/tvix/cli/src/main.rs
@@ -0,0 +1,287 @@
+mod derivation;
+mod errors;
+mod known_paths;
+mod refscan;
+mod tvix_io;
+mod tvix_store_io;
+
+use std::cell::RefCell;
+use std::rc::Rc;
+use std::sync::Arc;
+use std::{fs, path::PathBuf};
+
+use clap::Parser;
+use known_paths::KnownPaths;
+use rustyline::{error::ReadlineError, Editor};
+use tvix_castore::blobservice::MemoryBlobService;
+use tvix_castore::directoryservice::MemoryDirectoryService;
+use tvix_eval::observer::{DisassemblingObserver, TracingObserver};
+use tvix_eval::Value;
+use tvix_store::pathinfoservice::MemoryPathInfoService;
+use tvix_store_io::TvixStoreIO;
+
+#[derive(Parser)]
+struct Args {
+    /// Path to a script to evaluate
+    script: Option<PathBuf>,
+
+    #[clap(long, short = 'E')]
+    expr: Option<String>,
+
+    /// Dump the raw AST to stdout before interpreting
+    #[clap(long, env = "TVIX_DISPLAY_AST")]
+    display_ast: bool,
+
+    /// Dump the bytecode to stdout before evaluating
+    #[clap(long, env = "TVIX_DUMP_BYTECODE")]
+    dump_bytecode: bool,
+
+    /// Trace the runtime of the VM
+    #[clap(long, env = "TVIX_TRACE_RUNTIME")]
+    trace_runtime: bool,
+
+    /// Only compile, but do not execute code. This will make Tvix act
+    /// sort of like a linter.
+    #[clap(long)]
+    compile_only: bool,
+
+    /// Don't print warnings.
+    #[clap(long)]
+    no_warnings: bool,
+
+    /// A colon-separated list of directories to use to resolve `<...>`-style paths
+    #[clap(long, short = 'I', env = "NIX_PATH")]
+    nix_search_path: Option<String>,
+
+    /// Print "raw" (unquoted) output.
+    #[clap(long)]
+    raw: bool,
+
+    /// Strictly evaluate values, traversing them and forcing e.g.
+    /// elements of lists and attribute sets before printing the
+    /// return value.
+    #[clap(long)]
+    strict: bool,
+}
+
+/// Interprets the given code snippet, printing out warnings, errors
+/// and the result itself. The return value indicates whether
+/// evaluation succeeded.
+fn interpret(code: &str, path: Option<PathBuf>, args: &Args, explain: bool) -> bool {
+    let mut eval = tvix_eval::Evaluation::new_impure(code, path);
+    let known_paths: Rc<RefCell<KnownPaths>> = Default::default();
+
+    eval.strict = args.strict;
+
+    let blob_service = Arc::new(MemoryBlobService::default());
+    let directory_service = Arc::new(MemoryDirectoryService::default());
+    let path_info_service = Arc::new(MemoryPathInfoService::new(
+        blob_service.clone(),
+        directory_service.clone(),
+    ));
+
+    let tokio_runtime = tokio::runtime::Runtime::new().unwrap();
+
+    eval.io_handle = Box::new(tvix_io::TvixIO::new(
+        known_paths.clone(),
+        TvixStoreIO::new(
+            blob_service,
+            directory_service,
+            path_info_service,
+            tokio_runtime.handle().clone(),
+        ),
+    ));
+
+    // bundle fetchurl.nix (used in nixpkgs) by resolving <nix> to
+    // `/__corepkgs__`, which has special handling in [`nix_compat`].
+    eval.nix_path = args
+        .nix_search_path
+        .as_ref()
+        .map(|p| format!("nix=/__corepkgs__:{}", p))
+        .or_else(|| Some("nix=/__corepkgs__".to_string()));
+
+    eval.builtins
+        .extend(derivation::derivation_builtins(known_paths));
+
+    // Add the actual `builtins.derivation` from compiled Nix code
+    eval.src_builtins
+        .push(("derivation", include_str!("derivation.nix")));
+
+    let source_map = eval.source_map();
+    let result = {
+        let mut compiler_observer =
+            DisassemblingObserver::new(source_map.clone(), std::io::stderr());
+        if args.dump_bytecode {
+            eval.compiler_observer = Some(&mut compiler_observer);
+        }
+
+        let mut runtime_observer = TracingObserver::new(std::io::stderr());
+        if args.trace_runtime {
+            eval.runtime_observer = Some(&mut runtime_observer);
+        }
+
+        eval.evaluate()
+    };
+
+    if args.display_ast {
+        if let Some(ref expr) = result.expr {
+            eprintln!("AST: {}", tvix_eval::pretty_print_expr(expr));
+        }
+    }
+
+    for error in &result.errors {
+        error.fancy_format_stderr(&source_map);
+    }
+
+    if !args.no_warnings {
+        for warning in &result.warnings {
+            warning.fancy_format_stderr(&source_map);
+        }
+    }
+
+    if let Some(value) = result.value.as_ref() {
+        if explain {
+            println!("=> {}", value.explain());
+        } else {
+            println_result(value, args.raw);
+        }
+    }
+
+    // inform the caller about any errors
+    result.errors.is_empty()
+}
+
+/// Interpret the given code snippet, but only run the Tvix compiler
+/// on it and return errors and warnings.
+fn lint(code: &str, path: Option<PathBuf>, args: &Args) -> bool {
+    let mut eval = tvix_eval::Evaluation::new_impure(code, path);
+    eval.strict = args.strict;
+
+    let source_map = eval.source_map();
+
+    let mut compiler_observer = DisassemblingObserver::new(source_map.clone(), std::io::stderr());
+
+    if args.dump_bytecode {
+        eval.compiler_observer = Some(&mut compiler_observer);
+    }
+
+    if args.trace_runtime {
+        eprintln!("warning: --trace-runtime has no effect with --compile-only!");
+    }
+
+    let result = eval.compile_only();
+
+    if args.display_ast {
+        if let Some(ref expr) = result.expr {
+            eprintln!("AST: {}", tvix_eval::pretty_print_expr(expr));
+        }
+    }
+
+    for error in &result.errors {
+        error.fancy_format_stderr(&source_map);
+    }
+
+    for warning in &result.warnings {
+        warning.fancy_format_stderr(&source_map);
+    }
+
+    // inform the caller about any errors
+    result.errors.is_empty()
+}
+
+fn main() {
+    let args = Args::parse();
+
+    if let Some(file) = &args.script {
+        run_file(file.clone(), &args)
+    } else if let Some(expr) = &args.expr {
+        if !interpret(expr, None, &args, false) {
+            std::process::exit(1);
+        }
+    } else {
+        run_prompt(&args)
+    }
+}
+
+fn run_file(mut path: PathBuf, args: &Args) {
+    if path.is_dir() {
+        path.push("default.nix");
+    }
+    let contents = fs::read_to_string(&path).expect("failed to read the input file");
+
+    let success = if args.compile_only {
+        lint(&contents, Some(path), args)
+    } else {
+        interpret(&contents, Some(path), args, false)
+    };
+
+    if !success {
+        std::process::exit(1);
+    }
+}
+
+fn println_result(result: &Value, raw: bool) {
+    if raw {
+        println!("{}", result.to_str().unwrap().as_str())
+    } else {
+        println!("=> {} :: {}", result, result.type_of())
+    }
+}
+
+fn state_dir() -> Option<PathBuf> {
+    let mut path = dirs::data_dir();
+    if let Some(p) = path.as_mut() {
+        p.push("tvix")
+    }
+    path
+}
+
+fn run_prompt(args: &Args) {
+    let mut rl = Editor::<()>::new().expect("should be able to launch rustyline");
+
+    if args.compile_only {
+        eprintln!("warning: `--compile-only` has no effect on REPL usage!");
+    }
+
+    let history_path = match state_dir() {
+        // Attempt to set up these paths, but do not hard fail if it
+        // doesn't work.
+        Some(mut path) => {
+            let _ = std::fs::create_dir_all(&path);
+            path.push("history.txt");
+            let _ = rl.load_history(&path);
+            Some(path)
+        }
+
+        None => None,
+    };
+
+    loop {
+        let readline = rl.readline("tvix-repl> ");
+        match readline {
+            Ok(line) => {
+                if line.is_empty() {
+                    continue;
+                }
+
+                rl.add_history_entry(&line);
+
+                if let Some(without_prefix) = line.strip_prefix(":d ") {
+                    interpret(without_prefix, None, args, true);
+                } else {
+                    interpret(&line, None, args, false);
+                }
+            }
+            Err(ReadlineError::Interrupted) | Err(ReadlineError::Eof) => break,
+
+            Err(err) => {
+                eprintln!("error: {}", err);
+                break;
+            }
+        }
+    }
+
+    if let Some(path) = history_path {
+        rl.save_history(&path).unwrap();
+    }
+}
diff --git a/tvix/cli/src/refscan.rs b/tvix/cli/src/refscan.rs
new file mode 100644
index 000000000000..0e0bb6c77828
--- /dev/null
+++ b/tvix/cli/src/refscan.rs
@@ -0,0 +1,115 @@
+//! Simple scanner for non-overlapping, known references of Nix store paths in a
+//! given string.
+//!
+//! This is used for determining build references (see
+//! //tvix/eval/docs/build-references.md for more details).
+//!
+//! The scanner itself is using the Wu-Manber string-matching algorithm, using
+//! our fork of the `wu-mamber` crate.
+
+use std::collections::BTreeSet;
+use wu_manber::TwoByteWM;
+
+pub const STORE_PATH_LEN: usize = "/nix/store/00000000000000000000000000000000".len();
+
+/// Represents a "primed" reference scanner with an automaton that knows the set
+/// of store paths to scan for.
+pub struct ReferenceScanner<P: Ord + AsRef<[u8]>> {
+    candidates: Vec<P>,
+    searcher: Option<TwoByteWM>,
+    matches: Vec<usize>,
+}
+
+impl<P: Clone + Ord + AsRef<[u8]>> ReferenceScanner<P> {
+    /// Construct a new `ReferenceScanner` that knows how to scan for the given
+    /// candidate store paths.
+    pub fn new(candidates: Vec<P>) -> Self {
+        let searcher = if candidates.is_empty() {
+            None
+        } else {
+            Some(TwoByteWM::new(&candidates))
+        };
+
+        ReferenceScanner {
+            searcher,
+            candidates,
+            matches: Default::default(),
+        }
+    }
+
+    /// Scan the given str for all non-overlapping matches and collect them
+    /// in the scanner.
+    pub fn scan<S: AsRef<[u8]>>(&mut self, haystack: S) {
+        if haystack.as_ref().len() < STORE_PATH_LEN {
+            return;
+        }
+
+        if let Some(searcher) = &self.searcher {
+            for m in searcher.find(haystack) {
+                self.matches.push(m.pat_idx);
+            }
+        }
+    }
+
+    /// Finalise the reference scanner and return the resulting matches.
+    pub fn finalise(self) -> BTreeSet<P> {
+        self.matches
+            .into_iter()
+            .map(|idx| self.candidates[idx].clone())
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // The actual derivation of `nixpkgs.hello`.
+    const HELLO_DRV: &str = r#"Derive([("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1","","")],[("/nix/store/6z1jfnqqgyqr221zgbpm30v91yfj3r45-bash-5.1-p16.drv",["out"]),("/nix/store/ap9g09fxbicj836zm88d56dn3ff4clxl-stdenv-linux.drv",["out"]),("/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv",["out"])],["/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],"x86_64-linux","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash",["-e","/nix/store/9krlzvny65gdc8s7kpb6lkx8cd02c25b-default-builder.sh"],[("buildInputs",""),("builder","/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16/bin/bash"),("cmakeFlags",""),("configureFlags",""),("depsBuildBuild",""),("depsBuildBuildPropagated",""),("depsBuildTarget",""),("depsBuildTargetPropagated",""),("depsHostHost",""),("depsHostHostPropagated",""),("depsTargetTarget",""),("depsTargetTargetPropagated",""),("doCheck","1"),("doInstallCheck",""),("mesonFlags",""),("name","hello-2.12.1"),("nativeBuildInputs",""),("out","/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1"),("outputs","out"),("patches",""),("pname","hello"),("propagatedBuildInputs",""),("propagatedNativeBuildInputs",""),("src","/nix/store/pa10z4ngm0g83kx9mssrqzz30s84vq7k-hello-2.12.1.tar.gz"),("stdenv","/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux"),("strictDeps",""),("system","x86_64-linux"),("version","2.12.1")])"#;
+
+    #[test]
+    fn test_no_patterns() {
+        let mut scanner: ReferenceScanner<String> = ReferenceScanner::new(vec![]);
+
+        scanner.scan(HELLO_DRV);
+
+        let result = scanner.finalise();
+
+        assert_eq!(result.len(), 0);
+    }
+
+    #[test]
+    fn test_single_match() {
+        let mut scanner = ReferenceScanner::new(vec![
+            "/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16".to_string(),
+        ]);
+        scanner.scan(HELLO_DRV);
+
+        let result = scanner.finalise();
+
+        assert_eq!(result.len(), 1);
+        assert!(result.contains("/nix/store/4xw8n979xpivdc46a9ndcvyhwgif00hz-bash-5.1-p16"));
+    }
+
+    #[test]
+    fn test_multiple_matches() {
+        let candidates = vec![
+            // these exist in the drv:
+            "/nix/store/33l4p0pn0mybmqzaxfkpppyh7vx1c74p-hello-2.12.1".to_string(),
+            "/nix/store/pf80kikyxr63wrw56k00i1kw6ba76qik-hello-2.12.1.tar.gz.drv".to_string(),
+            "/nix/store/cp65c8nk29qq5cl1wyy5qyw103cwmax7-stdenv-linux".to_string(),
+            // this doesn't:
+            "/nix/store/fn7zvafq26f0c8b17brs7s95s10ibfzs-emacs-28.2.drv".to_string(),
+        ];
+
+        let mut scanner = ReferenceScanner::new(candidates.clone());
+        scanner.scan(HELLO_DRV);
+
+        let result = scanner.finalise();
+        assert_eq!(result.len(), 3);
+
+        for c in candidates[..3].iter() {
+            assert!(result.contains(c));
+        }
+    }
+}
diff --git a/tvix/cli/src/tvix_io.rs b/tvix/cli/src/tvix_io.rs
new file mode 100644
index 000000000000..74f91138bff8
--- /dev/null
+++ b/tvix/cli/src/tvix_io.rs
@@ -0,0 +1,79 @@
+//! This module implements a wrapper around tvix-eval's [EvalIO] type,
+//! adding functionality which is required by tvix-cli:
+//!
+//! 1. Marking plain paths known to the reference scanner.
+//! 2. Handling the C++ Nix `__corepkgs__`-hack for nixpkgs bootstrapping.
+//!
+//! All uses of [EvalIO] in tvix-cli must make use of this wrapper,
+//! otherwise fundamental features like nixpkgs bootstrapping and hash
+//! calculation will not work.
+
+use crate::KnownPaths;
+use std::cell::RefCell;
+use std::io;
+use std::path::{Path, PathBuf};
+use std::rc::Rc;
+use tvix_eval::{EvalIO, FileType};
+
+// TODO: Merge this together with TvixStoreIO?
+pub(crate) struct TvixIO<T: EvalIO> {
+    /// Ingested paths must be reported to this known paths tracker
+    /// for accurate build reference scanning.
+    known_paths: Rc<RefCell<KnownPaths>>,
+
+    // Actual underlying [EvalIO] implementation.
+    actual: T,
+}
+
+impl<T: EvalIO> TvixIO<T> {
+    pub(crate) fn new(known_paths: Rc<RefCell<KnownPaths>>, actual: T) -> Self {
+        Self {
+            known_paths,
+            actual,
+        }
+    }
+}
+
+impl<T: EvalIO> EvalIO for TvixIO<T> {
+    fn store_dir(&self) -> Option<String> {
+        self.actual.store_dir()
+    }
+
+    fn import_path(&self, path: &Path) -> Result<PathBuf, io::Error> {
+        let imported_path = self.actual.import_path(path)?;
+        self.known_paths
+            .borrow_mut()
+            .plain(imported_path.to_string_lossy());
+
+        Ok(imported_path)
+    }
+
+    fn path_exists(&self, path: &Path) -> Result<bool, io::Error> {
+        if path.starts_with("/__corepkgs__") {
+            return Ok(true);
+        }
+
+        self.actual.path_exists(path)
+    }
+
+    fn read_to_string(&self, path: &Path) -> Result<String, io::Error> {
+        // Bundled version of corepkgs/fetchurl.nix. The counterpart
+        // of this happens in `main`, where the `nix_path` of the
+        // evaluation has `nix=/__corepkgs__` added to it.
+        //
+        // This workaround is similar to what cppnix does for passing
+        // the path through.
+        //
+        // TODO: this comparison is bad and allocates, we should use
+        // the sane path library.
+        if path.starts_with("/__corepkgs__/fetchurl.nix") {
+            return Ok(include_str!("fetchurl.nix").to_string());
+        }
+
+        self.actual.read_to_string(path)
+    }
+
+    fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> {
+        self.actual.read_dir(path)
+    }
+}
diff --git a/tvix/cli/src/tvix_store_io.rs b/tvix/cli/src/tvix_store_io.rs
new file mode 100644
index 000000000000..cc69357282fd
--- /dev/null
+++ b/tvix/cli/src/tvix_store_io.rs
@@ -0,0 +1,356 @@
+//! This module provides an implementation of EvalIO talking to tvix-store.
+
+use nix_compat::store_path::{self, StorePath};
+use std::{io, path::Path, path::PathBuf, sync::Arc};
+use tokio::io::AsyncReadExt;
+use tracing::{error, instrument, warn};
+use tvix_eval::{EvalIO, FileType, StdIO};
+
+use tvix_castore::{
+    blobservice::BlobService,
+    directoryservice::{self, DirectoryService},
+    import,
+    proto::{node::Node, NamedNode},
+    B3Digest,
+};
+use tvix_store::{
+    nar::calculate_size_and_sha256,
+    pathinfoservice::PathInfoService,
+    proto::{NarInfo, PathInfo},
+};
+
+/// Implements [EvalIO], asking given [PathInfoService], [DirectoryService]
+/// and [BlobService].
+///
+/// In case the given path does not exist in these stores, we ask StdIO.
+/// This is to both cover cases of syntactically valid store paths, that exist
+/// on the filesystem (still managed by Nix), as well as being able to read
+/// files outside store paths.
+pub struct TvixStoreIO {
+    blob_service: Arc<dyn BlobService>,
+    directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+    std_io: StdIO,
+    tokio_handle: tokio::runtime::Handle,
+}
+
+impl TvixStoreIO {
+    pub fn new(
+        blob_service: Arc<dyn BlobService>,
+        directory_service: Arc<dyn DirectoryService>,
+        path_info_service: Arc<dyn PathInfoService>,
+        tokio_handle: tokio::runtime::Handle,
+    ) -> Self {
+        Self {
+            blob_service,
+            directory_service,
+            path_info_service,
+            std_io: StdIO {},
+            tokio_handle,
+        }
+    }
+
+    /// for a given [StorePath] and additional [Path] inside the store path,
+    /// look up the [PathInfo], and if it exists, and then use
+    /// [directoryservice::traverse_to] to return the
+    /// [Node] specified by `sub_path`.
+    #[instrument(skip(self), ret, err)]
+    fn store_path_to_root_node(
+        &self,
+        store_path: &StorePath,
+        sub_path: &Path,
+    ) -> Result<Option<Node>, io::Error> {
+        let path_info_service = self.path_info_service.clone();
+        let digest = store_path.digest.clone();
+        let task = self
+            .tokio_handle
+            .spawn(async move { path_info_service.get(digest).await });
+        let path_info = match self.tokio_handle.block_on(task).unwrap()? {
+            // If there's no PathInfo found, early exit
+            None => return Ok(None),
+            Some(path_info) => path_info,
+        };
+
+        let root_node = {
+            match path_info.node {
+                None => {
+                    warn!(
+                        "returned PathInfo {:?} node is None, this shouldn't happen.",
+                        &path_info
+                    );
+                    return Ok(None);
+                }
+                Some(root_node) => match root_node.node {
+                    None => {
+                        warn!("node for {:?} is None, this shouldn't happen.", &root_node);
+                        return Ok(None);
+                    }
+                    Some(root_node) => root_node,
+                },
+            }
+        };
+
+        let directory_service = self.directory_service.clone();
+        let sub_path = sub_path.to_owned();
+        let task = self.tokio_handle.spawn(async move {
+            directoryservice::descend_to(directory_service, root_node, &sub_path).await
+        });
+
+        Ok(self.tokio_handle.block_on(task).unwrap()?)
+    }
+}
+
+impl EvalIO for TvixStoreIO {
+    #[instrument(skip(self), ret, err)]
+    fn path_exists(&self, path: &Path) -> Result<bool, io::Error> {
+        if let Ok((store_path, sub_path)) =
+            StorePath::from_absolute_path_full(&path.to_string_lossy())
+        {
+            if self
+                .store_path_to_root_node(&store_path, &sub_path)?
+                .is_some()
+            {
+                Ok(true)
+            } else {
+                // As tvix-store doesn't manage /nix/store on the filesystem,
+                // we still need to also ask self.std_io here.
+                self.std_io.path_exists(path)
+            }
+        } else {
+            // The store path is no store path, so do regular StdIO.
+            self.std_io.path_exists(path)
+        }
+    }
+
+    #[instrument(skip(self), ret, err)]
+    fn read_to_string(&self, path: &Path) -> Result<String, io::Error> {
+        if let Ok((store_path, sub_path)) =
+            StorePath::from_absolute_path_full(&path.to_string_lossy())
+        {
+            if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? {
+                // depending on the node type, treat read_to_string differently
+                match node {
+                    Node::Directory(_) => {
+                        // This would normally be a io::ErrorKind::IsADirectory (still unstable)
+                        Err(io::Error::new(
+                            io::ErrorKind::Unsupported,
+                            format!("tried to read directory at {:?} to string", path),
+                        ))
+                    }
+                    Node::File(file_node) => {
+                        let digest: B3Digest =
+                            file_node.digest.clone().try_into().map_err(|_e| {
+                                error!(
+                                    file_node = ?file_node,
+                                    "invalid digest"
+                                );
+                                io::Error::new(
+                                    io::ErrorKind::InvalidData,
+                                    format!("invalid digest length in file node: {:?}", file_node),
+                                )
+                            })?;
+
+                        let blob_service = self.blob_service.clone();
+
+                        let task = self.tokio_handle.spawn(async move {
+                            let mut reader = {
+                                let resp = blob_service.open_read(&digest).await?;
+                                match resp {
+                                    Some(blob_reader) => blob_reader,
+                                    None => {
+                                        error!(
+                                            blob.digest = %digest,
+                                            "blob not found",
+                                        );
+                                        Err(io::Error::new(
+                                            io::ErrorKind::NotFound,
+                                            format!("blob {} not found", &digest),
+                                        ))?
+                                    }
+                                }
+                            };
+
+                            let mut buf = String::new();
+
+                            reader.read_to_string(&mut buf).await?;
+                            Ok(buf)
+                        });
+
+                        self.tokio_handle.block_on(task).unwrap()
+                    }
+                    Node::Symlink(_symlink_node) => Err(io::Error::new(
+                        io::ErrorKind::Unsupported,
+                        "read_to_string for symlinks is unsupported",
+                    ))?,
+                }
+            } else {
+                // As tvix-store doesn't manage /nix/store on the filesystem,
+                // we still need to also ask self.std_io here.
+                self.std_io.read_to_string(path)
+            }
+        } else {
+            // The store path is no store path, so do regular StdIO.
+            self.std_io.read_to_string(path)
+        }
+    }
+
+    #[instrument(skip(self), ret, err)]
+    fn read_dir(&self, path: &Path) -> Result<Vec<(bytes::Bytes, FileType)>, io::Error> {
+        if let Ok((store_path, sub_path)) =
+            StorePath::from_absolute_path_full(&path.to_string_lossy())
+        {
+            if let Some(node) = self.store_path_to_root_node(&store_path, &sub_path)? {
+                match node {
+                    Node::Directory(directory_node) => {
+                        // fetch the Directory itself.
+                        let digest: B3Digest =
+                            directory_node.digest.clone().try_into().map_err(|_e| {
+                                io::Error::new(
+                                    io::ErrorKind::InvalidData,
+                                    format!(
+                                        "invalid digest length in directory node: {:?}",
+                                        directory_node
+                                    ),
+                                )
+                            })?;
+
+                        let directory_service = self.directory_service.clone();
+                        let digest_clone = digest.clone();
+                        let task = self
+                            .tokio_handle
+                            .spawn(async move { directory_service.get(&digest_clone).await });
+                        if let Some(directory) = self.tokio_handle.block_on(task).unwrap()? {
+                            let mut children: Vec<(bytes::Bytes, FileType)> = Vec::new();
+                            for node in directory.nodes() {
+                                children.push(match node {
+                                    Node::Directory(e) => (e.name, FileType::Directory),
+                                    Node::File(e) => (e.name, FileType::Regular),
+                                    Node::Symlink(e) => (e.name, FileType::Symlink),
+                                })
+                            }
+                            Ok(children)
+                        } else {
+                            // If we didn't get the directory node that's linked, that's a store inconsistency!
+                            error!(
+                                directory.digest = %digest,
+                                path = ?path,
+                                "directory not found",
+                            );
+                            Err(io::Error::new(
+                                io::ErrorKind::NotFound,
+                                format!("directory {digest} does not exist"),
+                            ))?
+                        }
+                    }
+                    Node::File(_file_node) => {
+                        // This would normally be a io::ErrorKind::NotADirectory (still unstable)
+                        Err(io::Error::new(
+                            io::ErrorKind::Unsupported,
+                            "tried to readdir path {:?}, which is a file",
+                        ))?
+                    }
+                    Node::Symlink(_symlink_node) => Err(io::Error::new(
+                        io::ErrorKind::Unsupported,
+                        "read_dir for symlinks is unsupported",
+                    ))?,
+                }
+            } else {
+                self.std_io.read_dir(path)
+            }
+        } else {
+            self.std_io.read_dir(path)
+        }
+    }
+
+    #[instrument(skip(self), ret, err)]
+    fn import_path(&self, path: &std::path::Path) -> Result<PathBuf, std::io::Error> {
+        let p = path.to_owned();
+        let blob_service = self.blob_service.clone();
+        let directory_service = self.directory_service.clone();
+        let path_info_service = self.path_info_service.clone();
+
+        let task = self.tokio_handle.spawn(async move {
+            import_path_with_pathinfo(blob_service, directory_service, path_info_service, &p).await
+        });
+
+        let path_info = self.tokio_handle.block_on(task).unwrap()?;
+
+        // from the [PathInfo], extract the store path (as string).
+        Ok({
+            let mut path = PathBuf::from(nix_compat::store_path::STORE_DIR_WITH_SLASH);
+
+            let root_node_name = path_info.node.unwrap().node.unwrap().get_name().to_vec();
+
+            // This must be a string, otherwise it would have failed validation.
+            let root_node_name = String::from_utf8(root_node_name).unwrap();
+
+            // append to the PathBuf
+            path.push(root_node_name);
+
+            // and return it
+            path
+        })
+    }
+
+    #[instrument(skip(self), ret)]
+    fn store_dir(&self) -> Option<String> {
+        Some("/nix/store".to_string())
+    }
+}
+
+/// Imports a given path on the filesystem into the store, and returns the
+/// [PathInfo] describing the path, that was sent to
+/// [PathInfoService].
+#[instrument(skip(blob_service, directory_service, path_info_service), ret, err)]
+async fn import_path_with_pathinfo(
+    blob_service: Arc<dyn BlobService>,
+    directory_service: Arc<dyn DirectoryService>,
+    path_info_service: Arc<dyn PathInfoService>,
+    path: &std::path::Path,
+) -> Result<PathInfo, io::Error> {
+    // Call [import::ingest_path], which will walk over the given path and return a root_node.
+    let root_node = import::ingest_path(blob_service.clone(), directory_service.clone(), path)
+        .await
+        .expect("error during import_path");
+
+    // Render the NAR.
+    let (nar_size, nar_sha256) =
+        calculate_size_and_sha256(&root_node, blob_service.clone(), directory_service.clone())
+            .await
+            .expect("error during nar calculation"); // TODO: handle error
+
+    // TODO: make a path_to_name helper function?
+    let name = path
+        .file_name()
+        .expect("path must not be ..")
+        .to_str()
+        .expect("path must be valid unicode");
+
+    let output_path = store_path::build_nar_based_store_path(&nar_sha256, name);
+
+    // assemble a new root_node with a name that is derived from the nar hash.
+    let root_node = root_node.rename(output_path.to_string().into_bytes().into());
+
+    // assemble the [PathInfo] object.
+    let path_info = PathInfo {
+        node: Some(tvix_castore::proto::Node {
+            node: Some(root_node),
+        }),
+        // There's no reference scanning on path contents ingested like this.
+        references: vec![],
+        narinfo: Some(NarInfo {
+            nar_size,
+            nar_sha256: nar_sha256.to_vec().into(),
+            signatures: vec![],
+            reference_names: vec![],
+            // TODO: narinfo for talosctl.src contains `CA: fixed:r:sha256:1x13j5hy75221bf6kz7cpgld9vgic6bqx07w5xjs4pxnksj6lxb6`
+            // do we need this anywhere?
+        }),
+    };
+
+    // put into [PathInfoService], and return the [PathInfo] that we get
+    // back from there (it might contain additional signatures).
+    let path_info = path_info_service.put(path_info).await?;
+
+    Ok(path_info)
+}