34 files changed, 600 insertions, 1808 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml
index 677ce6ab85..4cf8ea146c 100644
--- a/tvix/eval/Cargo.toml
+++ b/tvix/eval/Cargo.toml
@@ -30,7 +30,6 @@ smol_str = "0.2.0"
 tabwriter = "1.2"
 test-strategy = { version = "0.2.1", optional = true }
 toml = "0.6.0"
-xml-rs = "0.8.4"
 sha2 = "0.10.8"
 sha1 = "0.10.6"
 md-5 = "0.10.6"
@@ -43,6 +42,9 @@ pretty_assertions = "1.2.1"
 rstest = "0.19.0"
 tempfile = "3.3.0"
 
+[target.'cfg(not(target_env = "msvc"))'.dev-dependencies]
+tikv-jemallocator = "0.5"
+
 [features]
 default = ["impure", "arbitrary", "nix_tests"]
 
diff --git a/tvix/eval/benches/eval.rs b/tvix/eval/benches/eval.rs
index 57d4eb71b5..1333f5018c 100644
--- a/tvix/eval/benches/eval.rs
+++ b/tvix/eval/benches/eval.rs
@@ -1,5 +1,11 @@
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use itertools::Itertools;
+#[cfg(not(target_env = "msvc"))]
+use tikv_jemallocator::Jemalloc;
+
+#[cfg(not(target_env = "msvc"))]
+#[global_allocator]
+static GLOBAL: Jemalloc = Jemalloc;
 
 fn interpret(code: &str) {
     tvix_eval::Evaluation::new_pure().evaluate(code, None);
diff --git a/tvix/eval/build.rs b/tvix/eval/build.rs
index a9c9a78b06..b37a6e8a0c 100644
--- a/tvix/eval/build.rs
+++ b/tvix/eval/build.rs
@@ -5,5 +5,10 @@ fn main() {
         "cargo:rustc-env=TVIX_CURRENT_SYSTEM={}",
         &env::var("TARGET").unwrap()
     );
-    println!("cargo:rerun-if-changed-env=TARGET")
+    println!("cargo:rerun-if-changed-env=TARGET");
+
+    // Pick up new test case files
+    // https://github.com/la10736/rstest/issues/256
+    println!("cargo:rerun-if-changed=src/tests/nix_tests");
+    println!("cargo:rerun-if-changed=src/tests/tvix_tests")
 }
diff --git a/tvix/eval/default.nix b/tvix/eval/default.nix
index 91661291f7..9dd5875f85 100644
--- a/tvix/eval/default.nix
+++ b/tvix/eval/default.nix
@@ -1,9 +1,16 @@
 # TODO: find a way to build the benchmarks via crate2nix
-{ depot, pkgs, ... }:
+{ depot, pkgs, lib, ... }:
 
-depot.tvix.crates.workspaceMembers.tvix-eval.build.override {
+(depot.tvix.crates.workspaceMembers.tvix-eval.build.override {
   runTests = true;
 
   # Make C++ Nix available, to compare eval results against.
   testInputs = [ pkgs.nix ];
-}
+}).overrideAttrs (old: rec {
+  meta.ci.targets = lib.filter (x: lib.hasPrefix "with-features" x || x == "no-features") (lib.attrNames passthru);
+  passthru = depot.tvix.utils.mkFeaturePowerset {
+    inherit (old) crateName;
+    features = [ "nix_tests" ];
+    override.testInputs = [ pkgs.nix ];
+  };
+})
diff --git a/tvix/eval/docs/abandoned/thread-local-vm.md b/tvix/eval/docs/abandoned/thread-local-vm.md
deleted file mode 100644
index c6a2d5e07e..0000000000
--- a/tvix/eval/docs/abandoned/thread-local-vm.md
+++ /dev/null
@@ -1,233 +0,0 @@
-# We can't have nice things because IFD
-
-The thread-local VM work below was ultimately not merged because it
-was decided that it would be harmful for `tvix::eval::Value` to
-implement `Eq`, `Hash`, or any of the other `std` traits.
-
-Implementing `std` traits on `Value` was deemed harmful because IFD
-can cause arbitrary amounts of compilation to occur, including
-network transactions with builders.  Obviously it would be
-unexpected and error-prone to have a `PartialEq::eq()` which does
-something like this.  This problem does not manifest within the
-"nixpkgs compatibility only" scope, or in any undeprecated language
-feature other than IFD.  Although IFD is outside the "nixpkgs
-compatibility scope", it [has been added to the TVL compatibility
-scope](https://cl.tvl.fyi/c/depot/+/7193/comment/3418997b_0dbd0b65/).
-
-This was the sole reason for not merging.
-
-The explanation below may be useful in case future circumstances
-affect the relevance of the reasoning above.
-
-The implementation can be found in these CLs:
-
-- [refactor(tvix/eval): remove lifetime parameter from VM<'o>](https://cl.tvl.fyi/c/depot/+/7194)
-- [feat(tvix/eval): [FOUNDLING] thread-local VM](https://cl.tvl.fyi/c/depot/+/7195)
-- [feat(tvix/eval): [FOUNDLING] VM::vm_xxx convenience methods](https://cl.tvl.fyi/c/depot/+/7196)
-- [refactor(tvix/eval): [FOUNDLING]: drop explicit `&mut vm` parameter](https://cl.tvl.fyi/c/depot/+/7197)
-
-# Thread-local storage for tvix::eval::vm::VM
-
-## The problem
-
-`Value::force()` takes a `&mut VM` argument, since forcing a value
-requires executing opcodes.  This means that `Value::nix_eq()` too
-must take a `&mut VM`, since any sensible definition of equality
-will have to force thunks.
-
-Unfortunately Rust's `PartialEq::eq()` function does not accept any
-additional arguments like this, so `Value` cannot implement
-`PartialEq`.  Worse, structs which *contain* `Value`s can't
-implement `PartialEq` either.  This means `Value`, and anything
-containing it, cannot be the key for a `BTreeMap` or `HashMap`.  We
-can't even insert `Value`s into a `HashSet`!
-
-There are other situations like this that don't involve `PartialEq`,
-but it's the most glaring one.  The main problem is that you need a
-`VM` in order to force thunks, and thunks can be anywhere in a
-`Value`.
-
-## Solving the problem with thread-locals
-
-We could avoid threading the `&mut VM` through the entire codebase
-by making it a thread-local.
-
-To do this without a performance hit, we need to use LLVM
-thread-locals, which are the same cost as references to `static`s
-but load relative to
-[`llvm.threadlocal.address`][threadlocal-intrinsic] instead of
-relative to the data segment.  Unfortunately `#[thread_local]` [is
-unstable][thread-local-unstable] and [unsafe in
-general][thread-local-unsafe] for most of the cases where we would
-want to use it.  There is one [exception][tls-const-init], however:
-if a `!thread_local()` has a `const` initializer, the compiler will
-insert a `#[thread_local]`; this special case is both safe and
-stable.
-
-The difficult decision is what the type of the thread-local should
-be.  Since you can't get a mutable reference to a `thread_local!()`
-it will have to be some interior-mutability-bestowing wrapper around
-our current `struct VM`.  Here are the choices:
-
-### `RefCell<VM>`
-
-This is the obvious first choice, since it lets you borrow a
-`RefMut<Target=VM>`.  The problem here is that we want to keep the
-codebase written such that all the functions in `impl VM` still take
-a `&mut self`.  This means that there will be an active mutable
-borrow for the duration of `VM::call_builtin()`.  So if we implement
-`PartialEq` by having `eq()` attempt a second mutable borrow from
-the thread-local storage, it will fail since there is already an
-active borrow.
-
-The problem here is that you can't "unborrow" a `RefMut` except by
-dropping it.  There's no way around this.
-
-#### Problem: Uglification
-
-The only solution here is to rewrite all the functions in `impl VM`
-so they don't take any kind of `self` argument, and then have them
-do a short-lived `.borrow_mut()` from the thread-local `RefCell`
-*separately, each time* they want to modify one of the fields of
-`VM` (currently `frames`, `stack`, `with_stack`, `warnings`).  This
-means that if you had a code sequence like this:
-
-```
-impl VM {
-  fn foo(&mut self, ...) {
-    ...
-    self.frame().ip += 1;
-    self.some_other_method();
-    self.frame().ip += 1;
-```
-
-You would need to add *two separate `borrow_mut()`s*, one for each
-of the `self.frame().ip+=1` statements.  You can't just do one big
-`borrow_mut()` because `some_other_method()` will call
-`borrow_mut()` and panic.
-
-#### Problem: Performance
-
-The `RefCell<VM>` approach also has a fairly huge performance hit,
-because every single modification to any part of `VM` will require a
-reference count increment/decrement, and a conditional branch based
-on the check (which will never fail) that the `RefCell` isn't
-already mutably borrowed.  It will also impede a lot of rustc's
-optimizations.
-
-### `Cell<VM>`
-
-This is a non-starter because it means that in order to mutate any
-field of `VM`, you have to move the entire `struct VM` out of the
-`Cell`, mutate it, and move it back in.
-
-### `Cell<Box<VM>>`
-
-Now we're getting warmer.  Here, we can move the `Box<VM>` out of
-the cell with a single pointer-sized memory access.
-
-We don't want to do the "uglification" described in the previous
-section.  We are very fortunate that, sometime in mid-2019, the Rust
-dieties [decreed by fiat][fiat-decree] that `&Cell<T>` and `&mut T`
-are bit-for-bit identical, and even gave us mortals safe wrappers
-[`from_mut()`][from_mut] and [`get_mut()`][get_mut] around
-`mem::transmute()`.
-
-So now, when a `VM` method (which takes `&mut self`) calls out to
-some external code (like a builtin), instead of passing the `&mut
-self` to the external code it can call `Cell::from_mut(&mut self)`,
-and then `Cell::swap()` that into the thread-local storage cell for
-the duration of the external code.  After the external code returns,
-it can `Cell::swap()` it back.  This whole dance gets wrapped in a
-lexical block, and the borrow checker sees that the `&Cell<Box<VM>>`
-returned by `Cell::from_mut()` lives only until the end of the
-lexical block, *so we get the `&mut self` back after the close-brace
-for that block*.  NLL FTW.  This sounds like a lot of work, but it
-should compile down to two pointer-sized loads and two pointer-sized
-stores, and it is incurred basically only for `OpBuiltin`.
-
-This all works, with only two issues:
-
-1. `vm.rs` needs to be very careful to do the thread-local cell swap
-   dance before calling anything that might call `PartialEq::eq()`
-   (or any other method that expects to be able to pull the `VM` out
-   of thread-local storage).  There is no compile-time check that we
-   did the dance in all the right places.  If we forget to do the
-   dance somewhere we'll get a runtime panic from `Option::expect()`
-   (see next section).
-
-2. Since we need to call `Cell::from_mut()` on a `Box<VM>` rather
-   than a bare `VM`, we still need to rewrite all of `vm.rs` so that
-   every function takes a `&mut Box<VM>` instead of a `&mut self`.
-   This creates a huge amount of "noise" in the code.
-
-Fortunately, it turns out that nearly all the "noise" that arises
-from the second point can be eliminated by taking advantage of
-[deref coercions][deref-coercions]!  This was the last "shoe to
-drop".
-
-There is still the issue of having to be careful about calls from
-`vm.rs` to things outside that file, but it's manageable.
-
-### `Cell<Option<Box<VM>>>`
-
-In order to get the "safe and stable `#[thread_local]`"
-[exception][tls-const-init] we need a `const` initializer, which
-means we need to be able to put something into the `Cell` that isn't
-a `VM`.  So the type needs to be `Cell<Option<Box<VM>>>`.
-
-Recall that you can't turn an `Option<&T>` into an `&Option<T>`.
-The latter type has the "is this a `Some` or `None`" bit immediately
-adjacent to the bits representing `T`.  So if I hand you a `t:&T`
-and you wrap it as `Some(t)`, those bits aren't adjacent in memory.
-This means that all the VM methods need to operate on an
-`Option<Box<VM>>` -- we can't just wrap a `Some()` around `&mut
-self` "at the last minute" before inserting it into the thread-local
-storage cell.  Fortunately deref coercions save the day here too --
-the coercion is inferred through both layers (`Box` and `Option`) of
-wrapper, so there is no additional noise in the code.
-
-Note that Rust is clever and can find some sequence of bits that
-aren't a valid `T`, so `sizeof(Option<T>)==sizeof(T)`.  And in fact,
-`Box<T>` is one of these cases (and this is guaranteed).  So the
-`Option` has no overhead.
-
-# Closing thoughts, language-level support
-
-This would have been easier with language-level support.
-
-## What wouldn't help
-
-Although it [it was decreed][fiat-decree] that `Cell<T>` and `&mut
-T` are interchangeable, a `LocalKey<Cell<T>>` isn't quite the same
-thing as a `Cell<T>`, so it wouldn't be safe for the standard
-library to contain something like this:
-
-```
-impl<T> LocalKey<Cell<T>> {
-  fn get_mut(&self) -> &mut T {
-    unsafe {
-      // ... mem::transmute() voodoo goes here ...
-```
-
-The problem here is that you can call `LocalKey<Cell<T>>::get_mut()` twice and
-end up with two `&mut T`s that point to the same thing (mutable aliasing) which
-results in undefined behavior.
-
-## What would help
-
-The ideal solution is for Rust to let you call arbitrary methods
-`T::foo(&mut self...)` on a `LocalKey<Cell<T>>`.  This way you can
-have one (and only one) `&mut T` at any syntactical point in your
-program -- the `&mut self`.
-
-
-[tls-const-init]: https://github.com/rust-lang/rust/pull/90774
-[thread-local-unstable]: https://github.com/rust-lang/rust/issues/29594
-[thread-local-unsafe-generally]: https://github.com/rust-lang/rust/issues/54366
-[fiat-decree]: https://github.com/rust-lang/rust/issues/43038
-[from_mut]: https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.from_mut
-[get_mut]: https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.get_mut
-[thread-local-unsafe]: [https://github.com/rust-lang/rust/issues/54366]
-[deref-coercions]: https://doc.rust-lang.org/book/ch15-02-deref.html#implicit-deref-coercions-with-functions-and-methods
-[threadlocal-intrinsic]: https://llvm.org/docs/LangRef.html#llvm-threadlocal-address-intrinsic
diff --git a/tvix/eval/docs/build-references.md b/tvix/eval/docs/build-references.md
deleted file mode 100644
index badcea1155..0000000000
--- a/tvix/eval/docs/build-references.md
+++ /dev/null
@@ -1,254 +0,0 @@
-Build references in derivations
-===============================
-
-This document describes how build references are calculated in Tvix. Build
-references are used to determine which store paths should be available to a
-builder during the execution of a build (i.e. the full build closure of a
-derivation).
-
-## String contexts in C++ Nix
-
-In C++ Nix, each string value in the evaluator carries an optional so-called
-"string context".
-
-These contexts are themselves a list of strings that take one of the following
-formats:
-
-1. `!<output_name>!<drv_path>`
-
-   This format describes a build reference to a specific output of a derivation.
-
-2. `=<drv_path>`
-
-   This format is used for a special case where a derivation attribute directly
-   refers to a derivation path (e.g. by accessing `.drvPath` on a derivation).
-
-   Note: In C++ Nix this case is quite special and actually requires a
-   store-database query during evaluation.
-
-3. `<path>` - a non-descript store path input, usually a plain source file (e.g.
-   from something like `src = ./.` or `src = ./foo.txt`).
-
-   In the case of `unsafeDiscardOutputDependency` this is used to pass a raw
-   derivation file, but *not* pull in its outputs.
-
-Lets introduce names for these (in the same order) to make them easier to
-reference below:
-
-```rust
-enum BuildReference {
-    /// !<output_name>!<drv_path>
-    SingleOutput(OutputName, DrvPath),
-
-    /// =<drv_path>
-    DrvClosure(DrvPath),
-
-    /// <path>
-    Path(StorePath),
-}
-```
-
-String contexts are, broadly speaking, created whenever a string is the result
-of a computation (e.g. string interpolation) that used a *computed* path or
-derivation in any way.
-
-Note: This explicitly does *not* include simply writing a literal string
-containing a store path (whether valid or not). That is only permitted through
-the `storePath` builtin.
-
-## Derivation inputs
-
-Based on the data above, the fields `inputDrvs` and `inputSrcs` of derivations
-are populated in `builtins.derivationStrict` (the function which
-`builtins.derivation`, which isn't actually a builtin, wraps).
-
-`inputDrvs` is represented by a map of derivation paths to the set of their
-outputs that were referenced by the context.
-
-TODO: What happens if the set is empty? Somebody claimed this means all outputs.
-
-`inputSrcs` is represented by a set of paths.
-
-These are populated by the above references as follows:
-
-* `SingleOutput` entries are merged into `inputDrvs`
-* `Path` entries are inserted into `inputSrcs`
-* `DrvClosure` leads to a special store computation (`computeFSClosure`), which
-  finds all paths referenced by the derivation and then inserts all of them into
-  the fields as above (derivations with _all_ their outputs)
-
-This is then serialised in the derivation and passed down the pipe.
-
-## Builtins interfacing with contexts
-
-C++ Nix has several builtins that interface directly with string contexts:
-
-* `unsafeDiscardStringContext`: throws away a string's string context (if
-  present)
-* `hasContext`: returns `true`/`false` depending on whether the string has
-  context
-* `unsafeDiscardOutputDependency`: drops dependencies on the *outputs* of a
-  `.drv` in the context, passing only the literal `.drv` itself
-
-  Note: This is only used for special test-cases in nixpkgs, and deprecated Nix
-  commands like `nix-push`.
-* `getContext`: returns the string context in serialised form as a Nix attribute
-  set
-* `appendContext`: adds a given string context to the string in the same format
-  as returned by `getContext`
-
-Most of the string manipulation operations will propagate the context to the
-result based on their parameters' contexts.
-
-## Placeholders
-
-C++ Nix has `builtins.placeholder`, which given the name of an output (e.g.
-`out`) creates a hashed string representation of that output name. If that
-string is used anywhere in input attributes, the builder will replace it with
-the actual name of the corresponding output of the current derivation.
-
-C++ Nix does not use contexts for this, it blindly creates a rewrite map of
-these placeholder strings to the names of all outputs, and runs the output
-replacement logic on all environment variables it creates, attribute files it
-passes etc.
-
-## Tvix & string contexts
-
-In the past, Tvix did not track string contexts in its evaluator at all, see
-the historical section for more information about that.
-
-Tvix tracks string contexts in every `NixString` structure via a
-`HashSet<BuildReference>` and offers an API to combine the references while
-keeping the exact internal structure of that data private.
-
-## Historical attempt: Persistent reference tracking
-
-We were investigating implementing a system which allows us to drop string
-contexts in favour of reference scanning derivation attributes.
-
-This means that instead of maintaining and passing around a string context data
-structure in eval, we maintain a data structure of *known paths* from the same
-evaluation elsewhere in Tvix, and scan each derivation attribute against this
-set of known paths when instantiating derivations.
-
-We believed we could take the stance that the system of string contexts as
-implemented in C++ Nix is likely an implementation detail that should not be
-leaking to the language surface as it does now.
-
-### Tracking "known paths"
-
-Every time a Tvix evaluation does something that causes a store interaction, a
-"known path" is created. On the language surface, this is the result of one of:
-
-1. Path literals (e.g. `src = ./.`).
-2. Calls to `builtins.derivationStrict` yielding a derivation and its output
-   paths.
-3. Calls to `builtins.path`.
-
-Whenever one of these occurs, some metadata that persists for the duration of
-one evaluation should be created in Nix. This metadata needs to be available in
-`builtins.derivationStrict`, and should be able to respond to these queries:
-
-1. What is the set of all known paths? (used for e.g. instantiating an
-   Aho-Corasick type string searcher)
-2. What is the _type_ of a path? (derivation path, derivation output, source
-   file)
-3. What are the outputs of a derivation?
-4. What is the derivation of an output?
-
-These queries will need to be asked of the metadata when populating the
-derivation fields.
-
-Note: Depending on how we implement `builtins.placeholder`, it might be useful
-to track created placeholders in this metadata, too.
-
-### Context builtins
-
-Context-reading builtins can be implemented in Tvix by adding `hasContext` and
-`getContext` with the appropriate reference-scanning logic. However, we should
-evaluate how these are used in nixpkgs and whether their uses can be removed.
-
-Context-mutating builtins can be implemented by tracking their effects in the
-value representation of Tvix, however we should consider not doing this at all.
-
-`unsafeDiscardOutputDependency` should probably never be used and we should warn
-or error on it.
-
-`unsafeDiscardStringContext` is often used as a workaround for avoiding IFD in
-inconvenient places (e.g. in the TVL depot pipeline generation). This is
-unnecessary in Tvix. We should evaluate which other uses exist, and act on them
-appropriately.
-
-The initial danger with diverging here is that we might cause derivation hash
-discrepancies between Tvix and C++ Nix, which can make initial comparisons of
-derivations generated by the two systems difficult. If this occurs we need to
-discuss how to approach it, but initially we will implement the mutating
-builtins as no-ops.
-
-### Why this did not work for us?
-
-Nix has a feature to perform environmental checks of your derivation, e.g.
-"these derivation outputs should not be referenced in this derivation", this was
-introduced in Nix 2.2 by
-https://github.com/NixOS/nix/commit/3cd15c5b1f5a8e6de87d5b7e8cc2f1326b420c88.
-
-Unfortunately, this feature introduced a very unfortunate and critical bug: all
-usage of this feature with contextful strings will actually force the
-derivation to depend at least at build time on those specific paths, see
-https://github.com/NixOS/nix/issues/4629.
-
-For example, if you wanted to `disallowedReferences` to a package and you used a
-derivation as a path, you would actually register that derivation as a input
-derivation of that derivation.
-
-This bug is still unfixed in Nix and it seems that fixing it would require
-introducing different ways to evaluate Nix derivations to preserve the
-output path calculation for Nix expressions so far.
-
-All of this would be fine if the bug behavior was uniform in the sense that no
-one tried to force-workaround it. Since Nixpkgs 23.05, due to
-https://github.com/NixOS/nixpkgs/pull/211783 this is not true anymore.
-
-If you let nixpkgs be the disjoint union of bootstrapping derivations $A$ and
-`stdenv.mkDerivation`-built derivations $B$.
-
-$A$ suffers from the bug and $B$ doesn't by the forced usage of
-`unsafeDiscardStringContext` on those special checking fields.
-
-This means that to build hash-compatible $A$ **and** $B$, we need to
-distinguish $A$ and $B$. A lot of hacks could be imagined to support this
-problem.
-
-Let's assume we have a solution to that problem, it means that we are able to
-detect implicitly when a set of specific fields are
-`unsafeDiscardStringContext`-ed.
-
-Thus, we could use that same trick to implement `unsafeDiscardStringContext`
-entirely for all fields actually.
-
-Now, to implement `unsafeDiscardStringContext` in the persistent reference
-tracking model, you will need to store a disallowed list of strings that should
-not trigger a reference when we are scanning a derivation parameters.
-
-But assume you have something like:
-
-```nix
-derivation {
-   buildInputs = [
-     stdenv.cc
-   ];
-
-   disallowedReferences = [ stdenv.cc ];
-}
-```
-
-If you unregister naively the `stdenv.cc` reference, it will silence the fact
-that it is part of the `buildInputs`, so you will observe that Nix will fail
-the derivation during environmental check, but Tvix would silently force remove
-that reference.
-
-Until proven otherwise, it seems highly difficult to have the fine-grained
-information to prevent reference tracking of those specific fields. It is not a
-failure of the persistent reference tracking, it is an unresolved critical bug
-of Nix that only nixpkgs really workarounded for `stdenv.mkDerivation`-based
-derivations.
diff --git a/tvix/eval/docs/builtins.md b/tvix/eval/docs/builtins.md
deleted file mode 100644
index dba4c48c65..0000000000
--- a/tvix/eval/docs/builtins.md
+++ /dev/null
@@ -1,138 +0,0 @@
-Nix builtins
-============
-
-Nix has a lot of built-in functions, some of which are accessible in
-the global scope, and some of which are only accessible through the
-global `builtins` attribute set.
-
-This document is an attempt to track all of these builtins, but
-without documenting their functionality.
-
-See also https://nixos.org/manual/nix/stable/expressions/builtins.html
-
-The `impl` column indicates implementation status in tvix:
-- implemented: "" (empty cell)
-- not yet implemented, but not blocked: `todo`
-- not yet implemented, but blocked by other prerequisites:
-  - `store`: awaiting eval<->store api(s)
-  - `context`: awaiting support for string contexts
-
-| name                          | global | arity | pure  | impl    |
-|-------------------------------|--------|-------|-------|---------|
-| abort                         | true   | 1     |       |         |
-| add                           | false  | 2     | true  |         |
-| addErrorContext               | false  | ?     |       | context |
-| all                           | false  | 2     | true  |         |
-| any                           | false  | 2     | true  |         |
-| appendContext                 | false  | ?     |       |         |
-| attrNames                     | false  | 1     | true  |         |
-| attrValues                    | false  |       | true  |         |
-| baseNameOf                    | true   |       |       |         |
-| bitAnd                        | false  |       |       |         |
-| bitOr                         | false  |       |       |         |
-| bitXor                        | false  |       |       |         |
-| builtins                      | true   |       |       |         |
-| catAttrs                      | false  |       |       |         |
-| compareVersions               | false  |       |       |         |
-| concatLists                   | false  |       |       |         |
-| concatMap                     | false  |       |       |         |
-| concatStringsSep              | false  |       |       |         |
-| currentSystem                 | false  |       |       |         |
-| currentTime                   | false  |       | false |         |
-| deepSeq                       | false  |       |       |         |
-| derivation                    | true   |       |       | store   |
-| derivationStrict              | true   |       |       | store   |
-| dirOf                         | true   |       |       |         |
-| div                           | false  |       |       |         |
-| elem                          | false  |       |       |         |
-| elemAt                        | false  |       |       |         |
-| false                         | true   |       |       |         |
-| fetchGit                      | true   |       |       | store   |
-| fetchMercurial                | true   |       |       | store   |
-| fetchTarball                  | true   |       |       | store   |
-| fetchurl                      | false  |       |       | store   |
-| filter                        | false  |       |       |         |
-| filterSource                  | false  |       |       | store   |
-| findFile                      | false  |       | false | todo    |
-| foldl'                        | false  |       |       |         |
-| fromJSON                      | false  |       |       |         |
-| fromTOML                      | true   |       |       |         |
-| functionArgs                  | false  |       |       |         |
-| genList                       | false  |       |       |         |
-| genericClosure                | false  |       |       | todo    |
-| getAttr                       | false  |       |       |         |
-| getContext                    | false  |       |       |         |
-| getEnv                        | false  |       | false |         |
-| hasAttr                       | false  |       |       |         |
-| hasContext                    | false  |       |       |         |
-| hashFile                      | false  |       | false |         |
-| hashString                    | false  |       |       |         |
-| head                          | false  |       |       |         |
-| import                        | true   |       |       |         |
-| intersectAttrs                | false  |       |       |         |
-| isAttrs                       | false  |       |       |         |
-| isBool                        | false  |       |       |         |
-| isFloat                       | false  |       |       |         |
-| isFunction                    | false  |       |       |         |
-| isInt                         | false  |       |       |         |
-| isList                        | false  |       |       |         |
-| isNull                        | true   |       |       |         |
-| isPath                        | false  |       |       |         |
-| isString                      | false  |       |       |         |
-| langVersion                   | false  |       |       |         |
-| length                        | false  |       |       |         |
-| lessThan                      | false  |       |       |         |
-| listToAttrs                   | false  |       |       |         |
-| map                           | true   |       |       |         |
-| mapAttrs                      | false  |       |       |         |
-| match                         | false  |       |       |         |
-| mul                           | false  |       |       |         |
-| nixPath                       | false  |       |       | todo    |
-| nixVersion                    | false  |       |       | todo    |
-| null                          | true   |       |       |         |
-| parseDrvName                  | false  |       |       |         |
-| partition                     | false  |       |       |         |
-| path                          | false  |       | sometimes | store |
-| pathExists                    | false  |       | false |         |
-| placeholder                   | true   |       |       | context |
-| readDir                       | false  |       | false |         |
-| readFile                      | false  |       | false |         |
-| removeAttrs                   | true   |       |       |         |
-| replaceStrings                | false  |       |       |         |
-| scopedImport                  | true   |       |       |         |
-| seq                           | false  |       |       |         |
-| sort                          | false  |       |       |         |
-| split                         | false  |       |       |         |
-| splitVersion                  | false  |       |       |         |
-| storeDir                      | false  |       |       | store   |
-| storePath                     | false  |       |       | store   |
-| stringLength                  | false  |       |       |         |
-| sub                           | false  |       |       |         |
-| substring                     | false  |       |       |         |
-| tail                          | false  |       |       |         |
-| throw                         | true   |       |       |         |
-| toFile                        | false  |       |       | store   |
-| toJSON                        | false  |       |       |         |
-| toPath                        | false  |       |       |         |
-| toString                      | true   |       |       |         |
-| toXML                         | true   |       |       |         |
-| trace                         | false  |       |       |         |
-| true                          | true   |       |       |         |
-| tryEval                       | false  |       |       |         |
-| typeOf                        | false  |       |       |         |
-| unsafeDiscardOutputDependency | false  |       |       |         |
-| unsafeDiscardStringContext    | false  |       |       |         |
-| unsafeGetAttrPos              | false  |       |       | todo    |
-| valueSize                     | false  |       |       | todo    |
-
-## Added after C++ Nix 2.3 (without Flakes enabled)
-
-| name          | global | arity | pure  | impl  |
-|---------------|--------|-------|-------|-------|
-| break         | false  | 1     |       | todo  |
-| ceil          | false  | 1     | true  |       |
-| fetchTree     | true   | 1     |       | todo  |
-| floor         | false  | 1     | true  |       |
-| groupBy       | false  | 2     | true  |       |
-| traceVerbose  | false  | 2     |       | todo  |
-| zipAttrsWith  | false  | 2     | true  | todo  |
diff --git a/tvix/eval/docs/catchable-errors.md b/tvix/eval/docs/catchable-errors.md
deleted file mode 100644
index ce320a9217..0000000000
--- a/tvix/eval/docs/catchable-errors.md
+++ /dev/null
@@ -1,131 +0,0 @@
-# (Possible) Implementation(s) of Catchable Errors for `builtins.tryEval`
-
-## Terminology
-
-Talking about “catchable errors” in Nix in general is a bit precarious since
-there is no properly established terminology. Also, the existing terms are less
-than apt. The reason for this lies in the fact that catchable errors (or
-whatever you want to call them) don't properly _exist_ in the language: While
-Nix's `builtins.tryEval` is (originally) based on the C++ exception system,
-it specifically lacks the ability of such systems to have an exception _value_
-whilst handling it. Consequently, these errors don't have an obvious name
-as they never appear _in_ the Nix language. They just have to be named in the
-respective Nix implementation:
-
-- In C++ Nix the only term for such errors is `AssertionError` which is the
-  name of the (C++) exception used in the implementation internally. This
-  term isn't great, though, as `AssertionError`s can not only be generated
-  using `assert`, but also using `throw` and failed `NIX_PATH` resolutions.
-  Were this terminology to be used in documentation addressing Nix language
-  users, it would probably only serve confusion.
-
-- Tvix currently (as of r/7573) uses the term catchable errors. This term
-  relates to nothing in the language as such: Errors are not caught, we rather
-  try to evaluate an expression. Catching also sort of implies that a value
-  representation of the error is attainable (like in an exception system) which
-  is untrue.
-
-In light of this I (sterni) would like to suggest “tryable errors” as an
-alternative term going forward which isn't inaccurate and relates to terms
-already established by language internal naming.
-
-However, this document will continue using the term catchable error until the
-naming is adjusted in Tvix itself.
-
-## Implementation
-
-Below we discuss different implementation approaches in Tvix in order to arrive
-at a proposal for the new one. The historical discussion is intended as a basis
-for discussing the proposal: Are we committing to an old or current mistake? Are
-we solving all problems that cropped up or were solved at any given point in
-time?
-
-### Original
-
-The original implementation of `tryEval` in cl/6924 was quite straightforward:
-It would simply interrupt the propagation of a potential catchable error to the
-top level (which usually happened using the `?` operator) in the builtin and
-construct the appropriate representation of an unsuccessful evaluation if the
-error was deemed catchable. It had, however, multiple problems:
-
-- The VM was originally written without `tryEval` in mind, i.e. it largely
-  assumed that an error would always cause execution to be terminated. This
-  problem was later solved (cl/6940).
-- Thunks could not be `tryEval`-ed multiple times (b/281). This was another
-  consequence of VM architecture at the time: Thunks would be blackholed
-  before evaluation was started and the error could occur. Due to the
-  interaction of the generator-based VM code and `Value::force` the part
-  of the code altering the thunk state would never be informed about the
-  evaluation result in case of a failure, so the thunk would remain
-  blackholed leading to a crash if the same thunk was `tryEval`-ed or
-  forced again. To solve this issue, amjoseph completely overhauled
-  the implementation.
-
-One key point about this implementation is that it is based on the assumption
-that catchable errors can only be generated in thunks, i.e. expressions causing
-them are never evaluated strictly. This can be illustrated using C++ Nix:
-
-```console
-> nix-instantiate --eval -E '[ (assert false; true) (builtins.throw "") <nixpkgs> ]'
-[ <CODE> <CODE> <CODE> ]
-```
-
-If this wasn't the case, the VM could encounter the error in a situation where
-the error would not have needed to pass through the `tryEval` builtin, causing
-evaluation to abort.
-
-### Present
-
-The current system (mostly implemented in cl/9289) uses a very different
-approach: Instead of relying on the thunk boundary, catchable errors are no
-longer errors, but special values. They are created at the relevant points (e.g.
-`builtins.throw`) and propagated whenever they are encountered by VM ops or
-builtins. Finally, they either encounter `builtins.tryEval` (and are converted to
-an ordinary value again) or the top level where they become a normal error again.
-
-The problems with this mostly stem from the confusion between values and errors
-that it necessitates:
-
-- In most circumstances, catchable errors end up being errors again, as `tryEval`
-  is not used a lot. So `throw`s usually end up causing evaluation to abort.
-  Consequently, not only `Value::Catchable` is necessary, but also a corresponding
-  error variant that is _only_ created if a catchable value remains at the end of
-  evaluation. A requirement that was missed until cl/10991 (!) which illustrate
-  how strange that architecture is. A consequence of this is that catchable
-  errors have no location information at all.
-- `Value::Catchable` is similar to other internal values in Tvix, but is much
-  more problematic. Aside from thunks, internal values only exist for a brief
-  amount of time on the stack and it is very clear what parts of the VM or
-  builtins need to handle them. This means that the rest of the implementation
-  need to consider them, keeping the complexity caused by the internal value
-  low. `Value::Catchable`, on the other hand, may exist anywhere and be passed
-  to any VM op or builtin, so it needs to be correctly propagated _everywhere_.
-  This causes a lot of noise in the code as well as a big potential for bugs.
-  Essentially, catchable errors require as much attention by the Tvix developer
-  as laziness. This doesn't really correlate to the importance of the two
-  features to the Nix language.
-
-### Future?
-
-The core assumption of the original solution does offer a path forward: After
-cl/9289 we should be in a better position to introspect an error occurring from
-within the VM code, but we need a better way of storing such an error to prevent
-another b/281. If catchable errors can only be generated in thunks, we can just
-use the thunk representation for this. This would mean that `Thunk::force_`
-would need to check if evaluation was successful and (in case of failure)
-change the thunk representation
-
-- either to the original `ThunkRepr::Suspended` which would be simple, but of
-  course mean duplicated evaluation work in some expressions. In fact, this
-  would probably leave a lot of easy performance on the table for use cases we
-  would like to support, e.g. tree walkers for nixpkgs.
-- or to a new `ThunkRepr` variant that stores the kind of the error and all
-  necessary location info so stack traces can work properly. This of course
-  reintroduces some of the difficulty of having two kinds of errors, but it is
-  hopefully less problematic, as the thunk boundary (i.e. `Thunk::force`) is
-  where errors would usually occur.
-
-Besides the question whether this proposal can actually be implemented, another
-consideration is whether the underlying assumption will hold in the future, i.e.
-can we implement optimizations for thunk elimination in a way that thunks that
-generate catchable errors are never eliminated?
diff --git a/tvix/eval/docs/known-optimisation-potential.md b/tvix/eval/docs/known-optimisation-potential.md
deleted file mode 100644
index 0ab185fe1b..0000000000
--- a/tvix/eval/docs/known-optimisation-potential.md
+++ /dev/null
@@ -1,162 +0,0 @@
-Known Optimisation Potential
-============================
-
-There are several areas of the Tvix evaluator code base where
-potentially large performance gains can be achieved through
-optimisations that we are already aware of.
-
-The shape of most optimisations is that of moving more work into the
-compiler to simplify the runtime execution of Nix code. This leads, in
-some cases, to drastically higher complexity in both the compiler
-itself and in invariants that need to be guaranteed between the
-runtime and the compiler.
-
-For this reason, and because we lack the infrastructure to adequately
-track their impact (WIP), we have not yet implemented these
-optimisations, but note the most important ones here.
-
-* Use "open upvalues" [hard]
-
-  Right now, Tvix will immediately close over all upvalues that are
-  created and clone them into the `Closure::upvalues` array.
-
-  Instead of doing this, we can statically determine most locals that
-  are closed over *and escape their scope* (similar to how the
-  `compiler::scope::Scope` struct currently tracks whether locals are
-  used at all).
-
-  If we implement the machinery to track this, we can implement some
-  upvalues at runtime by simply sticking stack indices in the upvalue
-  array and only copy the values where we know that they escape.
-
-* Avoid `with` value duplication [easy]
-
-  If a `with` makes use of a local identifier in a scope that can not
-  close before the with (e.g. not across `LambdaCtx` boundaries), we
-  can avoid the allocation of the phantom value and duplication of the
-  `NixAttrs` value on the stack. In this case we simply push the stack
-  index of the known local.
-
-* Multiple attribute selection [medium]
-
-  An instruction could be introduced that avoids repeatedly pushing an
-  attribute set to/from the stack if multiple keys are being selected
-  from it. This occurs, for example, when inheriting from an attribute
-  set or when binding function formals.
-
-* Split closure/function representation [easy]
-
-  Functions have fewer fields that need to be populated at runtime and
-  can directly use the `value::function::Lambda` representation where
-  possible.
-
-* Apply `compiler::optimise_select` to other set operations [medium]
-
-  In addition to selects, statically known attribute resolution could
-  also be used for things like `?` or `with`. The latter might be a
-  little more complicated but is worth investigating.
-
-* Inline fully applied builtins with equivalent operators [medium]
-
-  Some `builtins` have equivalent operators, e.g. `builtins.sub`
-  corresponds to the `-` operator, `builtins.hasAttr` to the `?`
-  operator etc. These operators additionally compile to a primitive
-  VM opcode, so they should be just as cheap (if not cheaper) as
-  a builtin application.
-
-  In case the compiler encounters a fully applied builtin (i.e.
-  no currying is occurring) and the `builtins` global is unshadowed,
-  it could compile the equivalent operator bytecode instead: For
-  example, `builtins.sub 20 22` would be compiled as `20 - 22`.
-  This would ensure that equivalent `builtins` can also benefit
-  from special optimisations we may implement for certain operators
-  (in the absence of currying). E.g. we could optimise access
-  to the `builtins` attribute set which a call to
-  `builtins.getAttr "foo" builtins` should also profit from.
-
-* Avoid nested `VM::run` calls [hard]
-
-  Currently when encountering Nix-native callables (thunks, closures)
-  the VM's run loop will nest and return the value of the nested call
-  frame one level up. This makes the Rust call stack almost mirror the
-  Nix call stack, which is usually undesirable.
-
-  It is possible to detect situations where this is avoidable and
-  instead set up the VM in such a way that it continues and produces
-  the desired result in the same run loop, but this is kind of tricky
-  to get right - especially while other parts are still in flux.
-
-  For details consult the commit with Gerrit change ID
-  `I96828ab6a628136e0bac1bf03555faa4e6b74ece`, in which the initial
-  attempt at doing this was reverted.
-
-* Avoid thunks if only identifier closing is required [medium]
-
-  Some constructs, like `with`, mostly do not change runtime behaviour
-  if thunked. However, they are wrapped in thunks to ensure that
-  deferred identifiers are resolved correctly.
-
-  This can be avoided, as we statically analyse the scope and should
-  be able to tell whether any such logic was required.
-
-* Intern literals [easy]
-
-  Currently, the compiler emits a separate entry in the constant
-  table for each literal.  So the program `1 + 1 + 1` will have
-  three entries in its `Chunk::constants` instead of only one.
-
-* Do some list and attribute set operations in place [hard]
-
-  Algorithms that can not do a lot of work inside `builtins` like `map`,
-  `filter` or `foldl'` usually perform terribly if they use data structures like
-  lists and attribute sets.
-
-  `builtins` can do work in place on a copy of a `Value`, but naïvely expressed
-  recursive algorithms will usually use `//` and `++` to do a single change to a
-  `Value` at a time, requiring a full copy of the data structure each time.
-  It would be a big improvement if we could do some of these operations in place
-  without requiring a new copy.
-
-  There are probably two approaches: We could determine statically if a value is
-  reachable from elsewhere and emit a special in place instruction if not. An
-  easier alternative is probably to rely on reference counting at runtime: If no
-  other reference to a value exists, we can extend the list or update the
-  attribute set in place.
-
-  An **alternative** to this is using [persistent data
-  structures](https://en.wikipedia.org/wiki/Persistent_data_structure) or at the
-  very least [immutable data structures](https://docs.rs/im/latest/im/) that can
-  be copied more efficiently than the stock structures we are using at the
-  moment.
-
-* Skip finalising unfinalised thunks or non-thunks instead of crashing [easy]
-
-  Currently `OpFinalise` crashes the VM if it is called on values that don't
-  need to be finalised. This helps catching miscompilations where `OpFinalise`
-  operates on the wrong `StackIdx`. In the case of function argument patterns,
-  however, this means extra VM stack and instruction overhead for dynamically
-  determining if finalisation is necessary or not. This wouldn't be necessary
-  if `OpFinalise` would just noop on any values that don't need to be finalised
-  (anymore).
-
-* Phantom binding for from expression of inherits [easy]
-
-  The from expression of an inherit is reevaluated for each inherit. This can
-  be demonstrated using the following Nix expression which, counter-intuitively,
-  will print “plonk” twice.
-
-  ```nix
-  let
-    inherit (builtins.trace "plonk" { a = null; b = null; }) a b;
-  in
-  builtins.seq a (builtins.seq b null)
-  ```
-
-  In most Nix code, the from expression is just an identifier, so it is not
-  terribly inefficient, but in some cases a more expensive expression may
-  be used. We should create a phantom binding for the from expression that
-  is reused in the inherits, so only a single thunk is created for the from
-  expression.
-
-  Since we discovered this, C++ Nix has implemented a similar optimization:
-  <https://github.com/NixOS/nix/pull/9847>.
diff --git a/tvix/eval/docs/language-issues.md b/tvix/eval/docs/language-issues.md
deleted file mode 100644
index 152e6594a1..0000000000
--- a/tvix/eval/docs/language-issues.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# Nix language issues
-
-In the absence of a language standard, what Nix (the language) is, is prescribed
-by the behavior of the C++ Nix implementation. Still, there are reasons not to
-accept some behavior:
-
-* Tvix aims for nixpkgs compatibility only. This means we can ignore behavior in
-  edge cases nixpkgs doesn't trigger as well as obscure features it doesn't use
-  (e.g. `__overrides`).
-* Some behavior of the Nix evaluator seems to be unintentional or an
-  implementation detail leaking out into language behavior.
-
-Especially in the latter case, it makes sense to raise the respective issue and
-maybe to get rid of the behavior in all implementations for good. Below is an
-(incomplete) list of such issues:
-
-* [Behaviour of nested attribute sets depends on definition order][i7111]
-* [Partially constructed attribute sets are observable during dynamic attr names construction][i7012]
-* [Nix parsers merges multiple attribute set literals for the same key incorrectly depending on definition order][i7115]
-
-On the other hand, there is behavior that seems to violate one's expectation
-about the language at first, but has good enough reasons from an implementor's
-perspective to keep them:
-
-* Dynamic keys are forbidden in `let` and `inherit`. This makes sure that we
-  only need to do runtime identifier lookups for `with`. More dynamic (i.e.
-  runtime) lookups would make the scoping system even more complicated as well
-  as hurt performance.
-* Dynamic attributes of `rec` sets are not added to its scope. This makes sense
-  for the same reason.
-* Dynamic and nested attributes in attribute sets don't get merged. This is a
-  tricky one, but avoids doing runtime (recursive) merges of attribute sets.
-  Instead all necessary merging can be inferred statically, i.e. the C++ Nix
-  implementation already merges at parse time, making nested attribute keys
-  syntactic sugar effectively.
-
-Other behavior is just odd, surprising or underdocumented:
-
-* `builtins.foldl'` doesn't force the initial accumulator (but all other
-  intermediate accumulator values), differing from e.g. Haskell, see
-  the [relevant PR discussion][p7158].
-
-[i7111]: https://github.com/NixOS/nix/issues/7111
-[i7012]: https://github.com/NixOS/nix/issues/7012
-[i7115]: https://github.com/NixOS/nix/issues/7115
-[p7158]: https://github.com/NixOS/nix/pull/7158
diff --git a/tvix/eval/docs/opcodes-attrsets.md b/tvix/eval/docs/opcodes-attrsets.md
deleted file mode 100644
index 7026f3319d..0000000000
--- a/tvix/eval/docs/opcodes-attrsets.md
+++ /dev/null
@@ -1,122 +0,0 @@
-# attrset-opcodes
-
-The problem with attrset literals is twofold:
-
-1. The keys of attribute sets may be dynamically evaluated.
-
-   Access:
-
-   ```nix
-   let
-     k = "foo";
-     attrs = { /* etc. */ };
-   in attrs."${k}"
-   ```
-
-   Literal:
-   ```nix
-   let
-     k = "foo";
-   in {
-     "${k}" = 42;
-   }
-   ```
-
-   The problem with this is that the attribute set key is not known at
-   compile time, and needs to be dynamically evaluated by the VM as an
-   expression.
-
-   For the most part this should be pretty simple, assuming a
-   theoretical instruction set:
-
-   ```
-   0000  OP_CONSTANT(0) # key "foo"
-   0001  OP_CONSTANT(1) # value 42
-   0002  OP_ATTR_SET(1) # construct attrset from 2 stack values
-   ```
-
-   The operation pushing the key needs to be replaced with one that
-   leaves a single value (the key) on the stack, i.e. the code for the
-   expression, e.g.:
-
-   ```
-   0000..000n <operations leaving a string value on the stack>
-   000n+1     OP_CONSTANT(1) # value 42
-   000n+2     OP_ATTR_SET(1) # construct attrset from 2 stack values
-   ```
-
-   This is fairly easy to do by simply recursing in the compiler when
-   the key expression is encountered.
-
-2. The keys of attribute sets may be nested.
-
-   This is the non-trivial part of dealing with attribute set
-   literals. Specifically, the nesting can be arbitrarily deep and the
-   AST does not guarantee that related set keys are located
-   adjacently.
-
-   Furthermore, this frequently occurs in practice in Nix. We need a
-   bytecode representation that makes it possible to construct nested
-   attribute sets at runtime.
-
-   Proposal: AttrPath values
-
-   If we can leave a value representing an attribute path on the
-   stack, we can offload the construction of nested attribute sets to
-   the `OpAttrSet` operation.
-
-   Under the hood, OpAttrSet in practice constructs a `Map<NixString,
-   Value>` attribute set in most cases. This means it expects to pop
-   the value of the key of the stack, but is otherwise free to do
-   whatever it wants with the underlying map.
-
-   In a simple example, we could have code like this:
-
-   ```nix
-   {
-     a.b = 15;
-   }
-   ```
-
-   This would be compiled to a new `OpAttrPath` instruction that
-   constructs and pushes an attribute path from a given number of
-   fragments (which are popped off the stack).
-
-   For example,
-
-   ```
-   0000 OP_CONSTANT(0)  # key "a"
-   0001 OP_CONSTANT(1)  # key "b"
-   0002 OP_ATTR_PATH(2) # construct attrpath from 2 fragments
-   0003 OP_CONSTANT(2)  # value 42
-   0004 OP_ATTRS(1)     # construct attrset from one pair
-   ```
-
-   Right before `0004` the stack would be left like this:
-
-   [ AttrPath[a,b], 42 ]
-
-   Inside of the `OP_ATTRS` instruction we could then begin
-   construction of the map and insert the nested attribute sets as
-   required, as well as validate that there are no duplicate keys.
-
-3. Both of these cases can occur simultaneously, but this is not a
-   problem as the opcodes combine perfectly fine, e.g.:
-
-   ```nix
-   let
-     k = "a";
-   in {
-     "${k}".b = 42;
-   }
-   ```
-
-   results in
-
-   ```
-   0000..000n <operations leaving a string value on the stack>
-   000n+1     OP_CONSTANT(1)  # key "b"
-   000n+2     OP_ATTR_PATH(2) # construct attrpath from 2 fragments
-   000n+3     OP_CONSTANT(2)  # value 42
-   000n+4     OP_ATTR_SET(1)  # construct attrset from 2 stack values
-   ```
diff --git a/tvix/eval/docs/recursive-attrs.md b/tvix/eval/docs/recursive-attrs.md
deleted file mode 100644
index c30cfd33e6..0000000000
--- a/tvix/eval/docs/recursive-attrs.md
+++ /dev/null
@@ -1,68 +0,0 @@
-Recursive attribute sets
-========================
-
-The construction behaviour of recursive attribute sets is very
-specific, and a bit peculiar.
-
-In essence, there are multiple "phases" of scoping that take place
-during attribute set construction:
-
-1. Every inherited value without an explicit source is inherited only
-   from the **outer** scope in which the attribute set is enclosed.
-
-2. A new scope is opened in which all recursive keys are evaluated.
-   This only considers **statically known keys**, attributes can
-   **not** recurse into dynamic keys in `self`!
-
-   For example, this code is invalid in C++ Nix:
-
-   ```
-   nix-repl> rec { ${"a"+""} = 2; b = a * 10; }
-   error: undefined variable 'a' at (string):1:26
-   ```
-
-3. Finally, a third scope is opened in which dynamic keys are
-   evaluated.
-
-This behaviour, while possibly a bit strange and unexpected, actually
-simplifies the implementation of recursive attribute sets in Tvix as
-well.
-
-Essentially, a recursive attribute set like this:
-
-```nix
-rec {
-  inherit a;
-  b = a * 10;
-  ${"c" + ""} = b * 2;
-}
-```
-
-Can be compiled like the following expression:
-
-```nix
-let
-  inherit a;
-in let
-  b = a * 10;
-  in {
-    inherit a b;
-    ${"c" + ""} = b * 2;
-  }
-```
-
-Completely deferring the resolution of recursive identifiers to the
-existing handling of recursive scopes (i.e. deferred access) in let
-bindings.
-
-In practice, we can further specialise this and compile each scope
-directly into the form expected by `OpAttrs` (that is, leaving
-attribute names on the stack) before each value's position.
-
-C++ Nix's Implementation
-------------------------
-
-* [`ExprAttrs`](https://github.com/NixOS/nix/blob/2097c30b08af19a9b42705fbc07463bea60dfb5b/src/libexpr/nixexpr.hh#L241-L268)
-  (AST representation of attribute sets)
-* [`ExprAttrs::eval`](https://github.com/NixOS/nix/blob/075bf6e5565aff9fba0ea02f3333c82adf4dccee/src/libexpr/eval.cc#L1333-L1414)
-* [`addAttr`](https://github.com/NixOS/nix/blob/master/src/libexpr/parser.y#L98-L156) (`ExprAttrs` construction in the parser)
diff --git a/tvix/eval/docs/vm-loop.md b/tvix/eval/docs/vm-loop.md
deleted file mode 100644
index 6266d34709..0000000000
--- a/tvix/eval/docs/vm-loop.md
+++ /dev/null
@@ -1,315 +0,0 @@
-tvix-eval VM loop
-=================
-
-This document describes the new tvix-eval VM execution loop implemented in the
-chain focusing around cl/8104.
-
-## Background
-
-The VM loop implemented in Tvix prior to cl/8104 had several functions:
-
-1. Advancing the instruction pointer for a chunk of Tvix bytecode and
-   executing instructions in a loop until a result was yielded.
-
-2. Tracking Nix call frames as functions/thunks were entered/exited.
-
-3. Catching trampoline requests returned from instructions to force suspended
-   thunks without increasing stack size *where possible*.
-
-4. Handling trampolines through an inner trampoline loop, switching between a
-   code execution mode and execution of subsequent trampolines.
-
-This implementation of the trampoline logic was added on to the existing VM,
-which previously always recursed for thunk forcing. There are some cases (for
-example values that need to be forced *inside* of the execution of a builtin)
-where trampolines could not previously be used, and the VM recursed anyways.
-
-As a result of this trampoline logic being added "on top" of the existing VM
-loop the code became quite difficult to understand. This led to several bugs,
-for example: b/251, b/246, b/245, and b/238.
-
-These bugs were tricky to deal with, as we had to try and make the VM do
-things that are somewhat difficult to fit into its model. We could of course
-keep extending the trampoline logic to accommodate all sorts of concepts (such
-as finalisers), but that seems like it does not solve the root problem.
-
-## New VM loop
-
-In cl/8104, a unified new solution is implemented with which the VM is capable
-of evaluating everything without increasing the call stack size.
-
-This is done by introducing a new frame stack in the VM, on which execution
-frames are enqueued that are either:
-
-1. A bytecode frame, consisting of Tvix bytecode that evaluates compiled Nix
-   code.
-2. A generator frame, consisting of some VM logic implemented in pure Rust
-   code that can be *suspended* when it hits a point where the VM would
-   previously need to recurse.
-
-We do this by making use of the `async` *keyword* in Rust, but notably
-*without* introducing asynchronous I/O or concurrency in tvix-eval (the
-complexity of which is currently undesirable for us).
-
-Specifically, when writing a Rust function that uses the `async` keyword, such
-as:
-
-```rust
-async fn some_builtin(input: Value) -> Result<Value, ErrorKind> {
-  let mut out = NixList::new();
-
-  for element in input.to_list()? {
-    let result = do_something_that_requires_the_vm(element).await;
-    out.push(result);
-  }
-
-  Ok(out)
-}
-```
-
-The compiler actually generates a state-machine under-the-hood which allows
-the execution of that function to be *suspended* whenever it hits an `await`.
-
-We use the [`genawaiter`][] crate that gives us a data structure and simple
-interface for getting instances of these state machines that can be stored in
-a struct (in our case, a *generator frame*).
-
-The execution of the VM then becomes the execution of an *outer loop*, which
-is responsible for selecting the next generator frame to execute, and two
-*inner loops*, which drive the execution of a bytecode frame or generator
-frame forward until it either yields a value or asks to be suspended in favour
-of another frame.
-
-All "communication" between frames happens solely through values left on the
-stack: Whenever a frame of either type runs to completion, it is expected to
-leave a *single* value on the stack. It follows that the whole VM, upon
-completion of the last (or initial, depending on your perspective) frame
-yields its result as the return value.
-
-The core of the VM restructuring is cl/8104, unfortunately one of the largest
-single commit changes we've had to make yet, as it touches pretty much all
-areas of tvix-eval. The introduction of the generators and the
-message/response system we built to request something from the VM, suspend a
-generator, and wait for the return is in cl/8148.
-
-The next sections describe in detail how the three different loops work.
-
-### Outer VM loop
-
-The outer VM loop is responsible for selecting the next frame to run, and
-dispatching it correctly to inner loops, as well as determining when to shut
-down the VM and return the final result.
-
-```
-                          ╭──────────────────╮
-                 ╭────────┤ match frame kind ├──────╮
-                 │        ╰──────────────────╯      │
-                 │                                  │
-    ┏━━━━━━━━━━━━┷━━━━━┓                ╭───────────┴───────────╮
-───►┃ frame_stack.pop()┃                ▼                       ▼
-    ┗━━━━━━━━━━━━━━━━━━┛       ┏━━━━━━━━━━━━━━━━┓      ┏━━━━━━━━━━━━━━━━━┓
-                 ▲             ┃ bytecode frame ┃      ┃ generator frame ┃
-                 │             ┗━━━━━━━━┯━━━━━━━┛      ┗━━━━━━━━┯━━━━━━━━┛
-                 │[yes, cont.]          │                       │
-                 │                      ▼                       ▼
-    ┏━━━━━━━━┓   │             ╔════════════════╗      ╔═════════════════╗
-◄───┨ return ┃   │             ║ inner bytecode ║      ║ inner generator ║
-    ┗━━━━━━━━┛   │             ║      loop      ║      ║      loop       ║
-        ▲        │             ╚════════╤═══════╝      ╚════════╤════════╝
-        │   ╭────┴─────╮                │                       │
-        │   │ has next │                ╰───────────┬───────────╯
-   [no] ╰───┤  frame?  │                            │
-            ╰────┬─────╯                            ▼
-                 │                         ┏━━━━━━━━━━━━━━━━━┓
-                 │                         ┃ frame completed ┃
-                 ╰─────────────────────────┨  or suspended   ┃
-                                           ┗━━━━━━━━━━━━━━━━━┛
-```
-
-Initially, the VM always pops a frame from the frame stack and then inspects
-the type of frame it found. As a consequence the next frame to execute is
-always the frame at the top of the stack, and setting up a VM initially for
-code execution is done by leaving a bytecode frame with the code to execute on
-the stack and passing control to the outer loop.
-
-Control is dispatched to either of the inner loops (depending on the type of
-frame) and the cycle continues once they return.
-
-When an inner loop returns, it has either finished its execution (and left its
-result value on the *value stack*), or its frame has requested to be
-suspended.
-
-Frames request suspension by re-enqueueing *themselves* through VM helper
-methods, and then leaving the frame they want to run *on top* of themselves in
-the frame stack before yielding control back to the outer loop.
-
-The inner control loops inform the outer loops about whether the frame has
-been *completed* or *suspended* by returning a boolean.
-
-### Inner bytecode loop
-
-The inner bytecode loop drives the execution of some Tvix bytecode by
-continously looking at the next instruction to execute, and dispatching to the
-instruction handler.
-
-```
-   ┏━━━━━━━━━━━━━┓
-◄──┨ return true ┃
-   ┗━━━━━━━━━━━━━┛
-          ▲
-     ╔════╧═════╗
-     ║ OpReturn ║
-     ╚══════════╝
-          ▲
-          ╰──┬────────────────────────────╮
-             │                            ▼
-             │                 ╔═════════════════════╗
-    ┏━━━━━━━━┷━━━━━┓           ║ execute instruction ║
-───►┃ inspect next ┃           ╚══════════╤══════════╝
-    ┃  instruction ┃                      │
-    ┗━━━━━━━━━━━━━━┛                      │
-             ▲                      ╭─────┴─────╮
-             ╰──────────────────────┤ suspends? │
-                       [no]         ╰─────┬─────╯
-                                          │
-                                          │
-   ┏━━━━━━━━━━━━━━┓                       │
-◄──┨ return false ┃───────────────────────╯
-   ┗━━━━━━━━━━━━━━┛              [yes]
-```
-
-With this refactoring, the compiler now emits a special `OpReturn` instruction
-at the end of bytecode chunks. This is a signal to the runtime that the chunk
-has completed and that its current value should be returned, without having to
-perform instruction pointer arithmetic.
-
-When `OpReturn` is encountered, the inner bytecode loop returns control to the
-outer loop and informs it (by returning `true`) that the bytecode frame has
-completed.
-
-Any other instruction may also request a suspension of the bytecode frame (for
-example, instructions that need to force a value). In this case the inner loop
-is responsible for setting up the frame stack correctly, and returning `false`
-to inform the outer loop of the suspension
-
-### Inner generator loop
-
-The inner generator loop is responsible for driving the execution of a
-generator frame by continously calling [`Gen::resume`][] until it requests a
-suspension (as a result of which control is returned to the outer loop), or
-until the generator is done and yields a value.
-
-```
-   ┏━━━━━━━━━━━━━┓
-◄──┨ return true ┃ ◄───────────────────╮
-   ┗━━━━━━━━━━━━━┛                     │
-                                       │
-                               [Done]  │
-                    ╭──────────────────┴─────────╮
-                    │ inspect generator response │◄────────────╮
-                    ╰──────────────────┬─────────╯             │
-                            [yielded]  │              ┏━━━━━━━━┷━━━━━━━━┓
-                                       │              ┃ gen.resume(msg) ┃◄──
-                                       ▼              ┗━━━━━━━━━━━━━━━━━┛
-                                 ╭────────────╮                ▲
-                                 │ same-frame │                │
-                                 │  request?  ├────────────────╯
-                                 ╰─────┬──────╯      [yes]
-   ┏━━━━━━━━━━━━━━┓                    │
-◄──┨ return false ┃ ◄──────────────────╯
-   ┗━━━━━━━━━━━━━━┛                [no]
-```
-
-On each execution of a generator frame, `resume_with` is called with a
-[`VMResponse`][] (i.e. a message *from* the VM *to* the generator). For a newly
-created generator, the initial message is just `Empty`.
-
-A generator may then respond by signaling that it has finished execution
-(`Done`), in which case the inner generator loop returns control to the outer
-loop and informs it that this generator is done (by returning `true`).
-
-A generator may also respond by signaling that it needs some data from the VM.
-This is implemented through a request-response pattern, in which the generator
-returns a `Yielded` message containing a [`VMRequest`][]. These requests can be
-very simple ("Tell me the current store path") or more complex ("Call this Nix
-function with these values").
-
-Requests are divided into two classes: Same-frame requests (requests that can be
-responded to *without* returning control to the outer loop, i.e. without
-executing a *different* frame), and multi-frame generator requests. Based on the
-type of request, the inner generator loop will either handle it right away and
-send the response in a new `resume_with` call, or return `false` to the outer
-generator loop after setting up the frame stack.
-
-Most of this logic is implemented in cl/8148.
-
-[`Gen::resume`]: https://docs.rs/genawaiter/0.99.1/genawaiter/rc/struct.Gen.html#method.resume_with
-[`VMRequest`]: https://cs.tvl.fyi/depot@2696839770c1ccb62929ff2575a633c07f5c9593/-/blob/tvix/eval/src/vm/generators.rs?L44
-[`VMResponse`]: https://cs.tvl.fyi/depot@2696839770c1ccb62929ff2575a633c07f5c9593/-/blob/tvix/eval/src/vm/generators.rs?L169
-
-## Advantages & Disadvantages of the approach
-
-This approach has several advantages:
-
-* The execution model is much simpler than before, making it fairly
-  straightforward to build up a mental model of what the VM does.
-
-* All "out of band requests" inside the VM are handled through the same
-  abstraction (generators).
-
-* Implementation is not difficult, albeit a little verbose in some cases (we
-  can argue about whether or not to introduce macros for simplifying it).
-
-* Several parts of the VM execution are now much easier to document,
-  potentially letting us onboard tvix-eval contributors faster.
-
-* The linear VM execution itself is much easier to trace now, with for example
-  the `RuntimeObserver` (and by extension `tvixbolt`) giving much clearer
-  output now.
-
-But it also comes with some disadvantages:
-
-* Even though we "only" use the `async` keyword without a full async-I/O
-  runtime, we still encounter many of the drawbacks of the fragmented Rust
-  async ecosystem.
-
-  The biggest issue with this is that parts of the standard library become
-  unavailable to us, for example the built-in `Vec::sort_by` can no longer be
-  used for sorting in Nix because our comparators themselves are `async`.
-
-  This led us to having to implement some logic on our own, as the design of
-  `async` in Rust even makes it difficult to provide usecase-generic
-  implementations of concepts like sorting.
-
-* We need to allocate quite a few new structures on the heap in order to drive
-  generators, as generators involve storing `Future` types (with unknown
-  sizes) inside of structs.
-
-  In initial testing this seems to make no significant difference in
-  performance (our performance in an actual nixpkgs-eval is still bottlenecked
-  by I/O concerns and reference scanning), but is something to keep in mind
-  later on when we start optimising more after the low-hanging fruits have
-  been reaped.
-
-## Alternatives considered
-
-1. Tacking on more functionality onto the existing VM loop
-   implementation to accomodate problems as they show up. This is not
-   preferred as the code is already getting messy.
-
-2. Making tvix-eval a fully `async` project, pulling in something like Tokio
-   or `async-std` as a runtime. This is not preferred due to the massively
-   increased complexity of those solutions, and all the known issues of fully
-   buying in to the async ecosystem.
-
-   tvix-eval fundamentally should work for use-cases besides building Nix
-   packages (e.g. for `//tvix/serde`), and its profile should be as slim as
-   possible.
-
-3. Convincing the Rust developers that Rust needs a way to guarantee
-   constant-stack-depth tail calls through something like a `tailcall`
-   keyword.
-
-4. ... ?
-
-[`genawaiter`]: https://docs.rs/genawaiter/
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index 04a0b3dd33..8fc80b2f9c 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -274,9 +274,10 @@ mod pure_builtins {
         list: Value,
     ) -> Result<Value, ErrorKind> {
         let mut separator = separator.to_contextful_str()?;
+
         let mut context = NixContext::new();
-        if let Some(sep_context) = separator.context_mut() {
-            context = context.join(sep_context);
+        if let Some(sep_context) = separator.take_context() {
+            context.extend(sep_context.into_iter())
         }
         let list = list.to_list()?;
         let mut res = BString::default();
@@ -296,13 +297,8 @@ mod pure_builtins {
             {
                 Ok(mut s) => {
                     res.push_str(&s);
-                    if let Some(ref mut other_context) = s.context_mut() {
-                        // It is safe to consume the other context here
-                        // because the `list` and `separator` are originally
-                        // moved, here.
-                        // We are not going to use them again
-                        // because the result here is a string.
-                        context = context.join(other_context);
+                    if let Some(other_context) = s.take_context() {
+                        context.extend(other_context.into_iter());
                     }
                 }
                 Err(c) => return Ok(Value::Catchable(Box::new(c))),
@@ -764,9 +760,8 @@ mod pure_builtins {
         }
 
         if let Some(origin_ctx) = origin.context_mut() {
-            // FUTUREWORK(performance): avoid this clone
-            // and extend in-place.
-            *origin_ctx = origin_ctx.clone().join(&mut ctx_elements.into());
+            origin_ctx.extend(ctx_elements)
+            // TODO: didn't we forget cases where origin had no context?
         }
 
         Ok(origin.into())
@@ -1169,8 +1164,8 @@ mod pure_builtins {
         let mut empty_string_replace = false;
         let mut context = NixContext::new();
 
-        if let Some(string_context) = string.context_mut() {
-            context = context.join(string_context);
+        if let Some(string_context) = string.take_context() {
+            context.extend(string_context.into_iter());
         }
 
         // This can't be implemented using Rust's string.replace() as
@@ -1200,8 +1195,8 @@ mod pure_builtins {
                 if string[i..i + from.len()] == *from {
                     res.push_str(&to);
                     i += from.len();
-                    if let Some(to_ctx) = to.context_mut() {
-                        context = context.join(to_ctx);
+                    if let Some(to_ctx) = to.take_context() {
+                        context.extend(to_ctx.into_iter());
                     }
 
                     // remember if we applied the empty from->to
@@ -1232,8 +1227,8 @@ mod pure_builtins {
 
             if from.is_empty() {
                 res.push_str(&to);
-                if let Some(to_ctx) = to.context_mut() {
-                    context = context.join(to_ctx);
+                if let Some(to_ctx) = to.take_context() {
+                    context.extend(to_ctx.into_iter());
                 }
                 break;
             }
@@ -1291,6 +1286,9 @@ mod pure_builtins {
                 })
                 .collect();
             ret.push_back(Value::List(NixList::from(v)));
+            if pos == text.len() {
+                break;
+            }
             pos = thematch.end();
         }
 
@@ -1504,8 +1502,19 @@ mod pure_builtins {
         }
 
         let mut buf: Vec<u8> = vec![];
-        to_xml::value_to_xml(&mut buf, &value)?;
-        Ok(String::from_utf8(buf)?.into())
+        let context = to_xml::value_to_xml(&mut buf, &value)?;
+
+        Ok((
+            buf,
+            // FUTUREWORK: We have a distinction between an empty context, and
+            // no context at all. Fix this.
+            if !context.is_empty() {
+                Some(Box::new(context))
+            } else {
+                None
+            },
+        )
+            .into())
     }
 
     #[builtin("placeholder")]
@@ -1612,10 +1621,16 @@ pub fn pure_builtins() -> Vec<(&'static str, Value)> {
         crate::systems::llvm_triple_to_nix_double(CURRENT_PLATFORM).into(),
     ));
 
-    // TODO: implement for nixpkgs compatibility
     result.push((
         "__curPos",
-        Value::from(CatchableErrorKind::UnimplementedFeature("__curPos".into())),
+        Value::Thunk(Thunk::new_suspended_native(Box::new(move || {
+            // TODO: implement for nixpkgs compatibility
+            Ok(Value::attrs(NixAttrs::from_iter([
+                ("line", 42.into()),
+                ("column", 42.into()),
+                ("file", Value::String("/deep/thought".into())),
+            ])))
+        }))),
     ));
 
     result
@@ -1623,6 +1638,8 @@ pub fn pure_builtins() -> Vec<(&'static str, Value)> {
 
 #[builtins]
 mod placeholder_builtins {
+    use crate::NixContext;
+
     use super::*;
 
     #[builtin("unsafeDiscardStringContext")]
@@ -1671,24 +1688,17 @@ mod placeholder_builtins {
             .to_contextful_str()?;
 
         // If there's any context, we will swap any ... by a path one.
-        if let Some(ctx) = v.context_mut() {
-            let new_context: tvix_eval::NixContext = ctx
-                .iter()
-                .map(|elem| match elem {
-                    // FUTUREWORK(performance): ideally, we should either:
-                    // (a) do interior mutation of the existing context.
-                    // (b) let the structural sharing make those clones cheap.
-                    crate::NixContextElement::Derivation(drv_path) => {
-                        crate::NixContextElement::Plain(drv_path.to_string())
-                    }
-                    elem => elem.clone(),
-                })
-                .collect::<HashSet<_>>()
-                .into();
+        if let Some(c) = v.take_context() {
+            let mut context = NixContext::new();
+            context.extend(c.into_iter().map(|elem| match elem {
+                crate::NixContextElement::Derivation(drv_path) => {
+                    crate::NixContextElement::Plain(drv_path.to_string())
+                }
+                elem => elem.clone(),
+            }));
 
-            *ctx = new_context;
+            return Ok(Value::String(NixString::new_context_from(context, v)));
         }
-
         Ok(Value::from(v))
     }
 
@@ -1709,6 +1719,7 @@ mod placeholder_builtins {
         _name: Value,
         _attrset: Value,
     ) -> Result<Value, ErrorKind> {
+        // TODO: implement for nixpkgs compatibility
         generators::emit_warning_kind(
             &co,
             WarningKind::NotImplemented("builtins.unsafeGetAttrsPos"),
diff --git a/tvix/eval/src/builtins/to_xml.rs b/tvix/eval/src/builtins/to_xml.rs
index bb12cebfc9..093e127fe2 100644
--- a/tvix/eval/src/builtins/to_xml.rs
+++ b/tvix/eval/src/builtins/to_xml.rs
@@ -3,112 +3,98 @@
 //! things in nixpkgs rely on.
 
 use bstr::ByteSlice;
+use std::borrow::Cow;
 use std::{io::Write, rc::Rc};
-use xml::writer::events::XmlEvent;
-use xml::writer::EmitterConfig;
-use xml::writer::EventWriter;
 
-use crate::{ErrorKind, Value};
+use crate::{ErrorKind, NixContext, NixContextElement, Value};
 
 /// Recursively serialise a value to XML. The value *must* have been
 /// deep-forced before being passed to this function.
-pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<(), ErrorKind> {
-    let config = EmitterConfig {
-        perform_indent: true,
-        pad_self_closing: true,
-
-        // Nix uses single-quotes *only* in the document declaration,
-        // so we need to write it manually.
-        write_document_declaration: false,
-        ..Default::default()
-    };
-
+/// On success, returns the NixContext.
+pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<NixContext, ErrorKind> {
     // Write a literal document declaration, using C++-Nix-style
     // single quotes.
     writeln!(writer, "<?xml version='1.0' encoding='utf-8'?>")?;
 
-    let mut writer = EventWriter::new_with_config(writer, config);
-
-    writer.write(XmlEvent::start_element("expr"))?;
-    value_variant_to_xml(&mut writer, value)?;
-    writer.write(XmlEvent::end_element())?;
+    let mut emitter = XmlEmitter::new(writer);
 
-    // Unwrap the writer to add the final newline that C++ Nix adds.
-    writeln!(writer.into_inner())?;
+    emitter.write_open_tag("expr", &[])?;
+    value_variant_to_xml(&mut emitter, value)?;
+    emitter.write_closing_tag("expr")?;
 
-    Ok(())
+    Ok(emitter.into_context())
 }
 
 fn write_typed_value<W: Write, V: ToString>(
-    w: &mut EventWriter<W>,
-    name: &str,
+    w: &mut XmlEmitter<W>,
+    name_unescaped: &str,
     value: V,
 ) -> Result<(), ErrorKind> {
-    w.write(XmlEvent::start_element(name).attr("value", &value.to_string()))?;
-    w.write(XmlEvent::end_element())?;
+    w.write_self_closing_tag(name_unescaped, &[("value", &value.to_string())])?;
     Ok(())
 }
 
-fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Result<(), ErrorKind> {
+fn value_variant_to_xml<W: Write>(w: &mut XmlEmitter<W>, value: &Value) -> Result<(), ErrorKind> {
     match value {
         Value::Thunk(t) => return value_variant_to_xml(w, &t.value()),
 
         Value::Null => {
-            w.write(XmlEvent::start_element("null"))?;
-            w.write(XmlEvent::end_element())
+            w.write_open_tag("null", &[])?;
+            w.write_closing_tag("null")?;
         }
 
         Value::Bool(b) => return write_typed_value(w, "bool", b),
         Value::Integer(i) => return write_typed_value(w, "int", i),
         Value::Float(f) => return write_typed_value(w, "float", f),
-        Value::String(s) => return write_typed_value(w, "string", s.to_str()?),
+        Value::String(s) => {
+            if let Some(context) = s.context() {
+                w.extend_context(context.iter().cloned());
+            }
+            return write_typed_value(w, "string", s.to_str()?);
+        }
         Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()),
 
         Value::List(list) => {
-            w.write(XmlEvent::start_element("list"))?;
+            w.write_open_tag("list", &[])?;
 
             for elem in list.into_iter() {
                 value_variant_to_xml(w, elem)?;
             }
 
-            w.write(XmlEvent::end_element())
+            w.write_closing_tag("list")?;
         }
 
         Value::Attrs(attrs) => {
-            w.write(XmlEvent::start_element("attrs"))?;
+            w.write_open_tag("attrs", &[])?;
 
             for elem in attrs.iter() {
-                w.write(XmlEvent::start_element("attr").attr("name", &elem.0.to_str_lossy()))?;
+                w.write_open_tag("attr", &[("name", &elem.0.to_str_lossy())])?;
                 value_variant_to_xml(w, elem.1)?;
-                w.write(XmlEvent::end_element())?;
+                w.write_closing_tag("attr")?;
             }
 
-            w.write(XmlEvent::end_element())
+            w.write_closing_tag("attrs")?;
         }
 
         Value::Closure(c) => {
-            w.write(XmlEvent::start_element("function"))?;
+            w.write_open_tag("function", &[])?;
 
             match &c.lambda.formals {
                 Some(formals) => {
-                    let mut attrspat = XmlEvent::start_element("attrspat");
+                    let mut attrs: Vec<(&str, &str)> = Vec::with_capacity(2);
                     if formals.ellipsis {
-                        attrspat = attrspat.attr("ellipsis", "1");
+                        attrs.push(("ellipsis", "1"));
                     }
                     if let Some(ref name) = &formals.name {
-                        attrspat = attrspat.attr("name", name.as_str());
+                        attrs.push(("name", name.as_str()));
                     }
 
-                    w.write(attrspat)?;
-
+                    w.write_open_tag("attrspat", &attrs)?;
                     for arg in formals.arguments.iter() {
-                        w.write(
-                            XmlEvent::start_element("attr").attr("name", &arg.0.to_str_lossy()),
-                        )?;
-                        w.write(XmlEvent::end_element())?;
+                        w.write_self_closing_tag("attr", &[("name", &arg.0.to_str_lossy())])?;
                     }
 
-                    w.write(XmlEvent::end_element())?;
+                    w.write_closing_tag("attrspat")?;
                 }
                 None => {
                     // TODO(tazjin): tvix does not currently persist function
@@ -120,17 +106,16 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu
                     // If we don't want to persist the data, we can re-parse the
                     // AST from the spans of the lambda's bytecode and figure it
                     // out that way, but it needs some investigating.
-                    w.write(XmlEvent::start_element("varpat").attr("name", /* fake: */ "x"))?;
-                    w.write(XmlEvent::end_element())?;
+                    w.write_self_closing_tag("varpat", &[("name", /* fake: */ "x")])?;
                 }
             }
 
-            w.write(XmlEvent::end_element())
+            w.write_closing_tag("function")?;
         }
 
         Value::Builtin(_) => {
-            w.write(XmlEvent::start_element("unevaluated"))?;
-            w.write(XmlEvent::end_element())
+            w.write_open_tag("unevaluated", &[])?;
+            w.write_closing_tag("unevaluated")?;
         }
 
         Value::AttrNotFound
@@ -148,7 +133,189 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu
         Value::Catchable(_) => {
             panic!("tvix bug: value_to_xml() called on a value which had not been deep-forced")
         }
-    }?;
+    };
 
     Ok(())
 }
+
+/// A simple-stupid XML emitter, which implements only the subset needed for byte-by-byte compat with C++ nix’ `builtins.toXML`.
+struct XmlEmitter<W> {
+    /// The current indentation
+    cur_indent: usize,
+    writer: W,
+    context: NixContext,
+}
+
+impl<W: Write> XmlEmitter<W> {
+    pub fn new(writer: W) -> Self {
+        XmlEmitter {
+            cur_indent: 0,
+            writer,
+            context: Default::default(),
+        }
+    }
+
+    /// Write an open tag with the given name (which is not escaped!)
+    /// and attributes (Keys are not escaped! Only attribute values are.)
+    pub fn write_open_tag(
+        &mut self,
+        name_unescaped: &str,
+        attrs: &[(&str, &str)],
+    ) -> std::io::Result<()> {
+        self.add_indent()?;
+        self.writer.write_all(b"<")?;
+        self.writer.write_all(name_unescaped.as_bytes())?;
+        self.write_attrs_escape_vals(attrs)?;
+        self.writer.write_all(b">\n")?;
+        self.cur_indent += 2;
+        Ok(())
+    }
+
+    /// Write a self-closing open tag with the given name (which is not escaped!)
+    /// and attributes (Keys are not escaped! Only attribute values are.)
+    pub fn write_self_closing_tag(
+        &mut self,
+        name_unescaped: &str,
+        attrs: &[(&str, &str)],
+    ) -> std::io::Result<()> {
+        self.add_indent()?;
+        self.writer.write_all(b"<")?;
+        self.writer.write_all(name_unescaped.as_bytes())?;
+        self.write_attrs_escape_vals(attrs)?;
+        self.writer.write_all(b" />\n")?;
+        Ok(())
+    }
+
+    /// Write a closing tag with the given name (which is not escaped!)
+    pub fn write_closing_tag(&mut self, name_unescaped: &str) -> std::io::Result<()> {
+        self.cur_indent -= 2;
+        self.add_indent()?;
+        self.writer.write_all(b"</")?;
+        self.writer.write_all(name_unescaped.as_bytes())?;
+        self.writer.write_all(b">\n")?;
+        Ok(())
+    }
+
+    #[inline]
+    fn add_indent(&mut self) -> std::io::Result<()> {
+        self.writer.write_all(&b" ".repeat(self.cur_indent))
+    }
+
+    /// Write an attribute list
+    fn write_attrs_escape_vals(&mut self, attrs: &[(&str, &str)]) -> std::io::Result<()> {
+        for (name, val) in attrs {
+            self.writer.write_all(b" ")?;
+            self.writer.write_all(name.as_bytes())?;
+            self.writer.write_all(br#"=""#)?;
+            self.writer
+                .write_all(Self::escape_attr_value(val).as_bytes())?;
+            self.writer.write_all(b"\"")?;
+        }
+        Ok(())
+    }
+
+    /// Escape the given attribute value, making sure we only actually clone the string if we needed to replace something.
+    fn escape_attr_value(s: &str) -> Cow<str> {
+        let mut last_escape: usize = 0;
+        let mut res: Cow<str> = Cow::Borrowed("");
+        // iterating via char_indices gives us the ability to index the original string slice at character boundaries
+        for (idx, c) in s.char_indices() {
+            match Self::should_escape_char(c) {
+                None => {}
+                Some(new) => {
+                    // add characters since the last escape we did
+                    res += &s[last_escape..idx];
+                    // add the escaped value
+                    res += new;
+                    last_escape = idx + 1;
+                }
+            }
+        }
+        // we did not need to escape anything, so borrow original string
+        if last_escape == 0 {
+            Cow::Borrowed(s)
+        } else {
+            // add the remaining characters
+            res += &s[last_escape..];
+            res
+        }
+    }
+
+    fn should_escape_char(c: char) -> Option<&'static str> {
+        match c {
+            '<' => Some("&lt;"),
+            '>' => Some("&gt;"),
+            '"' => Some("&quot;"),
+            '\'' => Some("&apos;"),
+            '&' => Some("&amp;"),
+            '\n' => Some("&#xA;"),
+            '\r' => Some("&#xD;"),
+            _ => None,
+        }
+    }
+
+    /// Extends the existing context with more context elements.
+    fn extend_context<T>(&mut self, iter: T)
+    where
+        T: IntoIterator<Item = NixContextElement>,
+    {
+        self.context.extend(iter)
+    }
+
+    /// Consumes [Self] and returns the [NixContext] collected.
+    fn into_context(self) -> NixContext {
+        self.context
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use bytes::buf::Writer;
+    use pretty_assertions::assert_eq;
+
+    use crate::builtins::to_xml::XmlEmitter;
+    use std::borrow::Cow;
+
+    #[test]
+    fn xml_gen() {
+        let mut buf = Vec::new();
+        let mut x = XmlEmitter::new(&mut buf);
+        x.write_open_tag("hello", &[("hi", "it’s me"), ("no", "<escape>")])
+            .unwrap();
+        x.write_self_closing_tag("self-closing", &[("tag", "yay")])
+            .unwrap();
+        x.write_closing_tag("hello").unwrap();
+
+        assert_eq!(
+            std::str::from_utf8(&buf).unwrap(),
+            r##"<hello hi="it’s me" no="&lt;escape&gt;">
+  <self-closing tag="yay" />
+</hello>
+"##
+        );
+    }
+
+    #[test]
+    fn xml_escape() {
+        match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("ab<>c&de") {
+            Cow::Owned(s) => assert_eq!(s, "ab&lt;&gt;c&amp;de".to_string(), "escape stuff"),
+            Cow::Borrowed(s) => panic!("s should be owned {}", s),
+        }
+        match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("") {
+            Cow::Borrowed(s) => assert_eq!(s, "", "empty escape is borrowed"),
+            Cow::Owned(s) => panic!("s should be borrowed {}", s),
+        }
+        match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!ŷbla") {
+            Cow::Borrowed(s) => assert_eq!(s, "hi!ŷbla", "no escape is borrowed"),
+            Cow::Owned(s) => panic!("s should be borrowed {}", s),
+        }
+        match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!<ŷ>bla") {
+            Cow::Owned(s) => assert_eq!(
+                s,
+                "hi!&lt;ŷ&gt;bla".to_string(),
+                "multi-byte chars are correctly used"
+            ),
+            Cow::Borrowed(s) => panic!("s should be owned {}", s),
+        }
+    }
+}
diff --git a/tvix/eval/src/errors.rs b/tvix/eval/src/errors.rs
index 652252dadf..a5f79c235f 100644
--- a/tvix/eval/src/errors.rs
+++ b/tvix/eval/src/errors.rs
@@ -10,7 +10,6 @@ use std::{fmt::Debug, fmt::Display, num::ParseIntError};
 use codemap::{File, Span};
 use codemap_diagnostic::{ColorConfig, Diagnostic, Emitter, Level, SpanLabel, SpanStyle};
 use smol_str::SmolStr;
-use xml::writer::Error as XmlError;
 
 use crate::spans::ToSpan;
 use crate::value::{CoercionKind, NixString};
@@ -194,9 +193,6 @@ pub enum ErrorKind {
     /// Invalid UTF-8 was encoutered somewhere
     Utf8,
 
-    /// Errors while serialising to XML.
-    Xml(Rc<XmlError>),
-
     /// Variant for errors that bubble up to eval from other Tvix
     /// components.
     TvixError(Rc<dyn error::Error>),
@@ -248,7 +244,6 @@ impl error::Error for Error {
                 errors.first().map(|e| e as &dyn error::Error)
             }
             ErrorKind::IO { error, .. } => Some(error.as_ref()),
-            ErrorKind::Xml(error) => Some(error.as_ref()),
             ErrorKind::TvixError(error) => Some(error.as_ref()),
             _ => None,
         }
@@ -285,12 +280,6 @@ impl From<bstr::FromUtf8Error> for ErrorKind {
     }
 }
 
-impl From<XmlError> for ErrorKind {
-    fn from(err: XmlError) -> Self {
-        Self::Xml(Rc::new(err))
-    }
-}
-
 impl From<io::Error> for ErrorKind {
     fn from(e: io::Error) -> Self {
         ErrorKind::IO {
@@ -506,8 +495,6 @@ to a missing value in the attribute set(s) included via `with`."#,
                 write!(f, "Invalid UTF-8 in string")
             }
 
-            ErrorKind::Xml(error) => write!(f, "failed to serialise to XML: {error}"),
-
             ErrorKind::TvixError(inner_error) => {
                 write!(f, "{inner_error}")
             }
@@ -823,7 +810,6 @@ impl Error {
             | ErrorKind::JsonError(_)
             | ErrorKind::NotSerialisableToJson(_)
             | ErrorKind::FromTomlError(_)
-            | ErrorKind::Xml(_)
             | ErrorKind::Utf8
             | ErrorKind::TvixError(_)
             | ErrorKind::TvixBug { .. }
@@ -870,7 +856,6 @@ impl Error {
             ErrorKind::UnexpectedArgument { .. } => "E031",
             ErrorKind::RelativePathResolution(_) => "E032",
             ErrorKind::DivisionByZero => "E033",
-            ErrorKind::Xml(_) => "E034",
             ErrorKind::FromTomlError(_) => "E035",
             ErrorKind::NotSerialisableToJson(_) => "E036",
             ErrorKind::UnexpectedContext => "E037",
diff --git a/tvix/eval/src/io.rs b/tvix/eval/src/io.rs
index f775077af8..58586030aa 100644
--- a/tvix/eval/src/io.rs
+++ b/tvix/eval/src/io.rs
@@ -16,14 +16,16 @@
 //! how store paths are opened and so on.
 
 use std::{
-    fs::File,
     io,
     path::{Path, PathBuf},
 };
 
-#[cfg(target_family = "unix")]
+#[cfg(all(target_family = "unix", feature = "impure"))]
 use std::os::unix::ffi::OsStringExt;
 
+#[cfg(feature = "impure")]
+use std::fs::File;
+
 /// Types of files as represented by `builtins.readDir` in Nix.
 #[derive(Debug)]
 pub enum FileType {
diff --git a/tvix/eval/src/lib.rs b/tvix/eval/src/lib.rs
index 845964cb7e..398da4d6e2 100644
--- a/tvix/eval/src/lib.rs
+++ b/tvix/eval/src/lib.rs
@@ -29,7 +29,7 @@ mod vm;
 mod warnings;
 
 mod nix_search_path;
-#[cfg(test)]
+#[cfg(all(test, feature = "arbitrary"))]
 mod properties;
 #[cfg(test)]
 mod test_utils;
diff --git a/tvix/eval/src/nix_search_path.rs b/tvix/eval/src/nix_search_path.rs
index 566ca12238..369c5b6857 100644
--- a/tvix/eval/src/nix_search_path.rs
+++ b/tvix/eval/src/nix_search_path.rs
@@ -197,6 +197,8 @@ mod tests {
         }
     }
 
+    // this uses StdIO, which is only available with the impure feature.
+    #[cfg(feature = "impure")]
     mod resolve {
         use crate::StdIO;
         use path_clean::PathClean;
diff --git a/tvix/eval/src/tests/mod.rs b/tvix/eval/src/tests/mod.rs
index 5a7708e298..21b5d35e6a 100644
--- a/tvix/eval/src/tests/mod.rs
+++ b/tvix/eval/src/tests/mod.rs
@@ -1,203 +1,6 @@
-use crate::{value::Value, EvalIO};
-use builtin_macros::builtins;
-use pretty_assertions::assert_eq;
-use rstest::rstest;
-use std::path::PathBuf;
-
 /// Module for one-off tests which do not follow the rest of the
 /// test layout.
 mod one_offs;
 
-#[builtins]
-mod mock_builtins {
-    //! Builtins which are required by language tests, but should not
-    //! actually exist in //tvix/eval.
-    use crate as tvix_eval;
-    use crate::generators::GenCo;
-    use crate::*;
-    use genawaiter::rc::Gen;
-
-    #[builtin("derivation")]
-    async fn builtin_derivation(co: GenCo, input: Value) -> Result<Value, ErrorKind> {
-        let input = input.to_attrs()?;
-        let attrs = input.update(NixAttrs::from_iter(
-            [
-                (
-                    "outPath",
-                    "/nix/store/00000000000000000000000000000000-mock",
-                ),
-                (
-                    "drvPath",
-                    "/nix/store/00000000000000000000000000000000-mock.drv",
-                ),
-                ("type", "derivation"),
-            ]
-            .into_iter(),
-        ));
-
-        Ok(Value::Attrs(Box::new(attrs)))
-    }
-}
-
-fn eval_test(code_path: PathBuf, expect_success: bool) {
-    std::env::set_var("TEST_VAR", "foo"); // for eval-okay-getenv.nix
-
-    eprintln!("path: {}", code_path.display());
-    assert_eq!(
-        code_path.extension().unwrap(),
-        "nix",
-        "test files always end in .nix"
-    );
-
-    let code = std::fs::read_to_string(&code_path).expect("should be able to read test code");
-
-    let mut eval = crate::Evaluation::new_impure();
-    eval.strict = true;
-    eval.builtins.extend(mock_builtins::builtins());
-
-    let result = eval.evaluate(code, Some(code_path.clone()));
-    let failed = match result.value {
-        Some(Value::Catchable(_)) => true,
-        _ => !result.errors.is_empty(),
-    };
-    if expect_success && failed {
-        panic!(
-            "{}: evaluation of eval-okay test should succeed, but failed with {:?}",
-            code_path.display(),
-            result.errors,
-        );
-    }
-
-    if !expect_success && failed {
-        return;
-    }
-    // !expect_success can also mean the output differs, so don't panic if the
-    // evaluation didn't fail.
-
-    let value = result.value.unwrap();
-    let result_str = value.to_string();
-
-    let exp_path = code_path.with_extension("exp");
-    if exp_path.exists() {
-        // If there's an .exp file provided alongside, compare it with the
-        // output of the NixValue .to_string() method.
-        let exp_str = std::fs::read_to_string(&exp_path).expect("unable to read .exp file");
-
-        if expect_success {
-            assert_eq!(
-                result_str,
-                exp_str.trim(),
-                "{}: result value representation (left) must match expectation (right)",
-                code_path.display()
-            );
-        } else {
-            assert_ne!(
-                result_str,
-                exp_str.trim(),
-                "{}: test passed unexpectedly!  consider moving it out of notyetpassing",
-                code_path.display()
-            );
-
-            // Early return here, we don't compare .xml outputs if this is a !
-            // expect_success test.
-            return;
-        }
-    }
-
-    let exp_xml_path = code_path.with_extension("exp.xml");
-    if exp_xml_path.exists() {
-        // If there's an XML file provided alongside, compare it with the
-        // output produced when serializing the Value as XML.
-        let exp_xml_str = std::fs::read_to_string(exp_xml_path).expect("unable to read .xml file");
-
-        let mut xml_actual_buf = Vec::new();
-        crate::builtins::value_to_xml(&mut xml_actual_buf, &value).expect("value_to_xml failed");
-
-        assert_eq!(
-            String::from_utf8(xml_actual_buf).expect("to_xml produced invalid utf-8"),
-            exp_xml_str,
-            "{}: result value representation (left) must match expectation (right)",
-            code_path.display()
-        );
-    }
-}
-
-// identity-* tests contain Nix code snippets which should evaluate to
-// themselves exactly (i.e. literals).
-#[rstest]
-fn identity(#[files("src/tests/tvix_tests/identity-*.nix")] code_path: PathBuf) {
-    let code = std::fs::read_to_string(code_path).expect("should be able to read test code");
-
-    let mut eval = crate::Evaluation::new(Box::new(crate::StdIO) as Box<dyn EvalIO>, false);
-    eval.strict = true;
-
-    let result = eval.evaluate(&code, None);
-    assert!(
-        result.errors.is_empty(),
-        "evaluation of identity test failed: {:?}",
-        result.errors
-    );
-
-    let result_str = result.value.unwrap().to_string();
-
-    assert_eq!(
-        result_str,
-        code.trim(),
-        "result value representation (left) must match expectation (right)"
-    )
-}
-
-// eval-okay-* tests contain a snippet of Nix code, and an expectation
-// of the produced string output of the evaluator.
-//
-// These evaluations are always supposed to succeed, i.e. all snippets
-// are guaranteed to be valid Nix code.
-#[rstest]
-fn eval_okay(#[files("src/tests/tvix_tests/eval-okay-*.nix")] code_path: PathBuf) {
-    eval_test(code_path, true)
-}
-
-// eval-okay-* tests from the original Nix test suite.
-#[cfg(feature = "nix_tests")]
-#[rstest]
-fn nix_eval_okay(#[files("src/tests/nix_tests/eval-okay-*.nix")] code_path: PathBuf) {
-    eval_test(code_path, true)
-}
-
-// eval-okay-* tests from the original Nix test suite which do not yet pass for tvix
-//
-// Eventually there will be none of these left, and this function
-// will disappear :)
-//
-// Please don't submit failing tests unless they're in
-// notyetpassing; this makes the test suite much more useful for
-// regression testing, since there should always be zero non-ignored
-// failing tests.
-#[rstest]
-fn nix_eval_okay_currently_failing(
-    #[files("src/tests/nix_tests/notyetpassing/eval-okay-*.nix")] code_path: PathBuf,
-) {
-    eval_test(code_path, false)
-}
-
-#[rstest]
-fn eval_okay_currently_failing(
-    #[files("src/tests/tvix_tests/notyetpassing/eval-okay-*.nix")] code_path: PathBuf,
-) {
-    eval_test(code_path, false)
-}
-
-// eval-fail-* tests contain a snippet of Nix code, which is
-// expected to fail evaluation.  The exact type of failure
-// (assertion, parse error, etc) is not currently checked.
-#[rstest]
-fn eval_fail(#[files("src/tests/tvix_tests/eval-fail-*.nix")] code_path: PathBuf) {
-    eval_test(code_path, false)
-}
-
-// eval-fail-* tests from the original Nix test suite.
 #[cfg(feature = "nix_tests")]
-#[rstest]
-fn nix_eval_fail(#[files("src/tests/nix_tests/eval-fail-*.nix")] code_path: PathBuf) {
-    eval_test(code_path, false)
-}
+mod nix_tests;
diff --git a/tvix/eval/src/tests/nix_tests.rs b/tvix/eval/src/tests/nix_tests.rs
new file mode 100644
index 0000000000..17968e4bdb
--- /dev/null
+++ b/tvix/eval/src/tests/nix_tests.rs
@@ -0,0 +1,207 @@
+use crate::value::Value;
+use builtin_macros::builtins;
+use pretty_assertions::assert_eq;
+use rstest::rstest;
+use std::path::PathBuf;
+
+#[builtins]
+mod mock_builtins {
+    //! Builtins which are required by language tests, but should not
+    //! actually exist in //tvix/eval.
+    use crate as tvix_eval;
+    use crate::generators::GenCo;
+    use crate::*;
+    use genawaiter::rc::Gen;
+
+    #[builtin("derivation")]
+    async fn builtin_derivation(co: GenCo, input: Value) -> Result<Value, ErrorKind> {
+        let input = input.to_attrs()?;
+        let attrs = input.update(NixAttrs::from_iter(
+            [
+                (
+                    "outPath",
+                    "/nix/store/00000000000000000000000000000000-mock",
+                ),
+                (
+                    "drvPath",
+                    "/nix/store/00000000000000000000000000000000-mock.drv",
+                ),
+                ("type", "derivation"),
+            ]
+            .into_iter(),
+        ));
+
+        Ok(Value::Attrs(Box::new(attrs)))
+    }
+}
+
+#[cfg(feature = "impure")]
+fn eval_test(code_path: PathBuf, expect_success: bool) {
+    std::env::set_var("TEST_VAR", "foo"); // for eval-okay-getenv.nix
+
+    eprintln!("path: {}", code_path.display());
+    assert_eq!(
+        code_path.extension().unwrap(),
+        "nix",
+        "test files always end in .nix"
+    );
+
+    let code = std::fs::read_to_string(&code_path).expect("should be able to read test code");
+
+    let mut eval = crate::Evaluation::new_impure();
+    eval.strict = true;
+    eval.builtins.extend(mock_builtins::builtins());
+
+    let result = eval.evaluate(code, Some(code_path.clone()));
+    let failed = match result.value {
+        Some(Value::Catchable(_)) => true,
+        _ => !result.errors.is_empty(),
+    };
+    if expect_success && failed {
+        panic!(
+            "{}: evaluation of eval-okay test should succeed, but failed with {:?}",
+            code_path.display(),
+            result.errors,
+        );
+    }
+
+    if !expect_success && failed {
+        return;
+    }
+    // !expect_success can also mean the output differs, so don't panic if the
+    // evaluation didn't fail.
+
+    let value = result.value.unwrap();
+    let result_str = value.to_string();
+
+    let exp_path = code_path.with_extension("exp");
+    if exp_path.exists() {
+        // If there's an .exp file provided alongside, compare it with the
+        // output of the NixValue .to_string() method.
+        let exp_str = std::fs::read_to_string(&exp_path).expect("unable to read .exp file");
+
+        if expect_success {
+            assert_eq!(
+                result_str,
+                exp_str.trim(),
+                "{}: result value representation (left) must match expectation (right)",
+                code_path.display()
+            );
+        } else {
+            assert_ne!(
+                result_str,
+                exp_str.trim(),
+                "{}: test passed unexpectedly!  consider moving it out of notyetpassing",
+                code_path.display()
+            );
+
+            // Early return here, we don't compare .xml outputs if this is a !
+            // expect_success test.
+            return;
+        }
+    }
+
+    let exp_xml_path = code_path.with_extension("exp.xml");
+    if exp_xml_path.exists() {
+        // If there's an XML file provided alongside, compare it with the
+        // output produced when serializing the Value as XML.
+        let exp_xml_str = std::fs::read_to_string(exp_xml_path).expect("unable to read .xml file");
+
+        let mut xml_actual_buf = Vec::new();
+        crate::builtins::value_to_xml(&mut xml_actual_buf, &value).expect("value_to_xml failed");
+
+        assert_eq!(
+            String::from_utf8(xml_actual_buf).expect("to_xml produced invalid utf-8"),
+            exp_xml_str,
+            "{}: result value representation (left) must match expectation (right)",
+            code_path.display()
+        );
+    }
+}
+
+// identity-* tests contain Nix code snippets which should evaluate to
+// themselves exactly (i.e. literals).
+#[cfg(feature = "impure")]
+#[rstest]
+fn identity(#[files("src/tests/tvix_tests/identity-*.nix")] code_path: PathBuf) {
+    use crate::EvalIO;
+
+    let code = std::fs::read_to_string(code_path).expect("should be able to read test code");
+
+    let mut eval = crate::Evaluation::new(Box::new(crate::StdIO) as Box<dyn EvalIO>, false);
+    eval.strict = true;
+
+    let result = eval.evaluate(&code, None);
+    assert!(
+        result.errors.is_empty(),
+        "evaluation of identity test failed: {:?}",
+        result.errors
+    );
+
+    let result_str = result.value.unwrap().to_string();
+
+    assert_eq!(
+        result_str,
+        code.trim(),
+        "result value representation (left) must match expectation (right)"
+    )
+}
+
+// eval-okay-* tests contain a snippet of Nix code, and an expectation
+// of the produced string output of the evaluator.
+//
+// These evaluations are always supposed to succeed, i.e. all snippets
+// are guaranteed to be valid Nix code.
+#[cfg(feature = "impure")]
+#[rstest]
+fn eval_okay(#[files("src/tests/tvix_tests/eval-okay-*.nix")] code_path: PathBuf) {
+    eval_test(code_path, true)
+}
+
+// eval-okay-* tests from the original Nix test suite.
+#[cfg(feature = "impure")]
+#[rstest]
+fn nix_eval_okay(#[files("src/tests/nix_tests/eval-okay-*.nix")] code_path: PathBuf) {
+    eval_test(code_path, true)
+}
+
+// eval-okay-* tests from the original Nix test suite which do not yet pass for tvix
+//
+// Eventually there will be none of these left, and this function
+// will disappear :)
+//
+// Please don't submit failing tests unless they're in
+// notyetpassing; this makes the test suite much more useful for
+// regression testing, since there should always be zero non-ignored
+// failing tests.
+#[cfg(feature = "impure")]
+#[rstest]
+fn nix_eval_okay_currently_failing(
+    #[files("src/tests/nix_tests/notyetpassing/eval-okay-*.nix")] code_path: PathBuf,
+) {
+    eval_test(code_path, false)
+}
+
+#[cfg(feature = "impure")]
+#[rstest]
+fn eval_okay_currently_failing(
+    #[files("src/tests/tvix_tests/notyetpassing/eval-okay-*.nix")] code_path: PathBuf,
+) {
+    eval_test(code_path, false)
+}
+
+// eval-fail-* tests contain a snippet of Nix code, which is
+// expected to fail evaluation.  The exact type of failure
+// (assertion, parse error, etc) is not currently checked.
+#[cfg(feature = "impure")]
+#[rstest]
+fn eval_fail(#[files("src/tests/tvix_tests/eval-fail-*.nix")] code_path: PathBuf) {
+    eval_test(code_path, false)
+}
+
+// eval-fail-* tests from the original Nix test suite.
+#[cfg(feature = "impure")]
+#[rstest]
+fn nix_eval_fail(#[files("src/tests/nix_tests/eval-fail-*.nix")] code_path: PathBuf) {
+    eval_test(code_path, false)
+}
diff --git a/tvix/eval/src/tests/one_offs.rs b/tvix/eval/src/tests/one_offs.rs
index 565d1dd48f..21e9144baf 100644
--- a/tvix/eval/src/tests/one_offs.rs
+++ b/tvix/eval/src/tests/one_offs.rs
@@ -5,7 +5,7 @@ fn test_source_builtin() {
     // Test an evaluation with a source-only builtin. The test ensures
     // that the artificially constructed thunking is correct.
 
-    let mut eval = Evaluation::new_impure();
+    let mut eval = Evaluation::new_pure();
     eval.src_builtins.push(("testSourceBuiltin", "42"));
 
     let result = eval.evaluate("builtins.testSourceBuiltin", None);
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.exp
new file mode 100644
index 0000000000..eb2117a0ce
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.exp
@@ -0,0 +1 @@
+[ [ "" [ "a" ] "c" ] [ "" [ "a" ] "b" [ "c" ] "" ] [ "" [ "a" null ] "b" [ null "c" ] "" ] [ " " [ "FOO" ] " " ] [ "" [ "abc" ] "" [ "" ] "" ] [ "" [ "abc" ] "" [ "" ] "" ] [ "" [ ] "" ] ]
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.nix
new file mode 100644
index 0000000000..95305040dc
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.nix
@@ -0,0 +1,10 @@
+[
+  (builtins.split "(a)b" "abc")
+  (builtins.split "([ac])" "abc")
+  (builtins.split "(a)|(c)" "abc")
+  (builtins.split "([[:upper:]]+)" " FOO ")
+
+  (builtins.split "(.*)" "abc")
+  (builtins.split "([abc]*)" "abc")
+  (builtins.split ".*" "")
+]
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.exp.xml b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.exp.xml
new file mode 100644
index 0000000000..468972b2f8
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.exp.xml
@@ -0,0 +1,5 @@
+<?xml version='1.0' encoding='utf-8'?>
+<expr>
+  <attrs>
+  </attrs>
+</expr>
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.nix
new file mode 100644
index 0000000000..ffcd4415b0
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.nix
@@ -0,0 +1 @@
+{ }
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.exp
new file mode 100644
index 0000000000..9ae16de526
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.exp
@@ -0,0 +1 @@
+"<?xml version='1.0' encoding='utf-8'?>\n<expr>\n  <attrs>\n    <attr name=\"&amp;-{\">\n      <string value=\";&amp;&quot;\" />\n    </attr>\n    <attr name=\"a\">\n      <string value=\"s\" />\n    </attr>\n  </attrs>\n</expr>\n"
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.nix
new file mode 100644
index 0000000000..7d074048dd
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.nix
@@ -0,0 +1,2 @@
+# Check some corner cases regarding escaping.
+builtins.toXML { a = "s"; "&-{" = ";&\""; }
diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs
index c48e9c1f4e..24a6bcaf6f 100644
--- a/tvix/eval/src/value/json.rs
+++ b/tvix/eval/src/value/json.rs
@@ -47,8 +47,8 @@ impl Value {
 
                 for val in l.into_iter() {
                     match generators::request_to_json(co, val).await {
-                        Ok((v, mut ctx)) => {
-                            context = context.join(&mut ctx);
+                        Ok((v, ctx)) => {
+                            context.extend(ctx.into_iter());
                             out.push(v)
                         }
                         Err(cek) => return Ok(Err(cek)),
@@ -100,8 +100,8 @@ impl Value {
                     out.insert(
                         name.to_str()?.to_owned(),
                         match generators::request_to_json(co, value).await {
-                            Ok((v, mut ctx)) => {
-                                context = context.join(&mut ctx);
+                            Ok((v, ctx)) => {
+                                context.extend(ctx.into_iter());
                                 v
                             }
                             Err(cek) => return Ok(Err(cek)),
diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs
index c171c9a04e..dfad0cd839 100644
--- a/tvix/eval/src/value/mod.rs
+++ b/tvix/eval/src/value/mod.rs
@@ -338,8 +338,8 @@ impl Value {
             let coerced: Result<BString, _> = match (value, kind) {
                 // coercions that are always done
                 (Value::String(mut s), _) => {
-                    if let Some(ctx) = s.context_mut() {
-                        context = context.join(ctx);
+                    if let Some(ctx) = s.take_context() {
+                        context.extend(ctx.into_iter());
                     }
                     Ok((*s).into())
                 }
diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs
index ceb43f1ea5..4f869eb00d 100644
--- a/tvix/eval/src/value/string.rs
+++ b/tvix/eval/src/value/string.rs
@@ -54,6 +54,12 @@ impl From<HashSet<NixContextElement>> for NixContext {
     }
 }
 
+impl<const N: usize> From<[NixContextElement; N]> for NixContext {
+    fn from(value: [NixContextElement; N]) -> Self {
+        Self(HashSet::from(value))
+    }
+}
+
 impl NixContext {
     /// Creates an empty context that can be populated
     /// and passed to form a contextful [NixString], albeit
@@ -78,20 +84,19 @@ impl NixContext {
         self
     }
 
-    /// Consumes both ends of the join into a new NixContent
-    /// containing the union of elements of both ends.
-    pub fn join(mut self, other: &mut NixContext) -> Self {
-        let other_set = std::mem::take(&mut other.0);
-        let mut set: HashSet<NixContextElement> = std::mem::take(&mut self.0);
-        set.extend(other_set);
-        Self(set)
+    /// Extends the existing context with more context elements.
+    pub fn extend<T>(&mut self, iter: T)
+    where
+        T: IntoIterator<Item = NixContextElement>,
+    {
+        self.0.extend(iter)
     }
 
     /// Copies from another [NixString] its context strings
     /// in this context.
     pub fn mimic(&mut self, other: &NixString) {
         if let Some(context) = other.context() {
-            self.0.extend(context.iter().cloned());
+            self.extend(context.iter().cloned());
         }
     }
 
@@ -154,6 +159,16 @@ impl NixContext {
     }
 }
 
+impl IntoIterator for NixContext {
+    type Item = NixContextElement;
+
+    type IntoIter = std::collections::hash_set::IntoIter<NixContextElement>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        self.0.into_iter()
+    }
+}
+
 /// This type is never instantiated, but serves to document the memory layout of the actual heap
 /// allocation for Nix strings.
 #[allow(dead_code)]
@@ -616,6 +631,11 @@ mod arbitrary {
 
 impl NixString {
     fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self {
+        debug_assert!(
+            !context.as_deref().is_some_and(NixContext::is_empty),
+            "BUG: initialized with empty context"
+        );
+
         // SAFETY: We're always fully initializing a NixString here:
         //
         // 1. NixStringInner::alloc sets up the len for us
@@ -707,8 +727,10 @@ impl NixString {
         let context = [self.context(), other.context()]
             .into_iter()
             .flatten()
-            .fold(NixContext::new(), |acc_ctx, new_ctx| {
-                acc_ctx.join(&mut new_ctx.clone())
+            .fold(NixContext::new(), |mut acc_ctx, new_ctx| {
+                // TODO: consume new_ctx?
+                acc_ctx.extend(new_ctx.iter().cloned());
+                acc_ctx
             });
         Self::new_context_from(context, s)
     }
@@ -719,16 +741,30 @@ impl NixString {
         //
         // Also, we're using the same lifetime and mutability as self, to fit the
         // pointer-to-reference conversion rules
-        unsafe { NixStringInner::context_ref(self.0).as_deref() }
+        let context = unsafe { NixStringInner::context_ref(self.0).as_deref() };
+
+        debug_assert!(
+            !context.is_some_and(NixContext::is_empty),
+            "BUG: empty context"
+        );
+
+        context
     }
 
-    pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> {
+    pub(crate) fn context_mut(&mut self) -> &mut Option<Box<NixContext>> {
         // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY
         // comment in `new`).
         //
         // Also, we're using the same lifetime and mutability as self, to fit the
         // pointer-to-reference conversion rules
-        unsafe { NixStringInner::context_mut(self.0).as_deref_mut() }
+        let context = unsafe { NixStringInner::context_mut(self.0) };
+
+        debug_assert!(
+            !context.as_deref().is_some_and(NixContext::is_empty),
+            "BUG: empty context"
+        );
+
+        context
     }
 
     pub fn iter_context(&self) -> impl Iterator<Item = &NixContext> {
@@ -756,12 +792,16 @@ impl NixString {
         self.context().is_some()
     }
 
+    /// This clears the context of the string, returning
+    /// the removed dependency tracking information.
+    pub fn take_context(&mut self) -> Option<Box<NixContext>> {
+        self.context_mut().take()
+    }
+
     /// This clears the context of that string, losing
     /// all dependency tracking information.
     pub fn clear_context(&mut self) {
-        // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY
-        // comment in `new`).
-        *unsafe { NixStringInner::context_mut(self.0) } = None;
+        let _ = self.take_context();
     }
 
     pub fn chars(&self) -> Chars<'_> {
@@ -849,7 +889,7 @@ impl Display for NixString {
     }
 }
 
-#[cfg(test)]
+#[cfg(all(test, feature = "arbitrary"))]
 mod tests {
     use test_strategy::proptest;
 
diff --git a/tvix/eval/src/vm/generators.rs b/tvix/eval/src/vm/generators.rs
index 79de688692..dbf7703bf0 100644
--- a/tvix/eval/src/vm/generators.rs
+++ b/tvix/eval/src/vm/generators.rs
@@ -745,6 +745,7 @@ pub async fn request_open_file(co: &GenCo, path: PathBuf) -> Box<dyn std::io::Re
     }
 }
 
+#[cfg_attr(not(feature = "impure"), allow(unused))]
 pub(crate) async fn request_path_exists(co: &GenCo, path: PathBuf) -> Value {
     match co.yield_(VMRequest::PathExists(path)).await {
         VMResponse::Value(value) => value,
@@ -755,6 +756,7 @@ pub(crate) async fn request_path_exists(co: &GenCo, path: PathBuf) -> Value {
     }
 }
 
+#[cfg_attr(not(feature = "impure"), allow(unused))]
 pub(crate) async fn request_read_dir(co: &GenCo, path: PathBuf) -> Vec<(bytes::Bytes, FileType)> {
     match co.yield_(VMRequest::ReadDir(path)).await {
         VMResponse::Directory(dir) => dir,
diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs
index 5c244cc3ca..1fa9eba4bc 100644
--- a/tvix/eval/src/vm/mod.rs
+++ b/tvix/eval/src/vm/mod.rs
@@ -994,8 +994,8 @@ where
             }
             let mut nix_string = val.to_contextful_str().with_span(frame, self)?;
             out.push_str(nix_string.as_bstr());
-            if let Some(nix_string_ctx) = nix_string.context_mut() {
-                context = context.join(nix_string_ctx);
+            if let Some(nix_string_ctx) = nix_string.take_context() {
+                context.extend(nix_string_ctx.into_iter())
             }
         }
 
diff --git a/tvix/eval/tests/nix_oracle.rs b/tvix/eval/tests/nix_oracle.rs
index 5a5cc0a822..3d3abc7363 100644
--- a/tvix/eval/tests/nix_oracle.rs
+++ b/tvix/eval/tests/nix_oracle.rs
@@ -56,6 +56,7 @@ fn nix_eval(expr: &str, strictness: Strictness) -> String {
 /// `NIX_INSTANTIATE_BINARY_PATH` env var to resolve the `nix-instantiate` binary) and tvix, and
 /// assert that the result is identical
 #[track_caller]
+#[cfg(feature = "impure")]
 fn compare_eval(expr: &str, strictness: Strictness) {
     let nix_result = nix_eval(expr, strictness);
     let mut eval = tvix_eval::Evaluation::new_pure();
@@ -76,6 +77,7 @@ fn compare_eval(expr: &str, strictness: Strictness) {
 macro_rules! compare_eval_tests {
     ($strictness:expr, {}) => {};
     ($strictness:expr, {$(#[$meta:meta])* $test_name: ident($expr: expr); $($rest:tt)*}) => {
+        #[cfg(feature = "impure")]
         #[test]
         $(#[$meta])*
         fn $test_name() {