From 025c67bf4d5666411b4d6cdc929e1a677ebc0439 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Tue, 14 Feb 2023 15:02:39 +0300 Subject: refactor(tvix/eval): flatten call stack of VM using generators Warning: This is probably the biggest refactor in tvix-eval history, so far. This replaces all instances of trampolines and recursion during evaluation of the VM loop with generators. A generator is an asynchronous function that can be suspended to yield a message (in our case, vm::generators::GeneratorRequest) and receive a response (vm::generators::GeneratorResponsee). The `genawaiter` crate provides an interpreter for generators that can drive their execution and lets us move control flow between the VM and suspended generators. To do this, massive changes have occured basically everywhere in the code. On a high-level: 1. The VM is now organised around a frame stack. A frame is either a call frame (execution of Tvix bytecode) or a generator frame (a running or suspended generator). The VM has an outer loop that pops a frame off the frame stack, and then enters an inner loop either driving the execution of the bytecode or the execution of a generator. Both types of frames have several branches that can result in the frame re-enqueuing itself, and enqueuing some other work (in the form of a different frame) on top of itself. The VM will eventually resume the frame when everything "above" it has been suspended. In this way, the VM's new frame stack takes over much of the work that was previously achieved by recursion. 2. All methods previously taking a VM have been refactored into async functions that instead emit/receive generator messages for communication with the VM. Notably, this includes *all* builtins. This has had some other effects: - Some test have been removed or commented out, either because they tested code that was mostly already dead (nix_eq) or because they now require generator scaffolding which we do not have in place for tests (yet). - Because generator functions are technically async (though no async IO is involved), we lose the ability to use much of the Rust standard library e.g. in builtins. This has led to many algorithms being unrolled into iterative versions instead of iterator combinations, and things like sorting had to be implemented from scratch. - Many call sites that previously saw a `Result<..., ErrorKind>` bubble up now only see the result value, as the error handling is encapsulated within the generator loop. This reduces number of places inside of builtin implementations where error context can be attached to calls that can fail. Currently what we gain in this tradeoff is significantly more detailed span information (which we still need to bubble up, this commit does not change the error display). We'll need to do some analysis later of how useful the errors turn out to be and potentially introduce some methods for attaching context to a generator frame again. This change is very difficult to do in stages, as it is very much an "all or nothing" change that affects huge parts of the codebase. I've tried to isolate changes that can be isolated into the parent CLs of this one, but this change is still quite difficult to wrap one's mind and I'm available to discuss it and explain things to any reviewer. Fixes: b/238, b/237, b/251 and potentially others. Change-Id: I39244163ff5bbecd169fe7b274df19262b515699 Reviewed-on: https://cl.tvl.fyi/c/depot/+/8104 Reviewed-by: raitobezarius Reviewed-by: Adam Joseph Tested-by: BuildkiteCI --- tvix/eval/src/value/builtin.rs | 78 +++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'tvix/eval/src/value/builtin.rs') diff --git a/tvix/eval/src/value/builtin.rs b/tvix/eval/src/value/builtin.rs index c7fc33903d96..0577111030a2 100644 --- a/tvix/eval/src/value/builtin.rs +++ b/tvix/eval/src/value/builtin.rs @@ -3,7 +3,7 @@ //! //! Builtins are directly backed by Rust code operating on Nix values. -use crate::{errors::ErrorKind, vm::VM}; +use crate::vm::generators::Generator; use super::Value; @@ -12,40 +12,38 @@ use std::{ rc::Rc, }; -/// Trait for closure types of builtins implemented directly by -/// backing Rust code. +/// Trait for closure types of builtins. /// -/// Builtins declare their arity and are passed a vector with the -/// right number of arguments. Additionally, as they might have to -/// force the evaluation of thunks, they are passed a reference to the -/// current VM which they can use for forcing a value. +/// Builtins are expected to yield a generator which can be run by the VM to +/// produce the final value. /// -/// Errors returned from a builtin will be annotated with the location -/// of the call to the builtin. -pub trait BuiltinFn: Fn(Vec, &mut VM) -> Result {} -impl, &mut VM) -> Result> BuiltinFn for F {} - -/// Description of a single argument passed to a builtin -pub struct BuiltinArgument { - /// Whether the argument should be forced before the underlying builtin function is called - pub strict: bool, - /// The name of the argument, to be used in docstrings and error messages - pub name: &'static str, -} +/// Implementors should use the builtins-macros to create these functions +/// instead of handling the argument-passing logic manually. +pub trait BuiltinGen: Fn(Vec) -> Generator {} +impl) -> Generator> BuiltinGen for F {} #[derive(Clone)] pub struct BuiltinRepr { name: &'static str, - /// Array of arguments to the builtin. - arguments: &'static [BuiltinArgument], /// Optional documentation for the builtin. documentation: Option<&'static str>, - func: Rc, + arg_count: usize, + + func: Rc, /// Partially applied function arguments. partials: Vec, } +pub enum BuiltinResult { + /// Builtin was not ready to be called (arguments missing) and remains + /// partially applied. + Partial(Builtin), + + /// Builtin was called and constructed a generator that the VM must run. + Called(Generator), +} + /// Represents a single built-in function which directly executes Rust /// code that operates on a Nix value. /// @@ -68,16 +66,16 @@ impl From for Builtin { } impl Builtin { - pub fn new( + pub fn new( name: &'static str, - arguments: &'static [BuiltinArgument], documentation: Option<&'static str>, + arg_count: usize, func: F, ) -> Self { BuiltinRepr { name, - arguments, documentation, + arg_count, func: Rc::new(func), partials: vec![], } @@ -92,23 +90,25 @@ impl Builtin { self.0.documentation } - /// Apply an additional argument to the builtin, which will either - /// lead to execution of the function or to returning a partial - /// builtin. - pub fn apply(mut self, vm: &mut VM, arg: Value) -> Result { + /// Apply an additional argument to the builtin. After this, [`call`] *must* + /// be called, otherwise it may leave the builtin in an incorrect state. + pub fn apply_arg(&mut self, arg: Value) { self.0.partials.push(arg); - if self.0.partials.len() == self.0.arguments.len() { - for (idx, BuiltinArgument { strict, .. }) in self.0.arguments.iter().enumerate() { - if *strict { - self.0.partials[idx].force(vm)?; - } - } - return (self.0.func)(self.0.partials, vm); - } + debug_assert!( + self.0.partials.len() <= self.0.arg_count, + "Tvix bug: pushed too many arguments to builtin" + ); + } - // Function is not yet ready to be called. - Ok(Value::Builtin(self)) + /// Attempt to call a builtin, which will produce a generator if it is fully + /// applied or return the builtin if it is partially applied. + pub fn call(self) -> BuiltinResult { + if self.0.partials.len() == self.0.arg_count { + BuiltinResult::Called((self.0.func)(self.0.partials)) + } else { + BuiltinResult::Partial(self) + } } } -- cgit 1.4.1