From 49b34183e3f2591c6e18d44689acb6fd70eab1bf Mon Sep 17 00:00:00 2001 From: Adam Joseph Date: Tue, 14 Nov 2023 10:23:42 -0800 Subject: feat(tvix/eval): rewrite Thunk::force() in nonrecursive form This commit rewrites Thunk::force() so that it is not (directly) self-recursive. It maintains a Vec of all the previously-encountered thunks which point to the one it is currently forcing, rather than recursively calling itself. Benefits: - Short term: This commit saves the cost of a round-trip through the generator machinery for the generators::request_force() which is removed by this commit. - Medium term: Once a similar transformation has been applied to nix_cmp(), nix_add(), nix_eq(), and coerce_to_string(), those four functions, along with Thunk::force(), will make non-tail calls only to each other. They can then be merged into a single tail-recursive function which does not use the generator machinery at all: enum Task { Cmp, Add, Eq, CoerceToString, Force}; fn Value::walk(task:Task, v1:Value, v2:Value) { // ... - Long term: The long-term goal here is to use generators **only for builtins** and [Marionette]-style remote control of the VM. In other words: use `async` for things that actually involve concurrency. Calls from the VM to builtins can then be blocking calls, because even cppnix will overflow the stack if you make a MAX_STACK_DEPTH-deep recursive call which passes through a builtin at every stack frame (e.g. `{ func = builtins.sort (a: b: ... func ...) ...}`). This way the inner "tight loop" of the interpreter doesn't pay the costs of `async` and generators. These costs manifest in terms of: performance, complex nonlocal control flow, and language impediments (async Rust is a restricted subset of real Rust, and is missing things like traits). [Marionette]: https://firefox-source-docs.mozilla.org/testing/marionette/Intro.html Change-Id: I6179b8abb2ea0492180fcb347f37595a14665777 Reviewed-on: https://cl.tvl.fyi/c/depot/+/10039 Reviewed-by: tazjin Tested-by: BuildkiteCI --- tvix/eval/src/value/mod.rs | 4 +- tvix/eval/src/value/thunk.rs | 155 ++++++++++++++++++++++++++----------------- tvix/eval/src/vm/mod.rs | 2 +- 3 files changed, 97 insertions(+), 64 deletions(-) (limited to 'tvix/eval') diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index e4f8bf0fd824..6d9a6ada73bb 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -662,9 +662,9 @@ impl Value { // TODO(amjoseph): de-asyncify this (when called directly by the VM) pub async fn force(self, co: GenCo, span: LightSpan) -> Result { if let Value::Thunk(thunk) = self { - return thunk.force(co, span).await; + // TODO(amjoseph): use #[tailcall::mutual] + return Thunk::force(thunk, co, span).await; } - Ok(self) } diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs index 2853a398decc..3ff3bde65934 100644 --- a/tvix/eval/src/value/thunk.rs +++ b/tvix/eval/src/value/thunk.rs @@ -79,6 +79,8 @@ enum ThunkRepr { content_span: Option, }, + // TODO(amjoseph): consider changing `Value` to `Rc` to avoid + // expensive clone()s in Thunk::force(). /// Fully evaluated thunk. Evaluated(Value), } @@ -203,69 +205,100 @@ impl Thunk { } } - // TODO(amjoseph): de-asyncify this - pub async fn force(self, co: GenCo, span: LightSpan) -> Result { - // If the current thunk is already fully evaluated, return its evaluated - // value. The VM will continue running the code that landed us here. - if self.is_forced() { - return Ok(self.unwrap_or_clone()); - } - - // Begin evaluation of this thunk by marking it as a blackhole, meaning - // that any other forcing frame encountering this thunk before its - // evaluation is completed detected an evaluation cycle. - let inner = self.0.replace(self.prepare_blackhole(span)); - - match inner { - // If there was already a blackhole in the thunk, this is an - // evaluation cycle. - ThunkRepr::Blackhole { - forced_at, - suspended_at, - content_span, - } => Err(ErrorKind::InfiniteRecursion { - first_force: forced_at.span(), - suspended_at: suspended_at.map(|s| s.span()), - content_span, - }), - - // If there is a native function stored in the thunk, evaluate it - // and replace this thunk's representation with the result. - ThunkRepr::Native(native) => { - let value = native.0()?; - - // Force the returned value again, in case the native call - // returned a thunk. - let value = generators::request_force(&co, value).await; - - self.0.replace(ThunkRepr::Evaluated(value.clone())); - Ok(value) - } - - // When encountering a suspended thunk, request that the VM enters - // it and produces the result. - ThunkRepr::Suspended { - lambda, - upvalues, - light_span, - } => { - let value = - generators::request_enter_lambda(&co, lambda, upvalues, light_span).await; - - // This may have returned another thunk, so we need to request - // that the VM forces this value, too. - let value = generators::request_force(&co, value).await; - - self.0.replace(ThunkRepr::Evaluated(value.clone())); - Ok(value) + pub async fn force(mut myself: Thunk, co: GenCo, span: LightSpan) -> Result { + // This vector of "thunks which point to the thunk-being-forced", to + // be updated along with it, is necessary in order to write this + // function in iterative (and later, mutual-tail-call) form. + let mut also_update: Vec>> = vec![]; + + loop { + // If the current thunk is already fully evaluated, return its evaluated + // value. The VM will continue running the code that landed us here. + if myself.is_forced() { + let val = myself.unwrap_or_clone(); + for other_thunk in also_update.into_iter() { + other_thunk.replace(ThunkRepr::Evaluated(val.clone())); + } + return Ok(val); } - // If an inner value is found, force it and then update. This is - // most likely an inner thunk, as `Thunk:is_forced` returned false. - ThunkRepr::Evaluated(val) => { - let value = generators::request_force(&co, val).await; - self.0.replace(ThunkRepr::Evaluated(value.clone())); - Ok(value) + // Begin evaluation of this thunk by marking it as a blackhole, meaning + // that any other forcing frame encountering this thunk before its + // evaluation is completed detected an evaluation cycle. + let inner = myself.0.replace(myself.prepare_blackhole(span.clone())); + + match inner { + // If there was already a blackhole in the thunk, this is an + // evaluation cycle. + ThunkRepr::Blackhole { + forced_at, + suspended_at, + content_span, + } => { + return Err(ErrorKind::InfiniteRecursion { + first_force: forced_at.span(), + suspended_at: suspended_at.map(|s| s.span()), + content_span, + }) + } + + // If there is a native function stored in the thunk, evaluate it + // and replace this thunk's representation with the result. + ThunkRepr::Native(native) => { + let value = native.0()?; + myself.0.replace(ThunkRepr::Evaluated(value)); + continue; + } + + // When encountering a suspended thunk, request that the VM enters + // it and produces the result. + ThunkRepr::Suspended { + lambda, + upvalues, + light_span, + } => { + // TODO(amjoseph): use #[tailcall::mutual] here. This can + // be turned into a tailcall to vm::execute_bytecode() by + // passing `also_update` to it. + let value = + generators::request_enter_lambda(&co, lambda, upvalues, light_span).await; + myself.0.replace(ThunkRepr::Evaluated(value)); + continue; + } + + // nested thunks -- try to flatten before forcing + ThunkRepr::Evaluated(Value::Thunk(inner_thunk)) => { + match Rc::try_unwrap(inner_thunk.0) { + Ok(refcell) => { + // we are the only reference to the inner thunk, + // so steal it + myself.0.replace(refcell.into_inner()); + continue; + } + Err(rc) => { + let inner_thunk = Thunk(rc); + if inner_thunk.is_forced() { + // tail call to force the inner thunk; note that + // this means the outer thunk remains unforced + // even after calling force() on it; however the + // next time it is forced we will be one + // thunk-forcing closer to it being + // fully-evaluated. + myself + .0 + .replace(ThunkRepr::Evaluated(inner_thunk.value().clone())); + continue; + } + also_update.push(myself.0.clone()); + myself = inner_thunk; + continue; + } + } + } + + ThunkRepr::Evaluated(val) => { + return Ok(val); + } } } } diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 87ad50cda182..6aad8719e0e4 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -472,7 +472,7 @@ impl<'o> VM<'o> { self.push_call_frame(span, frame); self.enqueue_generator("force", gen_span.clone(), |co| { - thunk.force(co, gen_span) + Thunk::force(thunk, co, gen_span) }); return Ok(false); -- cgit 1.4.1