From 07ea30370e887b16228af0dccbe126010cce9e25 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Tue, 6 Sep 2022 23:13:48 +0300 Subject: refactor(tvix/eval): capture entire with_stack in upvalues This completely rewrites the handling of "dynamic upvalues" to, instead of resolving them at thunk/closure instantiation time (which forces some values too early), capture the entire with stack of parent contexts if it exists. There are a couple of things in here that could be written more efficiently, but I'm first working through this to get to a bug related to with + recursion and the code complexity of some of the optimisations is distracting. Change-Id: Ia538e06c9146e3bf8decb9adf02dd726d2c651cf Reviewed-on: https://cl.tvl.fyi/c/depot/+/6486 Tested-by: BuildkiteCI Reviewed-by: sterni --- tvix/eval/src/compiler/mod.rs | 151 ++++++++++++++++++---------------------- tvix/eval/src/compiler/scope.rs | 13 ---- tvix/eval/src/opcode.rs | 3 +- tvix/eval/src/upvalues.rs | 18 +++++ tvix/eval/src/vm.rs | 87 ++++++++++------------- 5 files changed, 123 insertions(+), 149 deletions(-) diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 883129dbfa4d..8c06db981012 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -18,7 +18,6 @@ mod scope; use path_clean::PathClean; use rnix::ast::{self, AstToken, HasEntry}; use rowan::ast::{AstChildren, AstNode}; -use smol_str::SmolStr; use std::collections::HashMap; use std::path::{Path, PathBuf}; use std::rc::Rc; @@ -45,6 +44,7 @@ pub struct CompilationOutput { struct LambdaCtx { lambda: Lambda, scope: Scope, + captures_with_stack: bool, } impl LambdaCtx { @@ -52,6 +52,7 @@ impl LambdaCtx { LambdaCtx { lambda: Lambda::new_anonymous(), scope: Default::default(), + captures_with_stack: false, } } @@ -59,6 +60,7 @@ impl LambdaCtx { LambdaCtx { lambda: Lambda::new_anonymous(), scope: self.scope.inherit(), + captures_with_stack: false, } } } @@ -869,32 +871,18 @@ impl Compiler<'_, '_> { return; } - // Even worse - are we dealing with a dynamic upvalue? - if let Some(idx) = - self.resolve_dynamic_upvalue(self.contexts.len() - 1, ident.text(), &node) - { - // Edge case: Current scope *also* has a non-empty - // `with`-stack. This means we need to resolve - // both in this scope, and in the upvalues. - if self.scope().has_with() { - self.emit_literal_ident(&node); - self.push_op(OpCode::OpResolveWithOrUpvalue(idx), &node); - return; - } - - self.push_op(OpCode::OpGetUpvalue(idx), &node); + // If there is a non-empty `with`-stack (or a parent + // context with one), emit a runtime dynamic + // resolution instruction. + if self.has_dynamic_ancestor() { + self.emit_literal_ident(&node); + self.push_op(OpCode::OpResolveWith, &node); return; } - if !self.scope().has_with() { - self.emit_error(self.span_for(&node), ErrorKind::UnknownStaticVariable); - return; - } - - // Variable needs to be dynamically resolved at - // runtime. - self.emit_literal_ident(&node); - self.push_op(OpCode::OpResolveWith, &node); + // Otherwise, this variable is missing. + self.emit_error(self.span_for(&node), ErrorKind::UnknownStaticVariable); + return; } LocalPosition::Known(idx) => { @@ -1078,6 +1066,12 @@ impl Compiler<'_, '_> { // Check if tail-call optimisation is possible and perform it. optimise_tail_call(&mut compiled.lambda.chunk); + // Capturing the with stack counts as an upvalue, as it is + // emitted as an upvalue data instruction. + if compiled.captures_with_stack { + compiled.lambda.upvalue_count += 1; + } + let lambda = Rc::new(compiled.lambda); self.observer.observe_compiled_lambda(&lambda); @@ -1095,7 +1089,12 @@ impl Compiler<'_, '_> { let blueprint_idx = self.chunk().push_constant(Value::Blueprint(lambda)); self.push_op(OpCode::OpClosure(blueprint_idx), &node); - self.emit_upvalue_data(outer_slot, compiled.scope.upvalues); + self.emit_upvalue_data( + outer_slot, + &node, + compiled.scope.upvalues, + compiled.captures_with_stack, + ); } fn compile_apply(&mut self, slot: LocalIdx, node: ast::Apply) { @@ -1126,6 +1125,13 @@ impl Compiler<'_, '_> { let mut thunk = self.contexts.pop().unwrap(); optimise_tail_call(&mut thunk.lambda.chunk); + + // Capturing the with stack counts as an upvalue, as it is + // emitted as an upvalue data instruction. + if thunk.captures_with_stack { + thunk.lambda.upvalue_count += 1; + } + let lambda = Rc::new(thunk.lambda); self.observer.observe_compiled_thunk(&lambda); @@ -1140,12 +1146,23 @@ impl Compiler<'_, '_> { let blueprint_idx = self.chunk().push_constant(Value::Blueprint(lambda)); self.push_op(OpCode::OpThunk(blueprint_idx), node); - self.emit_upvalue_data(outer_slot, thunk.scope.upvalues); + self.emit_upvalue_data( + outer_slot, + node, + thunk.scope.upvalues, + thunk.captures_with_stack, + ); } /// Emit the data instructions that the runtime needs to correctly - /// assemble the provided upvalues array. - fn emit_upvalue_data(&mut self, slot: LocalIdx, upvalues: Vec) { + /// assemble the upvalues struct. + fn emit_upvalue_data( + &mut self, + slot: LocalIdx, + node: &T, + upvalues: Vec, + capture_with: bool, + ) { let this_depth = self.scope()[slot].depth; let this_stack_slot = self.scope().stack_index(slot); @@ -1170,16 +1187,13 @@ impl Compiler<'_, '_> { UpvalueKind::Upvalue(idx) => { self.push_op(OpCode::DataUpvalueIdx(idx), &upvalue.node); } - - UpvalueKind::Dynamic { name, up } => { - let idx = self.chunk().push_constant(Value::String(name.into())); - self.push_op(OpCode::DataDynamicIdx(idx), &upvalue.node); - if let Some(up) = up { - self.push_op(OpCode::DataDynamicAncestor(up), &upvalue.node); - } - } }; } + + if capture_with { + // TODO(tazjin): probably better to emit span for the ident that caused this + self.push_op(OpCode::DataCaptureWith, node); + } } /// Emit the literal string value of an identifier. Required for @@ -1322,58 +1336,25 @@ impl Compiler<'_, '_> { None } - /// If no static resolution for a potential upvalue was found, - /// finds the lowest lambda context that has a `with`-stack and - /// thread dynamic upvalues all the way through. - /// - /// At runtime, as closures are being constructed they either - /// capture a dynamically available upvalue, take an upvalue from - /// their "ancestor" or leave a sentinel value on the stack. - /// - /// As such an upvalue is actually accessed, an error is produced - /// when the sentinel is found. See the runtime's handling of - /// dynamic upvalues for details. - fn resolve_dynamic_upvalue( - &mut self, - at: usize, - name: &str, - node: &rnix::ast::Ident, - ) -> Option { - if at == 0 { - // There can not be any upvalue at the outermost context. - return None; - } - - if let Some((lowest_idx, _)) = self - .contexts - .iter() - .enumerate() - .find(|(_, c)| c.scope.has_with()) - { - // An enclosing lambda context has dynamic values. Each - // context in the chain from that point on now needs to - // capture dynamic upvalues because we can not statically - // know at which level the correct one is located. - let name = SmolStr::new(name); - let mut upvalue_idx = None; - - for idx in lowest_idx..=at { - upvalue_idx = Some(self.add_upvalue( - idx, - node, - UpvalueKind::Dynamic { - name: name.clone(), - up: upvalue_idx, - }, - )); + /// Determine whether the current lambda context has any ancestors + /// that use dynamic scope resolution, and mark contexts as + /// needing to capture their enclosing `with`-stack in their + /// upvalues. + fn has_dynamic_ancestor(&mut self) -> bool { + let mut ancestor_has_with = false; + + for ctx in self.contexts.iter_mut() { + if ancestor_has_with { + // If the ancestor has an active with stack, mark this + // lambda context as needing to capture it. + ctx.captures_with_stack = true; + } else { + // otherwise, check this context and move on + ancestor_has_with = ctx.scope.has_with(); } - - // Return the outermost upvalue index (i.e. the one of the - // current context). - return upvalue_idx; } - None + ancestor_has_with } fn add_upvalue( diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs index ca8512d409c8..e69922a33013 100644 --- a/tvix/eval/src/compiler/scope.rs +++ b/tvix/eval/src/compiler/scope.rs @@ -15,8 +15,6 @@ use std::{ ops::Index, }; -use smol_str::SmolStr; - use crate::opcode::{StackIdx, UpvalueIdx}; #[derive(Debug)] @@ -102,17 +100,6 @@ pub enum UpvalueKind { /// This upvalue captures an enclosing upvalue. Upvalue(UpvalueIdx), - - /// This upvalue captures a dynamically resolved value (i.e. - /// `with`). - /// - /// It stores the identifier with which to perform a dynamic - /// lookup, as well as the optional upvalue index in the enclosing - /// function (if any). - Dynamic { - name: SmolStr, - up: Option, - }, } #[derive(Clone, Debug)] diff --git a/tvix/eval/src/opcode.rs b/tvix/eval/src/opcode.rs index 42b8760a1ed4..27fceaf9677c 100644 --- a/tvix/eval/src/opcode.rs +++ b/tvix/eval/src/opcode.rs @@ -125,6 +125,5 @@ pub enum OpCode { DataLocalIdx(StackIdx), DataDeferredLocal(StackIdx), DataUpvalueIdx(UpvalueIdx), - DataDynamicIdx(ConstantIdx), - DataDynamicAncestor(UpvalueIdx), + DataCaptureWith, } diff --git a/tvix/eval/src/upvalues.rs b/tvix/eval/src/upvalues.rs index b51ef500e031..ddde8db95a21 100644 --- a/tvix/eval/src/upvalues.rs +++ b/tvix/eval/src/upvalues.rs @@ -16,12 +16,14 @@ use crate::{opcode::UpvalueIdx, Value}; #[derive(Clone, Debug)] pub struct Upvalues { upvalues: Vec, + with_stack: Option>, } impl Upvalues { pub fn with_capacity(count: usize) -> Self { Upvalues { upvalues: Vec::with_capacity(count), + with_stack: None, } } @@ -29,6 +31,22 @@ impl Upvalues { pub fn push(&mut self, value: Value) { self.upvalues.push(value); } + + /// Set the captured with stack. + pub fn set_with_stack(&mut self, with_stack: Vec) { + self.with_stack = Some(with_stack); + } + + pub fn with_stack(&self) -> Option<&Vec> { + self.with_stack.as_ref() + } + + pub fn with_stack_len(&self) -> usize { + match &self.with_stack { + None => 0, + Some(stack) => stack.len(), + } + } } impl Index for Upvalues { diff --git a/tvix/eval/src/vm.rs b/tvix/eval/src/vm.rs index 31964cf90969..9fb51de544b3 100644 --- a/tvix/eval/src/vm.rs +++ b/tvix/eval/src/vm.rs @@ -7,7 +7,7 @@ use crate::{ chunk::Chunk, errors::{Error, ErrorKind, EvalResult}, observer::Observer, - opcode::{CodeIdx, ConstantIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx}, + opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx}, upvalues::{UpvalueCarrier, Upvalues}, value::{Builtin, Closure, Lambda, NixAttrs, NixList, Thunk, Value}, }; @@ -139,10 +139,6 @@ impl<'o> VM<'o> { op } - fn peek_op(&self) -> OpCode { - self.chunk().code[self.frame().ip] - } - fn pop(&mut self) -> Value { self.stack.pop().expect("runtime stack empty") } @@ -478,11 +474,6 @@ impl<'o> VM<'o> { OpCode::OpGetUpvalue(upv_idx) => { let value = self.frame().upvalue(upv_idx).clone(); - if let Value::DynamicUpvalueMissing(name) = value { - return Err(self - .error(ErrorKind::UnknownDynamicVariable(name.as_str().to_string()))); - } - self.push(value); } @@ -552,8 +543,7 @@ impl<'o> VM<'o> { OpCode::DataLocalIdx(_) | OpCode::DataDeferredLocal(_) | OpCode::DataUpvalueIdx(_) - | OpCode::DataDynamicIdx(_) - | OpCode::DataDynamicAncestor(_) => { + | OpCode::DataCaptureWith => { panic!("VM bug: attempted to execute data-carrying operand") } } @@ -602,38 +592,6 @@ impl<'o> VM<'o> { Ok(()) } - fn resolve_dynamic_upvalue(&mut self, ident_idx: ConstantIdx) -> EvalResult { - let chunk = self.chunk(); - let ident = fallible!(self, chunk[ident_idx].to_str()); - - // Peek at the current instruction (note: IP has already - // advanced!) to see if it is actually data indicating a - // "fallback upvalue" in case the dynamic could not be - // resolved at this level. - let up = match self.peek_op() { - OpCode::DataDynamicAncestor(idx) => { - // advance ip past this data - self.inc_ip(); - Some(idx) - } - _ => None, - }; - - match self.resolve_with(ident.as_str()) { - Ok(v) => Ok(v), - - Err(Error { - kind: ErrorKind::UnknownDynamicVariable(_), - .. - }) => match up { - Some(idx) => Ok(self.frame().upvalue(idx).clone()), - None => Ok(Value::DynamicUpvalueMissing(ident)), - }, - - Err(err) => Err(err), - } - } - /// Resolve a dynamic identifier through the with-stack at runtime. fn resolve_with(&mut self, ident: &str) -> EvalResult { // Iterate over the with_stack manually to avoid borrowing @@ -651,6 +609,24 @@ impl<'o> VM<'o> { } } + // Iterate over the captured with stack if one exists. This is + // extra tricky to do without a lot of cloning. + for idx in (0..self.frame().upvalues.with_stack_len()).rev() { + // This is safe because having an index here guarantees + // that the stack is present. + let with = + unsafe { self.frame().upvalues.with_stack().unwrap_unchecked()[idx].clone() }; + + if let Value::Thunk(thunk) = &with { + fallible!(self, thunk.force(self)); + } + + match fallible!(self, with.to_attrs()).select(ident) { + None => continue, + Some(val) => return Ok(val.clone()), + } + } + Err(self.error(ErrorKind::UnknownDynamicVariable(ident.to_string()))) } @@ -671,15 +647,28 @@ impl<'o> VM<'o> { upvalues.push(self.frame().upvalue(upv_idx).clone()); } - OpCode::DataDynamicIdx(ident_idx) => { - let value = self.resolve_dynamic_upvalue(ident_idx)?; - upvalues.push(value); - } - OpCode::DataDeferredLocal(idx) => { upvalues.push(Value::DeferredUpvalue(idx)); } + OpCode::DataCaptureWith => { + // Start the captured with_stack off of the + // current call frame's captured with_stack, ... + let mut captured_with_stack = self + .frame() + .upvalues + .with_stack() + .map(Clone::clone) + // ... or make an empty one if there isn't one already. + .unwrap_or_else(|| Vec::with_capacity(self.with_stack.len())); + + for idx in &self.with_stack { + captured_with_stack.push(self.stack[*idx].clone()); + } + + upvalues.set_with_stack(captured_with_stack); + } + _ => panic!("compiler error: missing closure operand"), } } -- cgit 1.4.1