From 24811d76558a73b3d6f615d458aded0b9c690840 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Sun, 28 Aug 2022 19:38:17 +0300 Subject: fix(tvix/eval): distinguish statically between StackIdx and LocalIdx Previously the functions in the scope module returned usize values, which - sometimes from the same function - were either indexes into the runtime stack *or* indexes into the compiler's local stack. This is extremely confusing because it requires the caller to be aware of the difference, and it actually caused subtle bugs. To avoid this, there is now a new LocalIdx wrapper type which is used by the scope module to return indexes into the compiler's stack, as well as helpers for accounting for the differences between these indexes and the runtime indexes. Change-Id: I58f0b50ad94b28a304e3372fd9731b6590b3fdb8 Reviewed-on: https://cl.tvl.fyi/c/depot/+/6340 Tested-by: BuildkiteCI Reviewed-by: sterni --- tvix/eval/src/compiler/mod.rs | 109 +++++++++++++--------------------------- tvix/eval/src/compiler/scope.rs | 100 ++++++++++++++++++++++++++++-------- 2 files changed, 115 insertions(+), 94 deletions(-) (limited to 'tvix/eval') diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 1df44bdfde37..3f11661a97ab 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -25,11 +25,11 @@ use std::rc::Rc; use crate::chunk::Chunk; use crate::errors::{Error, ErrorKind, EvalResult}; -use crate::opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx}; +use crate::opcode::{CodeIdx, Count, JumpOffset, OpCode, UpvalueIdx}; use crate::value::{Closure, Lambda, Value}; use crate::warnings::{EvalWarning, WarningKind}; -use self::scope::{Local, LocalPosition, Scope, Upvalue}; +use self::scope::{Local, LocalIdx, LocalPosition, Scope, Upvalue}; /// Represents the result of compiling a piece of Nix code. If /// compilation was successful, the resulting bytecode can be passed @@ -106,7 +106,7 @@ impl Compiler { // Actual code-emitting AST traversal methods. impl Compiler { - fn compile(&mut self, slot: Option, expr: ast::Expr) { + fn compile(&mut self, slot: Option, expr: ast::Expr) { match expr { ast::Expr::Literal(literal) => self.compile_literal(literal), ast::Expr::Path(path) => self.compile_path(path), @@ -437,7 +437,8 @@ impl Compiler { } LocalPosition::Known(idx) => { - self.chunk().push_op(OpCode::OpGetLocal(idx)) + let stack_idx = self.scope().stack_index(idx); + self.chunk().push_op(OpCode::OpGetLocal(stack_idx)) } LocalPosition::Recursive(_) => { @@ -624,7 +625,7 @@ impl Compiler { ident.syntax().clone(), ident.ident_token().unwrap().text(), ); - self.mark_initialised(idx); + self.scope_mut().mark_initialised(idx); } } @@ -637,7 +638,7 @@ impl Compiler { ident.syntax().clone(), ident.ident_token().unwrap().text(), ); - self.mark_initialised(idx); + self.scope_mut().mark_initialised(idx); } } } @@ -656,7 +657,7 @@ impl Compiler { // First pass to ensure that all identifiers are known; // required for resolving recursion. - let mut entries: Vec<(usize, rnix::ast::Expr)> = vec![]; + let mut entries: Vec<(LocalIdx, rnix::ast::Expr)> = vec![]; for entry in node.attrpath_values() { let mut path = match normalise_ident_path(entry.attrpath().unwrap().attrs()) { Ok(p) => p, @@ -674,23 +675,25 @@ impl Compiler { entry.attrpath().unwrap().syntax().clone(), path.pop().unwrap(), ); + entries.push((idx, entry.value().unwrap())); } // Second pass to place the values in the correct stack slots. - let indices: Vec = entries.iter().map(|(idx, _)| *idx).collect(); + let indices: Vec = entries.iter().map(|(idx, _)| *idx).collect(); for (idx, value) in entries.into_iter() { self.compile(Some(idx), value); // Any code after this point will observe the value in the // right stack slot, so mark it as initialised. - self.mark_initialised(idx); + self.scope_mut().mark_initialised(idx); } // Third pass to emit finaliser instructions if necessary. for idx in indices { - if self.scope().locals[idx].needs_finaliser { - self.chunk().push_op(OpCode::OpFinalise(StackIdx(idx))); + if self.scope()[idx].needs_finaliser { + let stack_idx = self.scope().stack_index(idx); + self.chunk().push_op(OpCode::OpFinalise(stack_idx)); } } @@ -747,7 +750,10 @@ impl Compiler { self.chunk().push_op(OpCode::OpResolveWith) } - LocalPosition::Known(idx) => self.chunk().push_op(OpCode::OpGetLocal(idx)), + LocalPosition::Known(idx) => { + let stack_idx = self.scope().stack_index(idx); + self.chunk().push_op(OpCode::OpGetLocal(stack_idx)) + } LocalPosition::Recursive(_) => panic!("TODO: unclear if this can happen"), }; @@ -762,22 +768,12 @@ impl Compiler { // resolve that directly (thus avoiding duplication on the // stack). self.compile(None, node.namespace().unwrap()); - self.declare_phantom(); + let local_idx = self.scope_mut().declare_phantom(); + let with_idx = self.scope().stack_index(local_idx); self.scope_mut().push_with(); - let with_idx = self - .scope() - .locals - .iter() - // Calculate the with_idx without taking into account - // uninitialised variables that are not yet in their stack - // slots. - .filter(|l| l.initialised) - .count() - - 1; - - self.chunk().push_op(OpCode::OpPushWith(StackIdx(with_idx))); + self.chunk().push_op(OpCode::OpPushWith(with_idx)); self.compile(None, node.body().unwrap()); @@ -786,7 +782,7 @@ impl Compiler { self.end_scope(); } - fn compile_lambda(&mut self, slot: Option, node: ast::Lambda) { + fn compile_lambda(&mut self, slot: Option, node: ast::Lambda) { // Open new lambda context in compiler, which has its own // scope etc. self.contexts.push(LambdaCtx::new()); @@ -805,7 +801,7 @@ impl Compiler { .to_string(); let idx = self.declare_local(param.syntax().clone(), &name); - self.mark_initialised(idx); + self.scope_mut().mark_initialised(idx); } } @@ -841,20 +837,23 @@ impl Compiler { self.chunk().push_op(OpCode::OpClosure(closure_idx)); for upvalue in compiled.scope.upvalues { match upvalue { - Upvalue::Stack(idx) if slot.is_none() => { - self.chunk().push_op(OpCode::DataLocalIdx(idx)); + Upvalue::Local(idx) if slot.is_none() => { + let stack_idx = self.scope().stack_index(idx); + self.chunk().push_op(OpCode::DataLocalIdx(stack_idx)); } - Upvalue::Stack(idx) => { + Upvalue::Local(idx) => { + let stack_idx = self.scope().stack_index(idx); + // If the upvalue slot is located *after* the // closure, the upvalue resolution must be // deferred until the scope is fully initialised // and can be finalised. - if slot.unwrap() < idx.0 { - self.chunk().push_op(OpCode::DataDeferredLocal(idx)); - self.mark_needs_finaliser(slot.unwrap()); + if slot.unwrap() < idx { + self.chunk().push_op(OpCode::DataDeferredLocal(stack_idx)); + self.scope_mut().mark_needs_finaliser(slot.unwrap()); } else { - self.chunk().push_op(OpCode::DataLocalIdx(idx)); + self.chunk().push_op(OpCode::DataLocalIdx(stack_idx)); } } @@ -960,7 +959,7 @@ impl Compiler { /// Declare a local variable known in the scope that is being /// compiled by pushing it to the locals. This is used to /// determine the stack offset of variables. - fn declare_local>(&mut self, node: rnix::SyntaxNode, name: S) -> usize { + fn declare_local>(&mut self, node: rnix::SyntaxNode, name: S) -> LocalIdx { let name = name.into(); let depth = self.scope().scope_depth; @@ -991,43 +990,7 @@ impl Compiler { ); } - let idx = self.scope().locals.len(); - self.scope_mut().locals.push(Local { - name, - depth, - initialised: false, - needs_finaliser: false, - node: Some(node), - phantom: false, - used: false, - }); - idx - } - - /// Declare a local variable that occupies a stack slot and should - /// be accounted for, but is not directly accessible by users - /// (e.g. attribute sets used for `with`). - fn declare_phantom(&mut self) { - let depth = self.scope().scope_depth; - self.scope_mut().locals.push(Local { - depth, - initialised: true, - needs_finaliser: false, - name: "".into(), - node: None, - phantom: true, - used: true, - }); - } - - /// Mark local as initialised after compiling its expression. - fn mark_initialised(&mut self, local_idx: usize) { - self.scope_mut().locals[local_idx].initialised = true; - } - - /// Mark local as needing a finaliser. - fn mark_needs_finaliser(&mut self, local_idx: usize) { - self.scope_mut().locals[local_idx].needs_finaliser = true; + self.scope_mut().declare_local(name, node) } fn resolve_upvalue(&mut self, ctx_idx: usize, name: &str) -> Option { @@ -1043,7 +1006,7 @@ impl Compiler { // guaranteed to be placed on the stack (i.e. in the right // position) *during* their runtime construction LocalPosition::Known(idx) | LocalPosition::Recursive(idx) => { - return Some(self.add_upvalue(ctx_idx, Upvalue::Stack(idx))) + return Some(self.add_upvalue(ctx_idx, Upvalue::Local(idx))) } LocalPosition::Unknown => { /* continue below */ } diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs index 310f33d608cd..e6f74c7d2f66 100644 --- a/tvix/eval/src/compiler/scope.rs +++ b/tvix/eval/src/compiler/scope.rs @@ -10,7 +10,10 @@ //! stack indices. To do this, the compiler simulates where locals //! will be at runtime using the data structures implemented here. -use std::collections::{hash_map, HashMap}; +use std::{ + collections::{hash_map, HashMap}, + ops::Index, +}; use smol_str::SmolStr; @@ -56,13 +59,13 @@ pub enum LocalPosition { /// Local is not known in this scope. Unknown, - /// Local is known and defined at the given stack index. - Known(StackIdx), + /// Local is known at the given local index. + Known(LocalIdx), /// Local is known, but is being accessed recursively within its /// own initialisation. Depending on context, this is either an /// error or forcing a closure/thunk. - Recursive(StackIdx), + Recursive(LocalIdx), } /// Represents the different ways in which upvalues can be captured in @@ -70,7 +73,7 @@ pub enum LocalPosition { #[derive(Clone, Debug, PartialEq)] pub enum Upvalue { /// This upvalue captures a local from the stack. - Stack(StackIdx), + Local(LocalIdx), /// This upvalue captures an enclosing upvalue. Upvalue(UpvalueIdx), @@ -87,6 +90,13 @@ pub enum Upvalue { }, } +/// Represents the index of a local in the scope's local array, which +/// is subtly different from its `StackIdx` (which excludes +/// uninitialised values in between). +#[repr(transparent)] +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct LocalIdx(usize); + /// Represents a scope known during compilation, which can be resolved /// directly to stack indices. /// @@ -113,6 +123,14 @@ pub struct Scope { poisoned_tokens: HashMap<&'static str, usize>, } +impl Index for Scope { + type Output = Local; + + fn index(&self, index: LocalIdx) -> &Self::Output { + &self.locals[index.0] + } +} + impl Scope { /// Mark a globally defined token as poisoned. pub fn poison(&mut self, name: &'static str, depth: usize) { @@ -165,29 +183,69 @@ impl Scope { // we know its final runtime stack position, but it is // not yet on the stack. if !local.initialised { - return LocalPosition::Recursive(StackIdx(idx)); + return LocalPosition::Recursive(LocalIdx(idx)); } - // This local is known, but we need to account for - // uninitialised variables in this "initialiser - // stack". - return LocalPosition::Known(self.resolve_uninit(idx)); + return LocalPosition::Known(LocalIdx(idx)); } } LocalPosition::Unknown } - /// Return the "initialiser stack slot" of a value, that is the - /// stack slot of a value which might only exist during the - /// initialisation of another. This requires accounting for the - /// stack offsets of any unitialised variables. - fn resolve_uninit(&mut self, locals_idx: usize) -> StackIdx { - StackIdx( - self.locals[..locals_idx] - .iter() - .filter(|local| local.initialised) - .count(), - ) + /// Declare a local variable that occupies a stack slot and should + /// be accounted for, but is not directly accessible by users + /// (e.g. attribute sets used for `with`). + pub fn declare_phantom(&mut self) -> LocalIdx { + let idx = self.locals.len(); + self.locals.push(Local { + depth: self.scope_depth, + initialised: true, + needs_finaliser: false, + name: "".into(), + node: None, + phantom: true, + used: true, + }); + + LocalIdx(idx) + } + + /// Declare an uninitialised local variable. + pub fn declare_local(&mut self, name: String, node: rnix::SyntaxNode) -> LocalIdx { + let idx = self.locals.len(); + self.locals.push(Local { + name, + depth: self.scope_depth, + initialised: false, + needs_finaliser: false, + node: Some(node), + phantom: false, + used: false, + }); + + LocalIdx(idx) + } + + /// Mark local as initialised after compiling its expression. + pub fn mark_initialised(&mut self, idx: LocalIdx) { + self.locals[idx.0].initialised = true; + } + + /// Mark local as needing a finaliser. + pub fn mark_needs_finaliser(&mut self, idx: LocalIdx) { + self.locals[idx.0].needs_finaliser = true; + } + + /// Compute the runtime stack index for a given local by + /// accounting for uninitialised variables at scopes below this + /// one. + pub fn stack_index(&self, idx: LocalIdx) -> StackIdx { + let uninitialised_count = self.locals[..(idx.0)] + .iter() + .filter(|l| !l.initialised && self[idx].above(l.depth)) + .count(); + + StackIdx(idx.0 - uninitialised_count) } } -- cgit 1.4.1