From ca90c0f45ad9892c35a2a0402939b857a6fca08e Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Wed, 24 Aug 2022 15:37:09 +0300 Subject: refactor(tvix/eval): handle scope poisoning & globals dynamically Previously, the tokens that could poison a scope (`true`, `false`, `null`) had individual fields in the scope to track whether or not they were poisoned. This commit sets up new machinery that instead tracks scope poisoning dynamically using a HashMap, and which makes it possible to introduce additional tokens to the top-level ("global") scope that are directly resolved by the compiler by passing a map of runtime values to be used. With this solution, the compiler now contains all machinery required for wiring up builtins resolution. The set of builtins to be exposed at runtime must, however, be constructed *outside* of the compiler and passed in. Everything is prepared for this, but it is not yet wired up (so the only existing builtins are the ones we already had before). Note that this technically opens up an optimisation potential when compiling selection operations, where the attribute set being selected from is `builtins`. The compiler could directly resolve the builtins and place the right values on the stack. Change-Id: Ia7dad3c2a98703e7ea0c6ace1a722d57cc70a65c Reviewed-on: https://cl.tvl.fyi/c/depot/+/6253 Tested-by: BuildkiteCI Reviewed-by: sterni --- tvix/eval/src/compiler.rs | 186 +++++++++++++++++++++++++++++++--------------- 1 file changed, 125 insertions(+), 61 deletions(-) diff --git a/tvix/eval/src/compiler.rs b/tvix/eval/src/compiler.rs index 4043f268bff4..1bbd791876ab 100644 --- a/tvix/eval/src/compiler.rs +++ b/tvix/eval/src/compiler.rs @@ -16,7 +16,9 @@ use path_clean::PathClean; use rnix::ast::{self, AstToken, HasEntry}; use rowan::ast::AstNode; +use std::collections::{hash_map, HashMap}; use std::path::{Path, PathBuf}; +use std::rc::Rc; use crate::chunk::Chunk; use crate::errors::{Error, ErrorKind, EvalResult}; @@ -78,15 +80,40 @@ struct Scope { // `with`. with_stack: Vec, - // Certain symbols are considered to be "poisoning" the scope when - // defined. This is because users are allowed to override symbols - // like 'true' or 'null'. + // Users are allowed to override globally defined symbols like + // `true`, `false` or `null` in scopes. We call this "scope + // poisoning", as it requires runtime resolution of those tokens. // // To support this efficiently, the depth at which a poisoning // occured is tracked here. - poisoned_true: usize, - poisoned_false: usize, - poisoned_null: usize, + poisoned_tokens: HashMap<&'static str, usize>, +} + +impl Scope { + /// Mark a globally defined token as poisoned. + fn poison(&mut self, name: &'static str, depth: usize) { + match self.poisoned_tokens.entry(name) { + hash_map::Entry::Occupied(_) => { + /* do nothing, as the token is already poisoned at a + * lower scope depth */ + } + hash_map::Entry::Vacant(entry) => { + entry.insert(depth); + } + } + } + + /// Check whether a given token is poisoned. + fn is_poisoned(&self, name: &str) -> bool { + self.poisoned_tokens.contains_key(name) + } + + /// "Unpoison" tokens that were poisoned at a given depth. Used + /// when scopes are closed. + fn unpoison(&mut self, depth: usize) { + self.poisoned_tokens + .retain(|_, poisoned_at| *poisoned_at != depth); + } } /// Represents the lambda currently being compiled. @@ -104,11 +131,21 @@ impl LambdaCtx { } } +type GlobalsMap = HashMap<&'static str, Rc>; + struct Compiler { contexts: Vec, warnings: Vec, errors: Vec, root_dir: PathBuf, + + /// Carries all known global tokens; the full set of which is + /// created when the compiler is invoked. + /// + /// Each global has an associated token, which when encountered as + /// an identifier is resolved against the scope poisoning logic, + /// and a function that should emit code for the token. + globals: GlobalsMap, } // Helper functions for emitting code and metadata to the internal @@ -124,7 +161,7 @@ impl Compiler { } fn chunk(&mut self) -> &mut Chunk { - std::rc::Rc::::get_mut(self.context_mut().lambda.chunk()) + Rc::::get_mut(self.context_mut().lambda.chunk()) .expect("compiler flaw: long-lived chunk reference") } @@ -677,41 +714,29 @@ impl Compiler { } fn compile_ident(&mut self, node: ast::Ident) { - match node.ident_token().unwrap().text() { - // TODO(tazjin): Nix technically allows code like - // - // let null = 1; in null - // => 1 - // - // which we do *not* want to check at runtime. Once - // scoping is introduced, the compiler should carry some - // optimised information about any "weird" stuff that's - // happened to the scope (such as overrides of these - // literals, or builtins). - "true" if self.scope().poisoned_true == 0 => self.chunk().push_op(OpCode::OpTrue), - "false" if self.scope().poisoned_false == 0 => self.chunk().push_op(OpCode::OpFalse), - "null" if self.scope().poisoned_null == 0 => self.chunk().push_op(OpCode::OpNull), - - name => { - // Note: `with` and some other special scoping - // features are not yet implemented. - match self.resolve_local(name) { - Some(idx) => self.chunk().push_op(OpCode::OpGetLocal(idx)), - None => { - if self.scope().with_stack.is_empty() { - self.emit_error( - node.syntax().clone(), - ErrorKind::UnknownStaticVariable, - ); - return; - } - - // Variable needs to be dynamically resolved - // at runtime. - self.emit_constant(Value::String(name.into())); - self.chunk().push_op(OpCode::OpResolveWith) - } + let ident = node.ident_token().unwrap(); + + // If the identifier is a global, and it is not poisoned, emit + // the global directly. + if let Some(global) = self.globals.get(ident.text()) { + if !self.scope().is_poisoned(ident.text()) { + global.clone()(self); + return; + } + } + + match self.resolve_local(ident.text()) { + Some(idx) => self.chunk().push_op(OpCode::OpGetLocal(idx)), + None => { + if self.scope().with_stack.is_empty() { + self.emit_error(node.syntax().clone(), ErrorKind::UnknownStaticVariable); + return; } + + // Variable needs to be dynamically resolved + // at runtime. + self.emit_constant(Value::String(ident.text().into())); + self.chunk().push_op(OpCode::OpResolveWith) } }; } @@ -815,15 +840,8 @@ impl Compiler { // If this scope poisoned any builtins or special identifiers, // they need to be reset. - if self.scope().poisoned_true == self.scope().scope_depth { - self.scope_mut().poisoned_true = 0; - } - if self.scope().poisoned_false == self.scope().scope_depth { - self.scope_mut().poisoned_false = 0; - } - if self.scope().poisoned_null == self.scope().scope_depth { - self.scope_mut().poisoned_null = 0; - } + let depth = self.scope().scope_depth; + self.scope_mut().unpoison(depth); self.scope_mut().scope_depth -= 1; @@ -871,23 +889,24 @@ impl Compiler { /// compiled by pushing it to the locals. This is used to /// determine the stack offset of variables. fn declare_local>(&mut self, node: rnix::SyntaxNode, name: S) { - // Set up scope poisoning if required. let name = name.into(); - let mut scope = self.scope_mut(); - match name.as_str() { - "true" if scope.poisoned_true == 0 => scope.poisoned_true = scope.scope_depth, - - "false" if scope.poisoned_false == 0 => scope.poisoned_false = scope.scope_depth, - - "null" if scope.poisoned_null == 0 => scope.poisoned_null = scope.scope_depth, + let depth = self.scope().scope_depth; - _ => {} + // Do this little dance to get ahold of the *static* key and + // use it for poisoning if required. + let key: Option<&'static str> = match self.globals.get_key_value(name.as_str()) { + Some((key, _)) => Some(*key), + None => None, }; - scope.locals.push(Local { + if let Some(global_ident) = key { + self.scope_mut().poison(global_ident, depth); + } + + self.scope_mut().locals.push(Local { + depth, name: name.into(), node: Some(node), - depth: scope.scope_depth, phantom: false, used: false, }); @@ -969,6 +988,47 @@ fn normalise_ident_path>(path: I) -> EvalResult) -> GlobalsMap { + let mut globals: GlobalsMap = HashMap::new(); + + globals.insert( + "true", + Rc::new(|compiler| { + compiler.chunk().push_op(OpCode::OpTrue); + }), + ); + + globals.insert( + "false", + Rc::new(|compiler| { + compiler.chunk().push_op(OpCode::OpFalse); + }), + ); + + globals.insert( + "null", + Rc::new(|compiler| { + compiler.chunk().push_op(OpCode::OpNull); + }), + ); + + for (ident, value) in additional.into_iter() { + globals.insert( + ident, + Rc::new(move |compiler| compiler.emit_constant(value.clone())), + ); + } + + globals +} + pub fn compile(expr: ast::Expr, location: Option) -> EvalResult { let mut root_dir = match location { Some(dir) => Ok(dir), @@ -984,8 +1044,12 @@ pub fn compile(expr: ast::Expr, location: Option) -> EvalResult