about summary refs log tree commit diff
diff options
context:
space:
mode:
authorAdam Joseph <adam@westernsemico.com>2022-10-25T09·23-0700
committertazjin <tazjin@tvl.su>2023-01-21T10·19+0000
commit22b9e6ff092c531ee72037ff8f4c065eaff3b839 (patch)
treeca3f063bc088e06772626d02d9b0f865edca5bd6
parentab8486e5b8b12f18954d3754c1837882e30008dc (diff)
refactor(tvix/eval): administer antidote for poison r/5721
The codebase contains a lot of complexity and odd roundabout
handling for shadowing globals.  I'm pretty sure none of this is
necessary, and all of it disappears if you simply make the globals
part of the ordinary identifier resolution chain, with their own
scope up above the root scope.  Then the ordinary shadowing routines
do the right thing, and no special cases or new terminology are
required.

This commit does that.

Note by tazjin: This commit was originally abandoned when Adam decided
not to take away reviewer bandwidth for this at the time (eval was
still in a much earlier stage). As we've recently done some
significant refactoring of globals initialisation this came up again,
and it seems we can easily cover the use-cases of the poison tracking
in other ways now, so I've rebased, updated and resurrected the CL.

Co-Authored-By: Vincent Ambo <tazjin@tvl.su>
Signed-off-by: Adam Joseph <adam@westernsemico.com>
Change-Id: Ib3309a47a7b31fa5bf10466bade0d876b76ae462
Reviewed-on: https://cl.tvl.fyi/c/depot/+/7089
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
Reviewed-by: flokli <flokli@flokli.de>
-rw-r--r--tvix/eval/docs/known-optimisation-potential.md4
-rw-r--r--tvix/eval/src/chunk.rs5
-rw-r--r--tvix/eval/src/compiler/bindings.rs26
-rw-r--r--tvix/eval/src/compiler/mod.rs90
-rw-r--r--tvix/eval/src/compiler/optimiser.rs4
-rw-r--r--tvix/eval/src/compiler/scope.rs46
-rw-r--r--tvix/eval/src/opcode.rs5
-rw-r--r--tvix/eval/src/vm.rs4
8 files changed, 49 insertions, 135 deletions
diff --git a/tvix/eval/docs/known-optimisation-potential.md b/tvix/eval/docs/known-optimisation-potential.md
index 484ae27135..e3015fc44b 100644
--- a/tvix/eval/docs/known-optimisation-potential.md
+++ b/tvix/eval/docs/known-optimisation-potential.md
@@ -55,7 +55,7 @@ optimisations, but note the most important ones here.
   When accessing identifiers like `builtins.foo`, the compiler should
   not go through the trouble of setting up the attribute set on the
   stack and accessing `foo` from it if it knows that the scope for
-  `builtins` is unpoisoned. The same optimisation can also be done
+  `builtins` is unshadowed. The same optimisation can also be done
   for the other set operations like `builtins ? foo` and
   `builtins.foo or alternative-implementation`.
 
@@ -71,7 +71,7 @@ optimisations, but note the most important ones here.
   a builtin application.
 
   In case the compiler encounters a fully applied builtin (i.e.
-  no currying is occurring) and the `builtins` global is unpoisoned,
+  no currying is occurring) and the `builtins` global is unshadowed,
   it could compile the equivalent operator bytecode instead: For
   example, `builtins.add 20 22` would be compiled as `20 + 22`.
   This would ensure that equivalent `builtins` can also benefit
diff --git a/tvix/eval/src/chunk.rs b/tvix/eval/src/chunk.rs
index e9be0d2a63..35fd7e78e6 100644
--- a/tvix/eval/src/chunk.rs
+++ b/tvix/eval/src/chunk.rs
@@ -156,11 +156,10 @@ mod tests {
     use crate::test_utils::dummy_span;
 
     use super::*;
-
     #[test]
     fn push_op() {
         let mut chunk = Chunk::default();
-        chunk.push_op(OpCode::OpNull, dummy_span());
-        assert_eq!(chunk.code.last().unwrap(), &OpCode::OpNull);
+        chunk.push_op(OpCode::OpAdd, dummy_span());
+        assert_eq!(chunk.code.last().unwrap(), &OpCode::OpAdd);
     }
 }
diff --git a/tvix/eval/src/compiler/bindings.rs b/tvix/eval/src/compiler/bindings.rs
index 682cf134ce..59ca222b72 100644
--- a/tvix/eval/src/compiler/bindings.rs
+++ b/tvix/eval/src/compiler/bindings.rs
@@ -685,6 +685,14 @@ impl Compiler<'_> {
         self.push_op(OpCode::OpAttrsSelect, node);
     }
 
+    /// Is the given identifier defined *by the user* in any current scope?
+    pub(super) fn is_user_defined(&mut self, ident: &str) -> bool {
+        matches!(
+            self.scope_mut().resolve_local(ident),
+            LocalPosition::Known(_) | LocalPosition::Recursive(_)
+        )
+    }
+
     /// Resolve and compile access to an identifier in the scope.
     fn compile_identifier_access<N: ToSpan + Clone>(
         &mut self,
@@ -692,15 +700,6 @@ impl Compiler<'_> {
         ident: &str,
         node: &N,
     ) {
-        // If the identifier is a global, and it is not poisoned, emit the
-        // global directly.
-        if let Some(global) = self.globals.get(ident) {
-            if !self.scope().is_poisoned(ident) {
-                global.clone()(self, self.span_for(node));
-                return;
-            }
-        }
-
         match self.scope_mut().resolve_local(ident) {
             LocalPosition::Unknown => {
                 // Are we possibly dealing with an upvalue?
@@ -709,6 +708,15 @@ impl Compiler<'_> {
                     return;
                 }
 
+                // Globals are the "upmost upvalues": they behave
+                // exactly like a `let ... in` prepended to the
+                // program's text, and the global scope is nothing
+                // more than the parent scope of the root scope.
+                if let Some(global) = self.globals.get(ident) {
+                    self.emit_constant(global.clone(), &self.span_for(node));
+                    return;
+                }
+
                 // If there is a non-empty `with`-stack (or a parent context
                 // with one), emit a runtime dynamic resolution instruction.
                 //
diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs
index 4a50a3be49..a552f50b5e 100644
--- a/tvix/eval/src/compiler/mod.rs
+++ b/tvix/eval/src/compiler/mod.rs
@@ -28,7 +28,7 @@ use std::sync::Arc;
 
 use crate::chunk::Chunk;
 use crate::errors::{Error, ErrorKind, EvalResult};
-use crate::observer::{CompilerObserver, NoOpObserver};
+use crate::observer::CompilerObserver;
 use crate::opcode::{CodeIdx, Count, JumpOffset, OpCode, UpvalueIdx};
 use crate::spans::LightSpan;
 use crate::spans::ToSpan;
@@ -78,14 +78,9 @@ impl LambdaCtx {
     }
 }
 
-/// The type of a global as used inside of the compiler. Differs from
-/// Nix's own notion of "builtins" in that it can emit arbitrary code.
-/// Nix's builtins are wrapped inside of this type.
-pub type Global = Rc<dyn Fn(&mut Compiler, Span)>;
-
-/// The map of globally available functions that should implicitly
-/// be resolvable in the global scope.
-pub(crate) type GlobalsMap = HashMap<&'static str, Rc<dyn Fn(&mut Compiler, Span)>>;
+/// The map of globally available functions and other values that
+/// should implicitly be resolvable in the global scope.
+pub(crate) type GlobalsMap = HashMap<&'static str, Value>;
 
 /// Set of builtins that (if they exist) should be made available in
 /// the global scope, meaning that they can be accessed not just
@@ -1152,9 +1147,6 @@ impl Compiler<'_> {
     /// Open a new lambda context within which to compile a function,
     /// closure or thunk.
     fn new_context(&mut self) {
-        // This must inherit the scope-poisoning status of the parent
-        // in order for upvalue resolution to work correctly with
-        // poisoned identifiers.
         self.contexts.push(self.context().inherit());
     }
 
@@ -1165,16 +1157,10 @@ impl Compiler<'_> {
         let name = name.into();
         let depth = self.scope().scope_depth();
 
-        // Do this little dance to get ahold of the *static* key and
-        // use it for poisoning if required.
-        let key: Option<&'static str> = match self.globals.get_key_value(name.as_str()) {
-            Some((key, _)) => Some(*key),
-            None => None,
-        };
-
-        if let Some(global_ident) = key {
+        // Do this little dance to turn name:&'a str into the same
+        // string with &'static lifetime, as required by WarningKind
+        if let Some((global_ident, _)) = self.globals.get_key_value(name.as_str()) {
             self.emit_warning(node, WarningKind::ShadowedGlobal(global_ident));
-            self.scope_mut().poison(global_ident, depth);
         }
 
         let span = self.span_for(node);
@@ -1255,8 +1241,7 @@ pub fn prepare_globals(
     Rc::new_cyclic(Box::new(move |weak: &Weak<GlobalsMap>| {
         // First step is to construct the builtins themselves as
         // `NixAttrs`.
-        let mut builtins_under_construction: HashMap<&'static str, Value> =
-            HashMap::from_iter(builtins.into_iter());
+        let mut builtins: GlobalsMap = HashMap::from_iter(builtins.into_iter());
 
         // At this point, optionally insert `import` if enabled. To
         // "tie the knot" of `import` needing the full set of globals
@@ -1264,7 +1249,7 @@ pub fn prepare_globals(
         // here.
         if enable_import {
             let import = Value::Builtin(import::builtins_import(weak, source.clone()));
-            builtins_under_construction.insert("import", import);
+            builtins.insert("import", import);
         }
 
         // Next, the actual map of globals is constructed and
@@ -1273,10 +1258,8 @@ pub fn prepare_globals(
         let mut globals: GlobalsMap = HashMap::new();
 
         for global in GLOBAL_BUILTINS {
-            if let Some(builtin) = builtins_under_construction.get(global).cloned() {
-                let global_builtin: Global =
-                    Rc::new(move |c, s| c.emit_constant(builtin.clone(), &s));
-                globals.insert(global, global_builtin);
+            if let Some(builtin) = builtins.get(global).cloned() {
+                globals.insert(global, builtin);
             }
         }
 
@@ -1288,58 +1271,29 @@ pub fn prepare_globals(
         // `builtins.builtins`, it would only happen once as the thunk
         // is then resolved.
         let weak_globals = weak.clone();
-        builtins_under_construction.insert(
+        builtins.insert(
             "builtins",
             Value::Thunk(Thunk::new_suspended_native(Rc::new(move |_| {
-                let file = source.add_file("builtins-dot-builtins.nix".into(), "builtins".into());
-                let span = file.span;
-                let mut observer = NoOpObserver::default();
-
-                let mut compiler = Compiler::new(
-                    None,
-                    file,
-                    weak_globals
-                        .upgrade()
-                        .expect("globals dropped while still in use"),
-                    &mut observer,
-                )?;
-
-                weak_globals.upgrade().unwrap().get("builtins").unwrap()(&mut compiler, span);
-
-                Ok(compiler.chunk().constants[0].clone())
+                Ok(weak_globals
+                    .upgrade()
+                    .unwrap()
+                    .get("builtins")
+                    .cloned()
+                    .unwrap())
             }))),
         );
 
         // This is followed by the actual `builtins` attribute set
         // being constructed and inserted in the global scope.
-        let builtins_set =
-            Value::attrs(NixAttrs::from_iter(builtins_under_construction.into_iter()));
         globals.insert(
             "builtins",
-            Rc::new(move |c, s| c.emit_constant(builtins_set.clone(), &s)),
+            Value::attrs(NixAttrs::from_iter(builtins.into_iter())),
         );
 
         // Finally insert the compiler-internal "magic" builtins for top-level values.
-        globals.insert(
-            "true",
-            Rc::new(|compiler, span| {
-                compiler.push_op(OpCode::OpTrue, &span);
-            }),
-        );
-
-        globals.insert(
-            "false",
-            Rc::new(|compiler, span| {
-                compiler.push_op(OpCode::OpFalse, &span);
-            }),
-        );
-
-        globals.insert(
-            "null",
-            Rc::new(|compiler, span| {
-                compiler.push_op(OpCode::OpNull, &span);
-            }),
-        );
+        globals.insert("true", Value::Bool(true));
+        globals.insert("false", Value::Bool(false));
+        globals.insert("null", Value::Null);
 
         globals
     }))
diff --git a/tvix/eval/src/compiler/optimiser.rs b/tvix/eval/src/compiler/optimiser.rs
index af0ad2256e..ace5335d68 100644
--- a/tvix/eval/src/compiler/optimiser.rs
+++ b/tvix/eval/src/compiler/optimiser.rs
@@ -37,10 +37,10 @@ fn is_lit_bool(expr: ast::Expr) -> LitBool {
 fn optimise_bin_op(c: &mut Compiler, slot: LocalIdx, expr: ast::Expr) -> ast::Expr {
     use ast::BinOpKind;
 
-    // bail out of this check if the user has poisoned either `true`
+    // bail out of this check if the user has overridden either `true`
     // or `false` identifiers. Note that they will have received a
     // separate warning about this for shadowing the global(s).
-    if c.scope().is_poisoned("true") || c.scope().is_poisoned("false") {
+    if c.is_user_defined("true") || c.is_user_defined("false") {
         return expr;
     }
 
diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs
index ef4b4d51e1..892727c107 100644
--- a/tvix/eval/src/compiler/scope.rs
+++ b/tvix/eval/src/compiler/scope.rs
@@ -2,8 +2,8 @@
 //! compiler.
 //!
 //! Scoping in Nix is fairly complicated, there are features like
-//! mutually recursive bindings, `with`, upvalue capturing, builtin
-//! poisoning and so on that introduce a fair bit of complexity.
+//! mutually recursive bindings, `with`, upvalue capturing, and so
+//! on that introduce a fair bit of complexity.
 //!
 //! Tvix attempts to do as much of the heavy lifting of this at
 //! compile time, and leave the runtime to mostly deal with known
@@ -75,7 +75,7 @@ impl Local {
     }
 }
 
-/// Represents the current position of a local as resolved in a scope.
+/// Represents the current position of an identifier as resolved in a scope.
 pub enum LocalPosition {
     /// Local is not known in this scope.
     Unknown,
@@ -171,14 +171,6 @@ pub struct Scope {
 
     /// Current size of the `with`-stack at runtime.
     with_stack_size: usize,
-
-    /// Users are allowed to override globally defined symbols like
-    /// `true`, `false` or `null` in scopes. We call this "scope
-    /// poisoning", as it requires runtime resolution of those tokens.
-    ///
-    /// To support this efficiently, the depth at which a poisoning
-    /// occured is tracked here.
-    poisoned_tokens: HashMap<&'static str, usize>,
 }
 
 impl Index<LocalIdx> for Scope {
@@ -190,43 +182,17 @@ impl Index<LocalIdx> for Scope {
 }
 
 impl Scope {
-    /// Mark a globally defined token as poisoned.
-    pub fn poison(&mut self, name: &'static str, depth: usize) {
-        match self.poisoned_tokens.entry(name) {
-            hash_map::Entry::Occupied(_) => {
-                /* do nothing, as the token is already poisoned at a
-                 * lower scope depth */
-            }
-            hash_map::Entry::Vacant(entry) => {
-                entry.insert(depth);
-            }
-        }
-    }
-
     /// Inherit scope details from a parent scope (required for
     /// correctly nesting scopes in lambdas and thunks when special
-    /// scope features like poisoning are present).
+    /// scope features like dynamic resolution are present).
     pub fn inherit(&self) -> Self {
         Self {
-            poisoned_tokens: self.poisoned_tokens.clone(),
             scope_depth: self.scope_depth + 1,
             with_stack_size: self.with_stack_size,
             ..Default::default()
         }
     }
 
-    /// Check whether a given token is poisoned.
-    pub fn is_poisoned(&self, name: &str) -> bool {
-        self.poisoned_tokens.contains_key(name)
-    }
-
-    /// "Unpoison" tokens that were poisoned at the current depth.
-    /// Used when scopes are closed.
-    fn unpoison(&mut self) {
-        self.poisoned_tokens
-            .retain(|_, poisoned_at| *poisoned_at != self.scope_depth);
-    }
-
     /// Increase the `with`-stack size of this scope.
     pub fn push_with(&mut self) {
         self.with_stack_size += 1;
@@ -365,10 +331,6 @@ impl Scope {
     pub fn end_scope(&mut self) -> (usize, Vec<codemap::Span>) {
         debug_assert!(self.scope_depth != 0, "can not end top scope");
 
-        // If this scope poisoned any builtins or special identifiers,
-        // they need to be reset.
-        self.unpoison();
-
         let mut pops = 0;
         let mut unused_spans = vec![];
 
diff --git a/tvix/eval/src/opcode.rs b/tvix/eval/src/opcode.rs
index 0ed0ceb020..a49bee425d 100644
--- a/tvix/eval/src/opcode.rs
+++ b/tvix/eval/src/opcode.rs
@@ -63,11 +63,6 @@ pub enum OpCode {
     /// Discard a value from the stack.
     OpPop,
 
-    // Push a literal value.
-    OpNull,
-    OpTrue,
-    OpFalse,
-
     // Unary operators
     OpInvert,
     OpNegate,
diff --git a/tvix/eval/src/vm.rs b/tvix/eval/src/vm.rs
index 06ead175ee..818b5ff9b1 100644
--- a/tvix/eval/src/vm.rs
+++ b/tvix/eval/src/vm.rs
@@ -750,10 +750,6 @@ impl<'o> VM<'o> {
             OpCode::OpMore => cmp_op!(self, >),
             OpCode::OpMoreOrEq => cmp_op!(self, >=),
 
-            OpCode::OpNull => self.push(Value::Null),
-            OpCode::OpTrue => self.push(Value::Bool(true)),
-            OpCode::OpFalse => self.push(Value::Bool(false)),
-
             OpCode::OpAttrs(Count(count)) => self.run_attrset(count)?,
 
             OpCode::OpAttrsUpdate => {