about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--tvix/eval/src/compiler/mod.rs179
-rw-r--r--tvix/eval/src/compiler/scope.rs189
2 files changed, 196 insertions, 172 deletions
diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs
index 63ffe61bcd98..17ea8921a0c3 100644
--- a/tvix/eval/src/compiler/mod.rs
+++ b/tvix/eval/src/compiler/mod.rs
@@ -13,11 +13,13 @@
 //! by the code in this module, `debug_assert!` has been used to catch
 //! mistakes early during development.
 
+mod scope;
+
 use path_clean::PathClean;
 use rnix::ast::{self, AstToken, HasEntry};
 use rowan::ast::AstNode;
 use smol_str::SmolStr;
-use std::collections::{hash_map, HashMap};
+use std::collections::HashMap;
 use std::path::{Path, PathBuf};
 use std::rc::Rc;
 
@@ -27,6 +29,8 @@ use crate::opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx};
 use crate::value::{Closure, Lambda, Value};
 use crate::warnings::{EvalWarning, WarningKind};
 
+use self::scope::{Depth, Local, LocalPosition, Scope, Upvalue, With};
+
 /// Represents the result of compiling a piece of Nix code. If
 /// compilation was successful, the resulting bytecode can be passed
 /// to the VM.
@@ -36,177 +40,6 @@ pub struct CompilationOutput {
     pub errors: Vec<Error>,
 }
 
-/// Represents the initialisation status of a variable, tracking
-/// whether it is only known or also already defined.
-enum Depth {
-    /// Variable is defined and located at the given depth.
-    At(usize),
-
-    /// Variable is known but not yet defined.
-    Unitialised,
-}
-
-impl Depth {
-    /// Does this variable live above the other given depth?
-    fn above(&self, theirs: usize) -> bool {
-        match self {
-            Depth::Unitialised => false,
-            Depth::At(ours) => *ours > theirs,
-        }
-    }
-
-    /// Does this variable live below the other given depth?
-    fn below(&self, theirs: usize) -> bool {
-        match self {
-            Depth::Unitialised => false,
-            Depth::At(ours) => *ours < theirs,
-        }
-    }
-}
-
-/// Represents a single local already known to the compiler.
-struct Local {
-    // Definition name, which can be different kinds of tokens (plain
-    // string or identifier). Nix does not allow dynamic names inside
-    // of `let`-expressions.
-    name: String,
-
-    // Syntax node at which this local was declared.
-    node: Option<rnix::SyntaxNode>,
-
-    // Scope depth of this local.
-    depth: Depth,
-
-    // Phantom locals are not actually accessible by users (e.g.
-    // intermediate values used for `with`).
-    phantom: bool,
-
-    // Is this local known to have been used at all?
-    used: bool,
-}
-
-/// Represents a stack offset containing keys which are currently
-/// in-scope through a with expression.
-#[derive(Debug)]
-struct With {}
-
-#[derive(Debug, PartialEq)]
-enum Upvalue {
-    /// This upvalue captures a local from the stack.
-    Stack(StackIdx),
-
-    /// This upvalue captures an enclosing upvalue.
-    Upvalue(UpvalueIdx),
-
-    /// This upvalue captures a dynamically resolved value (i.e.
-    /// `with`).
-    Dynamic(SmolStr),
-}
-
-/// Represents the current position of a local as resolved in a scope.
-enum LocalPosition {
-    /// Local is not known in this scope.
-    Unknown,
-
-    /// Local is known and defined at the given stack index.
-    Known(StackIdx),
-
-    /// Local is known, but is being accessed recursively within its
-    /// own initialisation. Depending on context, this is either an
-    /// error or forcing a closure/thunk.
-    Recursive(StackIdx),
-}
-
-/// Represents a scope known during compilation, which can be resolved
-/// directly to stack indices.
-///
-/// TODO(tazjin): `with`-stack
-/// TODO(tazjin): flag "specials" (e.g. note depth if builtins are
-/// overridden)
-#[derive(Default)]
-struct Scope {
-    locals: Vec<Local>,
-    upvalues: Vec<Upvalue>,
-
-    // How many scopes "deep" are these locals?
-    scope_depth: usize,
-
-    // Stack indices of attribute sets currently in scope through
-    // `with`.
-    with_stack: Vec<With>,
-
-    // Users are allowed to override globally defined symbols like
-    // `true`, `false` or `null` in scopes. We call this "scope
-    // poisoning", as it requires runtime resolution of those tokens.
-    //
-    // To support this efficiently, the depth at which a poisoning
-    // occured is tracked here.
-    poisoned_tokens: HashMap<&'static str, usize>,
-}
-
-impl Scope {
-    /// Mark a globally defined token as poisoned.
-    fn poison(&mut self, name: &'static str, depth: usize) {
-        match self.poisoned_tokens.entry(name) {
-            hash_map::Entry::Occupied(_) => {
-                /* do nothing, as the token is already poisoned at a
-                 * lower scope depth */
-            }
-            hash_map::Entry::Vacant(entry) => {
-                entry.insert(depth);
-            }
-        }
-    }
-
-    /// Check whether a given token is poisoned.
-    fn is_poisoned(&self, name: &str) -> bool {
-        self.poisoned_tokens.contains_key(name)
-    }
-
-    /// "Unpoison" tokens that were poisoned at a given depth. Used
-    /// when scopes are closed.
-    fn unpoison(&mut self, depth: usize) {
-        self.poisoned_tokens
-            .retain(|_, poisoned_at| *poisoned_at != depth);
-    }
-
-    /// Resolve the stack index of a statically known local.
-    fn resolve_local(&mut self, name: &str) -> LocalPosition {
-        for (idx, local) in self.locals.iter_mut().enumerate().rev() {
-            if !local.phantom && local.name == name {
-                local.used = true;
-
-                match local.depth {
-                    // This local is still being initialised, meaning
-                    // that we know its final runtime stack position,
-                    // but it is not yet on the stack.
-                    Depth::Unitialised => return LocalPosition::Recursive(StackIdx(idx)),
-
-                    // This local is known, but we need to account for
-                    // uninitialised variables in this "initialiser
-                    // stack".
-                    Depth::At(_) => return LocalPosition::Known(self.resolve_uninit(idx)),
-                }
-            }
-        }
-
-        LocalPosition::Unknown
-    }
-
-    /// Return the "initialiser stack slot" of a value, that is the
-    /// stack slot of a value which might only exist during the
-    /// initialisation of another. This requires accounting for the
-    /// stack offsets of any unitialised variables.
-    fn resolve_uninit(&mut self, locals_idx: usize) -> StackIdx {
-        StackIdx(
-            self.locals[..locals_idx]
-                .iter()
-                .filter(|local| matches!(local.depth, Depth::At(_)))
-                .count(),
-        )
-    }
-}
-
 /// Represents the lambda currently being compiled.
 struct LambdaCtx {
     lambda: Lambda,
@@ -222,6 +55,8 @@ impl LambdaCtx {
     }
 }
 
+/// Alias for the map of globally available functions that should
+/// implicitly be resolvable in the global scope.
 type GlobalsMap = HashMap<&'static str, Rc<dyn Fn(&mut Compiler)>>;
 
 struct Compiler {
diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs
new file mode 100644
index 000000000000..93aa39a0d86f
--- /dev/null
+++ b/tvix/eval/src/compiler/scope.rs
@@ -0,0 +1,189 @@
+//! This module implements the scope-tracking logic of the Tvix
+//! compiler.
+//!
+//! Scoping in Nix is fairly complicated, there are features like
+//! mutually recursive bindings, `with`, upvalue capturing, builtin
+//! poisoning and so on that introduce a fair bit of complexity.
+//!
+//! Tvix attempts to do as much of the heavy lifting of this at
+//! compile time, and leave the runtime to mostly deal with known
+//! stack indices. To do this, the compiler simulates where locals
+//! will be at runtime using the data structures implemented here.
+
+use std::collections::{hash_map, HashMap};
+
+use smol_str::SmolStr;
+
+use crate::opcode::{StackIdx, UpvalueIdx};
+
+/// Represents the initialisation status of a variable, tracking
+/// whether it is only known or also already defined.
+pub enum Depth {
+    /// Variable is defined and located at the given depth.
+    At(usize),
+
+    /// Variable is known but not yet defined.
+    Unitialised,
+}
+
+impl Depth {
+    /// Does this variable live above the other given depth?
+    pub fn above(&self, theirs: usize) -> bool {
+        match self {
+            Depth::Unitialised => false,
+            Depth::At(ours) => *ours > theirs,
+        }
+    }
+
+    /// Does this variable live below the other given depth?
+    pub fn below(&self, theirs: usize) -> bool {
+        match self {
+            Depth::Unitialised => false,
+            Depth::At(ours) => *ours < theirs,
+        }
+    }
+}
+
+/// Represents a single local already known to the compiler.
+pub struct Local {
+    // Definition name, which can be different kinds of tokens (plain
+    // string or identifier). Nix does not allow dynamic names inside
+    // of `let`-expressions.
+    pub name: String,
+
+    // Syntax node at which this local was declared.
+    pub node: Option<rnix::SyntaxNode>,
+
+    // Scope depth of this local.
+    pub depth: Depth,
+
+    // Phantom locals are not actually accessible by users (e.g.
+    // intermediate values used for `with`).
+    pub phantom: bool,
+
+    // Is this local known to have been used at all?
+    pub used: bool,
+}
+
+/// Represents an entry on the runtime with-stack.
+#[derive(Debug)]
+pub struct With {}
+
+/// Represents the current position of a local as resolved in a scope.
+pub enum LocalPosition {
+    /// Local is not known in this scope.
+    Unknown,
+
+    /// Local is known and defined at the given stack index.
+    Known(StackIdx),
+
+    /// Local is known, but is being accessed recursively within its
+    /// own initialisation. Depending on context, this is either an
+    /// error or forcing a closure/thunk.
+    Recursive(StackIdx),
+}
+
+/// Represents the different ways in which upvalues can be captured in
+/// closures or thunks.
+#[derive(Debug, PartialEq)]
+pub enum Upvalue {
+    /// This upvalue captures a local from the stack.
+    Stack(StackIdx),
+
+    /// This upvalue captures an enclosing upvalue.
+    Upvalue(UpvalueIdx),
+
+    /// This upvalue captures a dynamically resolved value (i.e.
+    /// `with`).
+    Dynamic(SmolStr),
+}
+
+/// Represents a scope known during compilation, which can be resolved
+/// directly to stack indices.
+///
+/// TODO(tazjin): `with`-stack
+/// TODO(tazjin): flag "specials" (e.g. note depth if builtins are
+/// overridden)
+#[derive(Default)]
+pub struct Scope {
+    pub locals: Vec<Local>,
+    pub upvalues: Vec<Upvalue>,
+
+    // How many scopes "deep" are these locals?
+    pub scope_depth: usize,
+
+    // Stack indices of attribute sets currently in scope through
+    // `with`.
+    pub with_stack: Vec<With>,
+
+    // Users are allowed to override globally defined symbols like
+    // `true`, `false` or `null` in scopes. We call this "scope
+    // poisoning", as it requires runtime resolution of those tokens.
+    //
+    // To support this efficiently, the depth at which a poisoning
+    // occured is tracked here.
+    poisoned_tokens: HashMap<&'static str, usize>,
+}
+
+impl Scope {
+    /// Mark a globally defined token as poisoned.
+    pub fn poison(&mut self, name: &'static str, depth: usize) {
+        match self.poisoned_tokens.entry(name) {
+            hash_map::Entry::Occupied(_) => {
+                /* do nothing, as the token is already poisoned at a
+                 * lower scope depth */
+            }
+            hash_map::Entry::Vacant(entry) => {
+                entry.insert(depth);
+            }
+        }
+    }
+
+    /// Check whether a given token is poisoned.
+    pub fn is_poisoned(&self, name: &str) -> bool {
+        self.poisoned_tokens.contains_key(name)
+    }
+
+    /// "Unpoison" tokens that were poisoned at a given depth. Used
+    /// when scopes are closed.
+    pub fn unpoison(&mut self, depth: usize) {
+        self.poisoned_tokens
+            .retain(|_, poisoned_at| *poisoned_at != depth);
+    }
+
+    /// Resolve the stack index of a statically known local.
+    pub fn resolve_local(&mut self, name: &str) -> LocalPosition {
+        for (idx, local) in self.locals.iter_mut().enumerate().rev() {
+            if !local.phantom && local.name == name {
+                local.used = true;
+
+                match local.depth {
+                    // This local is still being initialised, meaning
+                    // that we know its final runtime stack position,
+                    // but it is not yet on the stack.
+                    Depth::Unitialised => return LocalPosition::Recursive(StackIdx(idx)),
+
+                    // This local is known, but we need to account for
+                    // uninitialised variables in this "initialiser
+                    // stack".
+                    Depth::At(_) => return LocalPosition::Known(self.resolve_uninit(idx)),
+                }
+            }
+        }
+
+        LocalPosition::Unknown
+    }
+
+    /// Return the "initialiser stack slot" of a value, that is the
+    /// stack slot of a value which might only exist during the
+    /// initialisation of another. This requires accounting for the
+    /// stack offsets of any unitialised variables.
+    fn resolve_uninit(&mut self, locals_idx: usize) -> StackIdx {
+        StackIdx(
+            self.locals[..locals_idx]
+                .iter()
+                .filter(|local| matches!(local.depth, Depth::At(_)))
+                .count(),
+        )
+    }
+}