about summary refs log tree commit diff
path: root/tvix/eval/src/compiler/scope.rs
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/eval/src/compiler/scope.rs')
-rw-r--r--tvix/eval/src/compiler/scope.rs378
1 files changed, 378 insertions, 0 deletions
diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs
new file mode 100644
index 000000000000..892727c107c9
--- /dev/null
+++ b/tvix/eval/src/compiler/scope.rs
@@ -0,0 +1,378 @@
+//! This module implements the scope-tracking logic of the Tvix
+//! compiler.
+//!
+//! Scoping in Nix is fairly complicated, there are features like
+//! mutually recursive bindings, `with`, upvalue capturing, and so
+//! on that introduce a fair bit of complexity.
+//!
+//! Tvix attempts to do as much of the heavy lifting of this at
+//! compile time, and leave the runtime to mostly deal with known
+//! stack indices. To do this, the compiler simulates where locals
+//! will be at runtime using the data structures implemented here.
+
+use std::{
+    collections::{hash_map, HashMap},
+    ops::Index,
+};
+
+use smol_str::SmolStr;
+
+use crate::opcode::{StackIdx, UpvalueIdx};
+
+#[derive(Debug)]
+enum LocalName {
+    /// Normally declared local with a statically known name.
+    Ident(String),
+
+    /// Phantom stack value (e.g. attribute set used for `with`) that
+    /// must be accounted for to calculate correct stack offsets.
+    Phantom,
+}
+
+/// Represents a single local already known to the compiler.
+#[derive(Debug)]
+pub struct Local {
+    /// Identifier of this local. This is always a statically known
+    /// value (Nix does not allow dynamic identifier names in locals),
+    /// or a "phantom" value not accessible by users.
+    name: LocalName,
+
+    /// Source span at which this local was declared.
+    pub span: codemap::Span,
+
+    /// Scope depth of this local.
+    pub depth: usize,
+
+    /// Is this local initialised?
+    pub initialised: bool,
+
+    /// Is this local known to have been used at all?
+    pub used: bool,
+
+    /// Does this local need to be finalised after the enclosing scope
+    /// is completely constructed?
+    pub needs_finaliser: bool,
+
+    /// Does this local's upvalues contain a reference to itself?
+    pub must_thunk: bool,
+}
+
+impl Local {
+    /// Retrieve the name of the given local (if available).
+    pub fn name(&self) -> Option<SmolStr> {
+        match &self.name {
+            LocalName::Phantom => None,
+            LocalName::Ident(name) => Some(SmolStr::new(name)),
+        }
+    }
+
+    /// Is this local intentionally ignored? (i.e. name starts with `_`)
+    pub fn is_ignored(&self) -> bool {
+        match &self.name {
+            LocalName::Ident(name) => name.starts_with('_'),
+            LocalName::Phantom => false,
+        }
+    }
+}
+
+/// Represents the current position of an identifier as resolved in a scope.
+pub enum LocalPosition {
+    /// Local is not known in this scope.
+    Unknown,
+
+    /// Local is known at the given local index.
+    Known(LocalIdx),
+
+    /// Local is known, but is being accessed recursively within its
+    /// own initialisation. Depending on context, this is either an
+    /// error or forcing a closure/thunk.
+    Recursive(LocalIdx),
+}
+
+/// Represents the different ways in which upvalues can be captured in
+/// closures or thunks.
+#[derive(Clone, Debug, PartialEq, Eq)]
+pub enum UpvalueKind {
+    /// This upvalue captures a local from the stack.
+    Local(LocalIdx),
+
+    /// This upvalue captures an enclosing upvalue.
+    Upvalue(UpvalueIdx),
+}
+
+#[derive(Clone, Debug)]
+pub struct Upvalue {
+    pub kind: UpvalueKind,
+    pub span: codemap::Span,
+}
+
+/// The index of a local in the scope's local array at compile time.
+#[repr(transparent)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd)]
+pub struct LocalIdx(usize);
+
+/// Helper struct for indexing over `Scope::locals` by name.
+#[derive(Debug)]
+enum ByName {
+    Single(LocalIdx),
+    Shadowed(Vec<LocalIdx>),
+}
+
+impl ByName {
+    /// Add an additional index for this name.
+    fn add_idx(&mut self, new: LocalIdx) {
+        match self {
+            ByName::Shadowed(indices) => indices.push(new),
+            ByName::Single(idx) => {
+                *self = ByName::Shadowed(vec![*idx, new]);
+            }
+        }
+    }
+
+    /// Remove the most recent index for this name, unless it is a
+    /// single. Returns `true` if an entry was removed.
+    fn remove_idx(&mut self) -> bool {
+        match self {
+            ByName::Single(_) => false,
+            ByName::Shadowed(indices) => match indices[..] {
+                [fst, _snd] => {
+                    *self = ByName::Single(fst);
+                    true
+                }
+                _ => {
+                    indices.pop();
+                    true
+                }
+            },
+        }
+    }
+
+    /// Return the most recent index.
+    pub fn index(&self) -> LocalIdx {
+        match self {
+            ByName::Single(idx) => *idx,
+            ByName::Shadowed(vec) => *vec.last().unwrap(),
+        }
+    }
+}
+
+/// Represents a scope known during compilation, which can be resolved
+/// directly to stack indices.
+#[derive(Debug, Default)]
+pub struct Scope {
+    locals: Vec<Local>,
+    pub upvalues: Vec<Upvalue>,
+
+    /// Secondary by-name index over locals.
+    by_name: HashMap<String, ByName>,
+
+    /// How many scopes "deep" are these locals?
+    scope_depth: usize,
+
+    /// Current size of the `with`-stack at runtime.
+    with_stack_size: usize,
+}
+
+impl Index<LocalIdx> for Scope {
+    type Output = Local;
+
+    fn index(&self, index: LocalIdx) -> &Self::Output {
+        &self.locals[index.0]
+    }
+}
+
+impl Scope {
+    /// Inherit scope details from a parent scope (required for
+    /// correctly nesting scopes in lambdas and thunks when special
+    /// scope features like dynamic resolution are present).
+    pub fn inherit(&self) -> Self {
+        Self {
+            scope_depth: self.scope_depth + 1,
+            with_stack_size: self.with_stack_size,
+            ..Default::default()
+        }
+    }
+
+    /// Increase the `with`-stack size of this scope.
+    pub fn push_with(&mut self) {
+        self.with_stack_size += 1;
+    }
+
+    /// Decrease the `with`-stack size of this scope.
+    pub fn pop_with(&mut self) {
+        self.with_stack_size -= 1;
+    }
+
+    /// Does this scope currently require dynamic runtime resolution
+    /// of identifiers that could not be found?
+    pub fn has_with(&self) -> bool {
+        self.with_stack_size > 0
+    }
+
+    /// Resolve the stack index of a statically known local.
+    pub fn resolve_local(&mut self, name: &str) -> LocalPosition {
+        if let Some(by_name) = self.by_name.get(name) {
+            let idx = by_name.index();
+            let local = self
+                .locals
+                .get_mut(idx.0)
+                .expect("invalid compiler state: indexed local missing");
+
+            local.used = true;
+
+            // This local is still being initialised, meaning that
+            // we know its final runtime stack position, but it is
+            // not yet on the stack.
+            if !local.initialised {
+                return LocalPosition::Recursive(idx);
+            }
+
+            return LocalPosition::Known(idx);
+        }
+
+        LocalPosition::Unknown
+    }
+
+    /// Declare a local variable that occupies a stack slot and should
+    /// be accounted for, but is not directly accessible by users
+    /// (e.g. attribute sets used for `with`).
+    pub fn declare_phantom(&mut self, span: codemap::Span, initialised: bool) -> LocalIdx {
+        let idx = self.locals.len();
+        self.locals.push(Local {
+            initialised,
+            span,
+            name: LocalName::Phantom,
+            depth: self.scope_depth,
+            needs_finaliser: false,
+            must_thunk: false,
+            used: true,
+        });
+
+        LocalIdx(idx)
+    }
+
+    /// Declare an uninitialised, named local variable.
+    ///
+    /// Returns the `LocalIdx` of the new local, and optionally the
+    /// index of a previous local shadowed by this one.
+    pub fn declare_local(
+        &mut self,
+        name: String,
+        span: codemap::Span,
+    ) -> (LocalIdx, Option<LocalIdx>) {
+        let idx = LocalIdx(self.locals.len());
+        self.locals.push(Local {
+            name: LocalName::Ident(name.clone()),
+            span,
+            depth: self.scope_depth,
+            initialised: false,
+            needs_finaliser: false,
+            must_thunk: false,
+            used: false,
+        });
+
+        let mut shadowed = None;
+        match self.by_name.entry(name) {
+            hash_map::Entry::Occupied(mut entry) => {
+                let existing = entry.get_mut();
+                shadowed = Some(existing.index());
+                existing.add_idx(idx);
+            }
+            hash_map::Entry::Vacant(entry) => {
+                entry.insert(ByName::Single(idx));
+            }
+        }
+
+        (idx, shadowed)
+    }
+
+    /// Mark local as initialised after compiling its expression.
+    pub fn mark_initialised(&mut self, idx: LocalIdx) {
+        self.locals[idx.0].initialised = true;
+    }
+
+    /// Mark local as needing a finaliser.
+    pub fn mark_needs_finaliser(&mut self, idx: LocalIdx) {
+        self.locals[idx.0].needs_finaliser = true;
+    }
+
+    /// Mark local as must be wrapped in a thunk.  This happens if
+    /// the local has a reference to itself in its upvalues.
+    pub fn mark_must_thunk(&mut self, idx: LocalIdx) {
+        self.locals[idx.0].must_thunk = true;
+    }
+
+    /// Compute the runtime stack index for a given local by
+    /// accounting for uninitialised variables at scopes below this
+    /// one.
+    pub fn stack_index(&self, idx: LocalIdx) -> StackIdx {
+        let uninitialised_count = self.locals[..(idx.0)]
+            .iter()
+            .filter(|l| !l.initialised && self[idx].depth > l.depth)
+            .count();
+
+        StackIdx(idx.0 - uninitialised_count)
+    }
+
+    /// Increase the current scope depth (e.g. within a new bindings
+    /// block, or `with`-scope).
+    pub fn begin_scope(&mut self) {
+        self.scope_depth += 1;
+    }
+
+    /// Decrease the scope depth and remove all locals still tracked
+    /// for the current scope.
+    ///
+    /// Returns the count of locals that were dropped while marked as
+    /// initialised (used by the compiler to determine whether to emit
+    /// scope cleanup operations), as well as the spans of the
+    /// definitions of unused locals (used by the compiler to emit
+    /// unused binding warnings).
+    pub fn end_scope(&mut self) -> (usize, Vec<codemap::Span>) {
+        debug_assert!(self.scope_depth != 0, "can not end top scope");
+
+        let mut pops = 0;
+        let mut unused_spans = vec![];
+
+        // TL;DR - iterate from the back while things belonging to the
+        // ended scope still exist.
+        while self.locals.last().unwrap().depth == self.scope_depth {
+            if let Some(local) = self.locals.pop() {
+                // pop the local from the stack if it was actually
+                // initialised
+                if local.initialised {
+                    pops += 1;
+                }
+
+                // analyse whether the local was accessed during its
+                // lifetime, and emit a warning otherwise (unless the
+                // user explicitly chose to ignore it by prefixing the
+                // identifier with `_`)
+                if !local.used && !local.is_ignored() {
+                    unused_spans.push(local.span);
+                }
+
+                // remove the by-name index if this was a named local
+                if let LocalName::Ident(name) = local.name {
+                    if let hash_map::Entry::Occupied(mut entry) = self.by_name.entry(name) {
+                        // If no removal occured through `remove_idx`
+                        // (i.e. there was no shadowing going on),
+                        // nuke the whole entry.
+                        if !entry.get_mut().remove_idx() {
+                            entry.remove();
+                        }
+                    }
+                }
+            }
+        }
+
+        self.scope_depth -= 1;
+
+        (pops, unused_spans)
+    }
+
+    /// Access the current scope depth.
+    pub fn scope_depth(&self) -> usize {
+        self.scope_depth
+    }
+}