diff options
author | Adam Joseph <adam@westernsemico.com> | 2022-10-26T12·16-0700 |
---|---|---|
committer | Adam Joseph <adam@westernsemico.com> | 2022-10-27T21·36+0000 |
commit | 8616f13a71e3fc186e01bdb4ada60503f233be99 (patch) | |
tree | eba8c8a944f3ba8707632f4a0abb566dcb19fc5d /tvix/eval/src/compiler | |
parent | 79ef6ee283182873090534d8d559dcde7ed3ebb4 (diff) |
feat(tvix/eval): builtins.import without RefCell r/5213
CL/6867 added support for builtins.import, which required a cyclic reference import->globals->builtins->import. This was implemented using a RefCell, which makes it possible to mutate the builtins during evaluation. The commit message for CL/6867 expressed a desire to eliminate this possibility: This opens up a potentially dangerous footgun in which we could mutate the builtins at runtime leading to different compiler invocations seeing different builtins, so it'd be nice to have some kind of "finalised" status for them or some such, but I'm not sure how to represent that atm. This CL replaces the RefCell with Rc::new_cyclic(), making the globals/builtins immutable once again. At VM runtime (once opcodes start executing) everything is the same as before this CL, except that the Rc<RefCell<>> introduced by CL/6867 is turned into an rc::Weak<>. The function passed to Rc::new_cyclic works very similarly to overlays in nixpkgs: a function takes its own result as an argument. However instead of laziness "breaking the cycle", Rust's Rc::new_cyclic() instead uses an rc::Weak. This is done to prevent memory leaks rather than divergence. This CL also resolves the following TODO from CL/6867: // TODO: encapsulate this import weirdness in builtins The main disadvantage of this CL is the fact that the VM now must ensure that it holds a strong reference to the globals while a program is executing; failure to do so will cause a panic when the weak reference in the builtins is upgrade()d. In theory it should be possible to create strong reference cycles the same way Rc::new_cyclic() creates weak cycles, but these cycles would cause a permanent memory leak -- without either an rc::Weak or RefCell there is no way to break the cycle. At some point we will have to implement some form of cycle collection; whatever library we choose for that purpose is likely to provide an "immutable strong reference cycle" primitive similar to Rc::new_cyclic(), and we should be able to simply drop it in. Signed-off-by: Adam Joseph <adam@westernsemico.com> Change-Id: I34bb5821628eb97e426bdb880b02e2097402adb7 Reviewed-on: https://cl.tvl.fyi/c/depot/+/7097 Tested-by: BuildkiteCI Reviewed-by: tazjin <tazjin@tvl.su>
Diffstat (limited to 'tvix/eval/src/compiler')
-rw-r--r-- | tvix/eval/src/compiler/mod.rs | 96 |
1 files changed, 51 insertions, 45 deletions
diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 2985c7e90e0b..34a15330c227 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -19,10 +19,9 @@ mod scope; use codemap::Span; use rnix::ast::{self, AstToken}; use smol_str::SmolStr; -use std::cell::RefCell; use std::collections::HashMap; use std::path::{Path, PathBuf}; -use std::rc::Rc; +use std::rc::{Rc, Weak}; use std::sync::Arc; use crate::chunk::Chunk; @@ -42,6 +41,10 @@ pub struct CompilationOutput { pub lambda: Rc<Lambda>, pub warnings: Vec<EvalWarning>, pub errors: Vec<Error>, + + // This field must outlive the rc::Weak reference which breaks + // the builtins -> import -> builtins reference cycle. + pub globals: Rc<GlobalsMap>, } /// Represents the lambda currently being compiled. @@ -69,11 +72,19 @@ impl LambdaCtx { } } -/// Alias for the map of globally available functions that should -/// implicitly be resolvable in the global scope. -type GlobalsMap = HashMap<&'static str, Rc<dyn Fn(&mut Compiler, Span)>>; +/// The map of globally available functions that should implicitly +/// be resolvable in the global scope. +pub type GlobalsMap = HashMap<&'static str, Rc<dyn Fn(&mut Compiler, Span)>>; + +/// Functions with this type are used to construct a +/// self-referential `builtins` object; it takes a weak reference to +/// its own result, similar to how nixpkgs' overlays work. +/// Rc::new_cyclic() is what "ties the knot". The heap allocation +/// (Box) and vtable (dyn) do not impair runtime or compile-time +/// performance; they exist only during compiler startup. +pub type GlobalsMapFunc = Box<dyn FnOnce(&Weak<GlobalsMap>) -> GlobalsMap>; -struct Compiler<'observer> { +pub struct Compiler<'observer> { contexts: Vec<LambdaCtx>, warnings: Vec<EvalWarning>, errors: Vec<Error>, @@ -85,7 +96,7 @@ struct Compiler<'observer> { /// Each global has an associated token, which when encountered as /// an identifier is resolved against the scope poisoning logic, /// and a function that should emit code for the token. - globals: GlobalsMap, + globals: Rc<GlobalsMap>, /// File reference in the codemap contains all known source code /// and is used to track the spans from which instructions where @@ -108,7 +119,7 @@ impl<'observer> Compiler<'observer> { pub(crate) fn new( location: Option<PathBuf>, file: Arc<codemap::File>, - globals: Rc<RefCell<HashMap<&'static str, Value>>>, + globals: Rc<GlobalsMap>, observer: &'observer mut dyn CompilerObserver, ) -> EvalResult<Self> { let mut root_dir = match location { @@ -136,8 +147,6 @@ impl<'observer> Compiler<'observer> { root_dir.pop(); } - let globals = globals.borrow(); - #[cfg(not(target_arch = "wasm32"))] debug_assert!(root_dir.is_absolute()); @@ -145,7 +154,7 @@ impl<'observer> Compiler<'observer> { root_dir, file, observer, - globals: prepare_globals(&globals), + globals, contexts: vec![LambdaCtx::new()], warnings: vec![], errors: vec![], @@ -186,7 +195,7 @@ impl Compiler<'_> { /// Emit a single constant to the current bytecode chunk and track /// the source span from which it was compiled. - fn emit_constant<T: ToSpan>(&mut self, value: Value, node: &T) { + pub(super) fn emit_constant<T: ToSpan>(&mut self, value: Value, node: &T) { let idx = self.chunk().push_constant(value); self.push_op(OpCode::OpConstant(idx), node); } @@ -1174,54 +1183,50 @@ fn optimise_tail_call(chunk: &mut Chunk) { /// Prepare the full set of globals from additional globals supplied /// by the caller of the compiler, as well as the built-in globals -/// that are always part of the language. +/// that are always part of the language. This also "ties the knot" +/// required in order for import to have a reference cycle back to +/// the globals. /// /// Note that all builtin functions are *not* considered part of the /// language in this sense and MUST be supplied as additional global /// values, including the `builtins` set itself. -fn prepare_globals(additional: &HashMap<&'static str, Value>) -> GlobalsMap { - let mut globals: GlobalsMap = HashMap::new(); - - globals.insert( - "true", - Rc::new(|compiler, span| { - compiler.push_op(OpCode::OpTrue, &span); - }), - ); - - globals.insert( - "false", - Rc::new(|compiler, span| { - compiler.push_op(OpCode::OpFalse, &span); - }), - ); - - globals.insert( - "null", - Rc::new(|compiler, span| { - compiler.push_op(OpCode::OpNull, &span); - }), - ); - - for (ident, value) in additional.iter() { - let value: Value = value.clone(); +pub fn prepare_globals(additional: GlobalsMapFunc) -> Rc<GlobalsMap> { + Rc::new_cyclic(Box::new(|weak: &Weak<GlobalsMap>| { + let mut globals = additional(weak); + globals.insert( - ident, - Rc::new(move |compiler, span| compiler.emit_constant(value.clone(), &span)), + "true", + Rc::new(|compiler, span| { + compiler.push_op(OpCode::OpTrue, &span); + }), + ); + + globals.insert( + "false", + Rc::new(|compiler, span| { + compiler.push_op(OpCode::OpFalse, &span); + }), + ); + + globals.insert( + "null", + Rc::new(|compiler, span| { + compiler.push_op(OpCode::OpNull, &span); + }), ); - } - globals + globals + })) } pub fn compile( expr: &ast::Expr, location: Option<PathBuf>, file: Arc<codemap::File>, - globals: Rc<RefCell<HashMap<&'static str, Value>>>, + globals: Rc<GlobalsMap>, observer: &mut dyn CompilerObserver, ) -> EvalResult<CompilationOutput> { - let mut c = Compiler::new(location, file, globals, observer)?; + let mut c = Compiler::new(location, file, globals.clone(), observer)?; let root_span = c.span_for(expr); let root_slot = c.scope_mut().declare_phantom(root_span, false); @@ -1240,5 +1245,6 @@ pub fn compile( lambda, warnings: c.warnings, errors: c.errors, + globals: globals, }) } |