From 3d238c350b4c0b4430d694b15e89319a150af889 Mon Sep 17 00:00:00 2001
From: Vincent Ambo
Date: Tue, 3 Jan 2023 22:30:49 +0300
Subject: refactor(tvix/eval): streamline construction of globals/builtins

Previously the construction of globals (a compiler-only concept) and
builtins (a (now) user-facing API) was intermingled between multiple
different modules, and kind of difficult to understand.

The complexity of this had grown in large part due to the
implementation of `builtins.import`, which required the notorious
"knot-tying" trick using Rc::new_cyclic (see cl/7097) for constructing
the set of globals.

As part of the new `Evaluation` API, users should have the ability to
bring their own builtins, and control explicitly whether or not impure
builtins are available (regardless of whether they're compiled in or
not).

To streamline the construction and allow the new API features to work,
this commit restructures things by making these changes:

1. The `tvix_eval::builtins` module is now only responsible for
   exporting sets of builtins. It no longer has any knowledge of
   whether or not certain sets (e.g. only pure, or pure+impure) are
   enabled, and it has no control over which builtins are globally
   available (this is now handled in the compiler).

2. The compiler module is now responsible both for constructing the
   final attribute set of builtins from the set of builtins supplied
   by a user, and for populating its globals (that is, identifiers
   which are available at the top-level scope).

3. The `Evaluation` API now carries a `builtins` field which is
   populated with the pure builtins by default, and can be extended
   by users.

4. The `import` feature has been moved into the compiler, as a
   special case. In general, builtins no longer have the ability to
   reference the "fix point" of the globals set.

This should not change any functionality, and in fact preserves minor
differences between Tvix/Nix that we already had (such as
`builtins.builtins` not existing).
Change-Id: Icdf5dd50eb81eb9260d89269d6e08b1e67811a2c Reviewed-on: https://cl.tvl.fyi/c/depot/+/7738 Reviewed-by: sterni Autosubmit: tazjin Tested-by: BuildkiteCI Reviewed-by: flokli --- tvix/cli/src/main.rs | 2 +- tvix/eval/src/builtins/impure.rs | 111 ++++----------------------------- tvix/eval/src/builtins/mod.rs | 130 ++++++++++++--------------------------- tvix/eval/src/compiler/import.rs | 105 +++++++++++++++++++++++++++++++ tvix/eval/src/compiler/mod.rs | 103 ++++++++++++++++++++++++------- tvix/eval/src/lib.rs | 35 +++++++++-- tvix/eval/src/tests/mod.rs | 3 +- 7 files changed, 270 insertions(+), 219 deletions(-) create mode 100644 tvix/eval/src/compiler/import.rs diff --git a/tvix/cli/src/main.rs b/tvix/cli/src/main.rs index cef51ed19a16..6b9f4abe861b 100644 --- a/tvix/cli/src/main.rs +++ b/tvix/cli/src/main.rs @@ -40,7 +40,7 @@ struct Args { /// and the result itself. The return value indicates whether /// evaluation succeeded. fn interpret(code: &str, path: Option, args: &Args, explain: bool) -> bool { - let mut eval = tvix_eval::Evaluation::new(code, path); + let mut eval = tvix_eval::Evaluation::new_impure(code, path); eval.io_handle = Box::new(nix_compat::NixCompatIO::new()); eval.nix_path = args.nix_search_path.clone(); diff --git a/tvix/eval/src/builtins/impure.rs b/tvix/eval/src/builtins/impure.rs index d371d877977d..e8c032cc77e6 100644 --- a/tvix/eval/src/builtins/impure.rs +++ b/tvix/eval/src/builtins/impure.rs @@ -2,20 +2,17 @@ use builtin_macros::builtins; use smol_str::SmolStr; use std::{ - collections::BTreeMap, env, - rc::{Rc, Weak}, + rc::Rc, time::{SystemTime, UNIX_EPOCH}, }; use crate::{ - compiler::GlobalsMap, errors::ErrorKind, io::FileType, - observer::NoOpObserver, - value::{Builtin, BuiltinArgument, NixAttrs, Thunk}, + value::{NixAttrs, Thunk}, vm::VM, - SourceCode, Value, + Value, }; #[builtins] @@ -67,13 +64,13 @@ mod impure_builtins { /// Return all impure builtins, that is all builtins which may perform I/O /// outside of the VM 
and so cannot be used in all contexts (e.g. WASM). -pub(super) fn builtins() -> BTreeMap<&'static str, Value> { - let mut map: BTreeMap<&'static str, Value> = impure_builtins::builtins() +pub fn impure_builtins() -> Vec<(&'static str, Value)> { + let mut result = impure_builtins::builtins() .into_iter() - .map(|b| (b.name(), Value::Builtin(b))) - .collect(); + .map(super::builtin_tuple) + .collect::>(); - map.insert( + result.push(( "storeDir", Value::Thunk(Thunk::new_suspended_native(Rc::new(Box::new( |vm: &mut VM| match vm.io().store_dir() { @@ -81,7 +78,7 @@ pub(super) fn builtins() -> BTreeMap<&'static str, Value> { Some(dir) => Ok(Value::String(dir.into())), }, )))), - ); + )); // currentTime pins the time at which evaluation was started { @@ -92,94 +89,8 @@ pub(super) fn builtins() -> BTreeMap<&'static str, Value> { Err(err) => -(err.duration().as_secs() as i64), }; - map.insert("currentTime", Value::Integer(seconds)); + result.push(("currentTime", Value::Integer(seconds))); } - map -} - -/// Constructs and inserts the `import` builtin. This builtin is special in that -/// it needs to capture the [crate::SourceCode] structure to correctly track -/// source code locations while invoking a compiler. -// TODO: need to be able to pass through a CompilationObserver, too. -pub fn builtins_import(globals: &Weak, source: SourceCode) -> Builtin { - // This (very cheap, once-per-compiler-startup) clone exists - // solely in order to keep the borrow checker happy. 
It - // resolves the tension between the requirements of - // Rc::new_cyclic() and Builtin::new() - let globals = globals.clone(); - - Builtin::new( - "import", - &[BuiltinArgument { - strict: true, - name: "path", - }], - None, - move |mut args: Vec, vm: &mut VM| { - let mut path = super::coerce_value_to_path(&args.pop().unwrap(), vm)?; - if path.is_dir() { - path.push("default.nix"); - } - - let current_span = vm.current_light_span(); - - if let Some(cached) = vm.import_cache.get(&path) { - return Ok(cached.clone()); - } - - let contents = vm.io().read_to_string(path.clone())?; - - let parsed = rnix::ast::Root::parse(&contents); - let errors = parsed.errors(); - - let file = source.add_file(path.to_string_lossy().to_string(), contents); - - if !errors.is_empty() { - return Err(ErrorKind::ImportParseError { - path, - file, - errors: errors.to_vec(), - }); - } - - let result = crate::compiler::compile( - &parsed.tree().expr().unwrap(), - Some(path.clone()), - file, - // The VM must ensure that a strong reference to the - // globals outlives any self-references (which are - // weak) embedded within the globals. If the - // expect() below panics, it means that did not - // happen. - globals - .upgrade() - .expect("globals dropped while still in use"), - &mut NoOpObserver::default(), - ) - .map_err(|err| ErrorKind::ImportCompilerError { - path: path.clone(), - errors: vec![err], - })?; - - if !result.errors.is_empty() { - return Err(ErrorKind::ImportCompilerError { - path, - errors: result.errors, - }); - } - - // Compilation succeeded, we can construct a thunk from whatever it spat - // out and return that. 
- let res = Value::Thunk(Thunk::new_suspended(result.lambda, current_span)); - - vm.import_cache.insert(path, res.clone()); - - for warning in result.warnings { - vm.push_warning(warning); - } - - Ok(res) - }, - ) + result } diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 2e043a1b104f..01ef1678c7ee 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -3,17 +3,15 @@ //! See //tvix/eval/docs/builtins.md for a some context on the //! available builtins in Nix. -use crate::compiler::{GlobalsMap, GlobalsMapFunc}; -use crate::source::SourceCode; -use crate::value::BuiltinArgument; use std::cmp::{self, Ordering}; -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashSet}; use std::path::PathBuf; -use std::rc::Rc; use builtin_macros::builtins; use regex::Regex; +use crate::arithmetic_op; +use crate::value::BuiltinArgument; use crate::warnings::WarningKind; use crate::{ errors::{ErrorKind, EvalResult}, @@ -21,13 +19,18 @@ use crate::{ vm::VM, }; -use crate::arithmetic_op; - use self::versions::{VersionPart, VersionPartsIter}; +mod versions; + #[cfg(feature = "impure")] -pub mod impure; -pub mod versions; +mod impure; + +#[cfg(feature = "impure")] +pub use impure::impure_builtins; + +// we set TVIX_CURRENT_SYSTEM in build.rs +pub const CURRENT_PLATFORM: &str = env!("TVIX_CURRENT_SYSTEM"); /// Coerce a Nix Value to a plain path, e.g. in order to access the /// file it points to via either `builtins.toPath` or an impure @@ -942,15 +945,37 @@ mod pure_builtins { } } -pub use pure_builtins::builtins as pure_builtins; +fn builtin_tuple(builtin: Builtin) -> (&'static str, Value) { + (builtin.name(), Value::Builtin(builtin)) +} + +/// The set of standard pure builtins in Nix, mostly concerned with +/// data structure manipulation (string, attrs, list, etc. functions). 
+pub fn pure_builtins() -> Vec<(&'static str, Value)> { + let mut result = pure_builtins::builtins() + .into_iter() + .map(builtin_tuple) + .collect::>(); + + // Pure-value builtins + result.push(("nixVersion", Value::String("2.3-compat-tvix-0.1".into()))); + result.push(("langVersion", Value::Integer(6))); + + result.push(( + "currentSystem", + crate::systems::llvm_triple_to_nix_double(CURRENT_PLATFORM).into(), + )); + + result +} /// Placeholder builtins that technically have a function which we do /// not yet implement, but which is also not easily observable from /// within a pure evaluation context. /// /// These are used as a crutch to make progress on nixpkgs evaluation. -fn placeholders() -> Vec { - vec![ +pub fn placeholders() -> Vec<(&'static str, Value)> { + let ph = vec![ Builtin::new( "addErrorContext", &[ @@ -1041,84 +1066,7 @@ fn placeholders() -> Vec { Ok(Value::Attrs(Box::new(attrs))) }, ), - ] -} -// we set TVIX_CURRENT_SYSTEM in build.rs -pub const CURRENT_PLATFORM: &str = env!("TVIX_CURRENT_SYSTEM"); - -/// Set of Nix builtins that are globally available. -pub fn global_builtins(source: SourceCode) -> GlobalsMapFunc { - Box::new(move |globals: &std::rc::Weak| { - let mut map: BTreeMap<&'static str, Value> = BTreeMap::new(); - - // Pure-value builtins - map.insert("nixVersion", Value::String("2.3-compat-tvix-0.1".into())); - - map.insert("langVersion", Value::Integer(6)); - - map.insert( - "currentSystem", - crate::systems::llvm_triple_to_nix_double(CURRENT_PLATFORM).into(), - ); - - let mut add_builtins = |builtins: Vec| { - for builtin in builtins { - map.insert(builtin.name(), Value::Builtin(builtin)); - } - }; - - add_builtins(pure_builtins()); - add_builtins(placeholders()); - - #[cfg(feature = "impure")] - { - map.extend(impure::builtins()); - - // We need to insert import into the builtins, but the - // builtins passed to import must have import *in it*. 
- let import = Value::Builtin(crate::builtins::impure::builtins_import(globals, source)); - - map.insert("import", import); - }; - - let mut globals: GlobalsMap = HashMap::new(); - - let builtins = Rc::new(NixAttrs::from_iter(map.into_iter())); - - // known global builtins from the builtins set. - for global in &[ - "abort", - "baseNameOf", - "derivation", - "derivationStrict", - "dirOf", - "fetchGit", - "fetchMercurial", - "fetchTarball", - "fromTOML", - "import", - "isNull", - "map", - "placeholder", - "removeAttrs", - "scopedImport", - "throw", - "toString", - ] { - if let Some(builtin) = builtins.select(global) { - let builtin = builtin.clone(); - globals.insert( - global, - Rc::new(move |c, s| c.emit_constant(builtin.clone(), &s)), - ); - } - } - - globals.insert( - "builtins", - Rc::new(move |c, s| c.emit_constant(Value::attrs(builtins.as_ref().clone()), &s)), - ); + ]; - globals - }) + ph.into_iter().map(builtin_tuple).collect() } diff --git a/tvix/eval/src/compiler/import.rs b/tvix/eval/src/compiler/import.rs new file mode 100644 index 000000000000..3a8847f2cbb4 --- /dev/null +++ b/tvix/eval/src/compiler/import.rs @@ -0,0 +1,105 @@ +//! This module implements the Nix language's `import` feature, which +//! is exposed as a builtin in the Nix language. +//! +//! This is not a typical builtin, as it needs access to internal +//! compiler and VM state (such as the [`crate::SourceCode`] +//! instance, or observers). + +use std::rc::Weak; + +use crate::{ + observer::NoOpObserver, + value::{Builtin, BuiltinArgument, Thunk}, + vm::VM, + ErrorKind, SourceCode, Value, +}; + +use super::GlobalsMap; +use crate::builtins::coerce_value_to_path; + +/// Constructs and inserts the `import` builtin. This builtin is special in that +/// it needs to capture the [crate::SourceCode] structure to correctly track +/// source code locations while invoking a compiler. +// TODO: need to be able to pass through a CompilationObserver, too. 
+// TODO: can the `SourceCode` come from the compiler? +pub(super) fn builtins_import(globals: &Weak, source: SourceCode) -> Builtin { + // This (very cheap, once-per-compiler-startup) clone exists + // solely in order to keep the borrow checker happy. It + // resolves the tension between the requirements of + // Rc::new_cyclic() and Builtin::new() + let globals = globals.clone(); + + Builtin::new( + "import", + &[BuiltinArgument { + strict: true, + name: "path", + }], + None, + move |mut args: Vec, vm: &mut VM| { + let mut path = coerce_value_to_path(&args.pop().unwrap(), vm)?; + if path.is_dir() { + path.push("default.nix"); + } + + let current_span = vm.current_light_span(); + + if let Some(cached) = vm.import_cache.get(&path) { + return Ok(cached.clone()); + } + + let contents = vm.io().read_to_string(path.clone())?; + + let parsed = rnix::ast::Root::parse(&contents); + let errors = parsed.errors(); + + let file = source.add_file(path.to_string_lossy().to_string(), contents); + + if !errors.is_empty() { + return Err(ErrorKind::ImportParseError { + path, + file, + errors: errors.to_vec(), + }); + } + + let result = crate::compiler::compile( + &parsed.tree().expr().unwrap(), + Some(path.clone()), + file, + // The VM must ensure that a strong reference to the + // globals outlives any self-references (which are + // weak) embedded within the globals. If the + // expect() below panics, it means that did not + // happen. + globals + .upgrade() + .expect("globals dropped while still in use"), + &mut NoOpObserver::default(), + ) + .map_err(|err| ErrorKind::ImportCompilerError { + path: path.clone(), + errors: vec![err], + })?; + + if !result.errors.is_empty() { + return Err(ErrorKind::ImportCompilerError { + path, + errors: result.errors, + }); + } + + // Compilation succeeded, we can construct a thunk from whatever it spat + // out and return that. 
+ let res = Value::Thunk(Thunk::new_suspended(result.lambda, current_span)); + + vm.import_cache.insert(path, res.clone()); + + for warning in result.warnings { + vm.push_warning(warning); + } + + Ok(res) + }, + ) +} diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 12fd269c2f9b..4a44f95691c0 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -14,6 +14,7 @@ //! mistakes early during development. mod bindings; +mod import; mod scope; use codemap::Span; @@ -30,8 +31,9 @@ use crate::observer::CompilerObserver; use crate::opcode::{CodeIdx, Count, JumpOffset, OpCode, UpvalueIdx}; use crate::spans::LightSpan; use crate::spans::ToSpan; -use crate::value::{Closure, Formals, Lambda, Thunk, Value}; +use crate::value::{Closure, Formals, Lambda, NixAttrs, Thunk, Value}; use crate::warnings::{EvalWarning, WarningKind}; +use crate::SourceCode; use self::scope::{LocalIdx, LocalPosition, Scope, Upvalue, UpvalueKind}; @@ -73,17 +75,38 @@ impl LambdaCtx { } } +/// The type of a global as used inside of the compiler. Differs from +/// Nix's own notion of "builtins" in that it can emit arbitrary code. +/// Nix's builtins are wrapped inside of this type. +pub type Global = Rc; + /// The map of globally available functions that should implicitly /// be resolvable in the global scope. -pub type GlobalsMap = HashMap<&'static str, Rc>; - -/// Functions with this type are used to construct a -/// self-referential `builtins` object; it takes a weak reference to -/// its own result, similar to how nixpkgs' overlays work. -/// Rc::new_cyclic() is what "ties the knot". The heap allocation -/// (Box) and vtable (dyn) do not impair runtime or compile-time -/// performance; they exist only during compiler startup. 
-pub type GlobalsMapFunc = Box) -> GlobalsMap>; +type GlobalsMap = HashMap<&'static str, Rc>; + +/// Set of builtins that (if they exist) should be made available in +/// the global scope, meaning that they can be accessed not just +/// through `builtins.`, but directly as ``. This is not +/// configurable, it is based on what Nix 2.3 exposed. +const GLOBAL_BUILTINS: &'static [&'static str] = &[ + "abort", + "baseNameOf", + "derivation", + "derivationStrict", + "dirOf", + "fetchGit", + "fetchMercurial", + "fetchTarball", + "fromTOML", + "import", + "isNull", + "map", + "placeholder", + "removeAttrs", + "scopedImport", + "throw", + "toString", +]; pub struct Compiler<'observer> { contexts: Vec, @@ -1183,19 +1206,57 @@ fn optimise_tail_call(chunk: &mut Chunk) { } } -/// Prepare the full set of globals from additional globals supplied -/// by the caller of the compiler, as well as the built-in globals -/// that are always part of the language. This also "ties the knot" -/// required in order for import to have a reference cycle back to -/// the globals. +/// Prepare the full set of globals available in evaluated code. These +/// are constructed from the set of builtins supplied by the caller, +/// which are made available globally under the `builtins` identifier. +/// +/// A subset of builtins (specified by [`GLOBAL_BUILTINS`]) is +/// available globally *iff* they are set. /// -/// Note that all builtin functions are *not* considered part of the -/// language in this sense and MUST be supplied as additional global -/// values, including the `builtins` set itself. -pub fn prepare_globals(additional: GlobalsMapFunc) -> Rc { - Rc::new_cyclic(Box::new(|weak: &Weak| { - let mut globals = additional(weak); +/// Optionally adds the `import` feature if desired by the caller. 
+pub fn prepare_globals( + builtins: Vec<(&'static str, Value)>, + source: SourceCode, + enable_import: bool, +) -> Rc { + Rc::new_cyclic(Box::new(move |weak: &Weak| { + // First step is to construct the builtins themselves as + // `NixAttrs`. + let mut builtins_under_construction: HashMap<&'static str, Value> = + HashMap::from_iter(builtins.into_iter()); + + // At this point, optionally insert `import` if enabled. To + // "tie the knot" of `import` needing the full set of globals + // to instantiate its compiler, the `Weak` reference is passed + // here. + if enable_import { + let import = Value::Builtin(import::builtins_import(weak, source)); + builtins_under_construction.insert("import", import); + } + + // Next, the actual map of globals is constructed and + // populated with (copies) of the values that should be + // available in the global scope (see [`GLOBAL_BUILTINS`]). + let mut globals: GlobalsMap = HashMap::new(); + + for global in GLOBAL_BUILTINS { + if let Some(builtin) = builtins_under_construction.get(global).cloned() { + let global_builtin: Global = + Rc::new(move |c, s| c.emit_constant(builtin.clone(), &s)); + globals.insert(global, global_builtin); + } + } + + // This is followed by the actual `builtins` attribute set + // being constructed and inserted in the global scope. + let builtins_set = + Value::attrs(NixAttrs::from_iter(builtins_under_construction.into_iter())); + globals.insert( + "builtins", + Rc::new(move |c, s| c.emit_constant(builtins_set.clone(), &s)), + ); + // Finally insert the compiler-internal "magic" builtins for top-level values. globals.insert( "true", Rc::new(|compiler, span| { diff --git a/tvix/eval/src/lib.rs b/tvix/eval/src/lib.rs index a467a1884c41..fa76aca56ba7 100644 --- a/tvix/eval/src/lib.rs +++ b/tvix/eval/src/lib.rs @@ -12,7 +12,7 @@ //! These features are optional and the API of this crate exposes functionality //! for controlling how they work. 
-mod builtins; +pub mod builtins; mod chunk; mod compiler; mod errors; @@ -41,7 +41,6 @@ use std::str::FromStr; use std::sync::Arc; // Re-export the public interface used by other crates. -pub use crate::builtins::global_builtins; pub use crate::compiler::{compile, prepare_globals}; pub use crate::errors::{Error, ErrorKind, EvalResult}; pub use crate::io::{DummyIO, EvalIO, FileType}; @@ -83,12 +82,24 @@ pub struct Evaluation<'code, 'co, 'ro> { /// Top-level file reference for this code inside the source map. file: Arc, + /// Set of all builtins that should be available during the + /// evaluation. + /// + /// This defaults to all pure builtins. Users might want to add + /// the set of impure builtins, or other custom builtins. + pub builtins: Vec<(&'static str, Value)>, + /// Implementation of file-IO to use during evaluation, e.g. for /// impure builtins. /// /// Defaults to [`DummyIO`] if not set explicitly. pub io_handle: Box, + /// Determines whether the `import` builtin should be made + /// available. Note that this depends on the `io_handle` being + /// able to read the files specified as arguments to `import`. + pub enable_import: bool, + /// (optional) Nix search path, e.g. the value of `NIX_PATH` used /// for resolving items on the search path (such as ``). pub nix_path: Option, @@ -134,18 +145,34 @@ impl<'code, 'co, 'ro> Evaluation<'code, 'co, 'ro> { let file = source_map.add_file(location_str, code.into()); + let mut builtins = builtins::pure_builtins(); + builtins.extend(builtins::placeholders()); // these are temporary + Evaluation { code, location, source_map, file, + builtins, io_handle: Box::new(DummyIO {}), + enable_import: false, nix_path: None, compiler_observer: None, runtime_observer: None, } } + #[cfg(feature = "impure")] + /// Initialise an `Evaluation` for the given snippet, with all + /// impure features turned on by default. 
+ pub fn new_impure(code: &'code str, location: Option) -> Self { + let mut eval = Self::new(code, location); + eval.enable_import = true; + eval.builtins.extend(builtins::impure_builtins()); + eval.io_handle = Box::new(StdIO); + eval + } + /// Clone the reference to the contained source code map. This is used after /// an evaluation for pretty error printing. pub fn source_map(&self) -> SourceCode { @@ -173,8 +200,8 @@ impl<'code, 'co, 'ro> Evaluation<'code, 'co, 'ro> { // access to the parsed expression. result.expr = parsed.tree().expr(); - let builtins = - crate::compiler::prepare_globals(Box::new(global_builtins(self.source_map()))); + let source = self.source_map(); + let builtins = crate::compiler::prepare_globals(self.builtins, source, self.enable_import); let mut noop_observer = observer::NoOpObserver::default(); let compiler_observer = self.compiler_observer.take().unwrap_or(&mut noop_observer); diff --git a/tvix/eval/src/tests/mod.rs b/tvix/eval/src/tests/mod.rs index bb46cf2b79b0..b9bb6d8cf311 100644 --- a/tvix/eval/src/tests/mod.rs +++ b/tvix/eval/src/tests/mod.rs @@ -17,8 +17,7 @@ fn eval_test(code_path: &str, expect_success: bool) { return; } - let mut eval = crate::Evaluation::new(&code, Some(code_path.into())); - eval.io_handle = Box::new(crate::StdIO); + let eval = crate::Evaluation::new_impure(&code, Some(code_path.into())); let result = eval.evaluate(); -- cgit 1.4.1