Diffstat (limited to 'tvix/eval')
51 files changed, 2073 insertions, 1673 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 4cf8ea146cf8..c413e1780f21 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -8,42 +8,42 @@ name = "tvix_eval" [dependencies] builtin-macros = { path = "./builtin-macros", package = "tvix-eval-builtin-macros" } -bytes = "1.4.0" -bstr = { version = "1.8.0", features = ["serde"] } -codemap = "0.1.3" -codemap-diagnostic = "0.1.1" -dirs = "4.0.0" -genawaiter = { version = "0.99.1", default_features = false } -imbl = { version = "2.0", features = [ "serde" ] } -itertools = "0.12.0" -lazy_static = "1.4.0" -lexical-core = { version = "0.8.5", features = ["format", "parse-floats"] } -os_str_bytes = { version = "6.3", features = ["conversions"] } -path-clean = "0.1" -proptest = { version = "1.3.0", default_features = false, features = ["std", "alloc", "tempfile"], optional = true } -regex = "1.6.0" -rnix = "0.11.0" -rowan = "*" # pinned by rnix -serde = { version = "1.0", features = [ "rc", "derive" ] } -serde_json = "1.0" -smol_str = "0.2.0" -tabwriter = "1.2" -test-strategy = { version = "0.2.1", optional = true } +bytes = { workspace = true } +bstr = { workspace = true, features = ["serde"] } +codemap = { workspace = true } +codemap-diagnostic = { workspace = true } +dirs = { workspace = true } +genawaiter = { workspace = true } +itertools = { workspace = true } +lexical-core = { workspace = true, features = ["format", "parse-floats"] } +os_str_bytes = { workspace = true, features = ["conversions"] } +path-clean = { workspace = true } +proptest = { workspace = true, features = ["std", "alloc", "tempfile"], optional = true } +regex = { workspace = true } +rnix = { workspace = true } +rowan = { workspace = true } # pinned by rnix +serde = { workspace = true, features = ["rc", "derive"] } +serde_json = { workspace = true } +smol_str = { workspace = true } +tabwriter = { workspace = true } +test-strategy = { workspace = true, optional = true } toml = "0.6.0" -sha2 = "0.10.8" -sha1 = "0.10.6" -md-5 = "0.10.6" -data-encoding = "2.5.0" +sha2 = { workspace = true } +sha1 = { workspace = true } +md-5 = { workspace = true } +data-encoding = { workspace = true } +rustc-hash = { workspace = true } +nohash-hasher = { workspace = true } +vu128 = { workspace = true } +thiserror.workspace = true [dev-dependencies] -criterion = "0.5" -itertools = "0.12.0" -pretty_assertions = "1.2.1" -rstest = "0.19.0" -tempfile = "3.3.0" - -[target.'cfg(not(target_env = "msvc"))'.dev-dependencies] -tikv-jemallocator = "0.5" +criterion = { workspace = true } +itertools = { workspace = true } +mimalloc = { workspace = true } +pretty_assertions = { workspace = true } +rstest = { workspace = true } +tempfile = { workspace = true } [features] default = ["impure", "arbitrary", "nix_tests"] @@ -56,7 +56,12 @@ nix_tests = [] impure = [] # Enables Arbitrary impls for internal types (required to run tests) -arbitrary = ["proptest", "test-strategy", "imbl/proptest"] +arbitrary = ["proptest", "test-strategy"] + +# Don't leak strings (enable this if you care about peak memory usage of eval) +# +# This is intended as a stop-gap until we have a garbage collector +no_leak = [] [[bench]] name = "eval" diff --git a/tvix/eval/benches/eval.rs b/tvix/eval/benches/eval.rs index 1333f5018cb3..f4d6489f1e5c 100644 --- a/tvix/eval/benches/eval.rs +++ b/tvix/eval/benches/eval.rs @@ -1,14 +1,14 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use itertools::Itertools; -#[cfg(not(target_env = "msvc"))] -use tikv_jemallocator::Jemalloc; +use 
mimalloc::MiMalloc; -#[cfg(not(target_env = "msvc"))] #[global_allocator] -static GLOBAL: Jemalloc = Jemalloc; +static GLOBAL: MiMalloc = MiMalloc; fn interpret(code: &str) { - tvix_eval::Evaluation::new_pure().evaluate(code, None); + tvix_eval::Evaluation::builder_pure() + .build() + .evaluate(code, None); } fn eval_literals(c: &mut Criterion) { diff --git a/tvix/eval/builtin-macros/Cargo.toml b/tvix/eval/builtin-macros/Cargo.toml index 3a35ea12a0c0..0696d742b342 100644 --- a/tvix/eval/builtin-macros/Cargo.toml +++ b/tvix/eval/builtin-macros/Cargo.toml @@ -5,9 +5,9 @@ authors = [ "Griffin Smith <root@gws.fyi>" ] edition = "2021" [dependencies] -syn = { version = "1.0.57", features = ["full", "parsing", "printing", "visit", "visit-mut", "extra-traits"] } -quote = "1.0.8" -proc-macro2 = "1" +syn = { version = "1.0.109", features = ["full", "parsing", "printing", "visit", "visit-mut", "extra-traits"] } +quote = { workspace = true } +proc-macro2 = { workspace = true } [lib] proc-macro = true diff --git a/tvix/eval/builtin-macros/src/lib.rs b/tvix/eval/builtin-macros/src/lib.rs index 5cc9807f54fe..9b14aee44605 100644 --- a/tvix/eval/builtin-macros/src/lib.rs +++ b/tvix/eval/builtin-macros/src/lib.rs @@ -281,31 +281,45 @@ pub fn builtins(args: TokenStream, item: TokenStream) -> TokenStream { let ty = &arg.ty; let ident = &arg.name; - if arg.strict { - if arg.catch { - f.block = Box::new(parse_quote_spanned! {arg.span=> { - let #ident: #ty = tvix_eval::generators::request_force(&co, values.pop() - .expect("Tvix bug: builtin called with incorrect number of arguments")).await; + f.block = Box::new(match arg { + BuiltinArgument { + strict: true, + catch: true, + .. + } => parse_quote_spanned! { + arg.span => { + let #ident: #ty = tvix_eval::generators::request_force( + &co, values.pop().expect("Tvix bug: builtin called with incorrect number of arguments") + ).await; #block - }}); - } else { - f.block = Box::new(parse_quote_spanned! {arg.span=> { - let #ident: #ty = tvix_eval::generators::request_force(&co, values.pop() - .expect("Tvix bug: builtin called with incorrect number of arguments")).await; + } + }, + BuiltinArgument { + strict: true, + catch: false, + .. + } => parse_quote_spanned! { + arg.span => { + let #ident: #ty = tvix_eval::generators::request_force( + &co, values.pop().expect("Tvix bug: builtin called with incorrect number of arguments") + ).await; if #ident.is_catchable() { return Ok(#ident); } #block - }}); - } - } else { - f.block = Box::new(parse_quote_spanned! {arg.span=> { - let #ident: #ty = values.pop() - .expect("Tvix bug: builtin called with incorrect number of arguments"); - - #block - }}) - } + } + }, + BuiltinArgument { + strict: false, + catch: _, + .. + } => parse_quote_spanned! { + arg.span => { + let #ident: #ty = values.pop().expect("Tvix bug: builtin called with incorrect number of arguments"); + #block + } + }, + }); } let fn_name = f.sig.ident.clone(); diff --git a/tvix/eval/clippy.toml b/tvix/eval/clippy.toml new file mode 100644 index 000000000000..c5302112b3ae --- /dev/null +++ b/tvix/eval/clippy.toml @@ -0,0 +1,3 @@ +# See https://nnethercote.github.io/perf-book/hashing.html. 
Use FxHashMap and +# FxHashSet, not HashMap and HashSet +disallowed-types = ["std::collections::HashMap", "std::collections::HashSet"] diff --git a/tvix/eval/default.nix b/tvix/eval/default.nix index 9dd5875f85a5..9370c81ced1c 100644 --- a/tvix/eval/default.nix +++ b/tvix/eval/default.nix @@ -8,9 +8,9 @@ testInputs = [ pkgs.nix ]; }).overrideAttrs (old: rec { meta.ci.targets = lib.filter (x: lib.hasPrefix "with-features" x || x == "no-features") (lib.attrNames passthru); - passthru = depot.tvix.utils.mkFeaturePowerset { + passthru = old.passthru // (depot.tvix.utils.mkFeaturePowerset { inherit (old) crateName; features = [ "nix_tests" ]; override.testInputs = [ pkgs.nix ]; - }; + }); }) diff --git a/tvix/eval/src/builtins/impure.rs b/tvix/eval/src/builtins/impure.rs index c82b910f5ffa..dccb7fbb7d8a 100644 --- a/tvix/eval/src/builtins/impure.rs +++ b/tvix/eval/src/builtins/impure.rs @@ -9,7 +9,6 @@ use std::{ use crate::{ self as tvix_eval, errors::ErrorKind, - io::FileType, value::NixAttrs, vm::generators::{self, GenCo}, NixString, Value, @@ -60,12 +59,7 @@ mod impure_builtins { NixString::from( String::from_utf8(name.to_vec()).expect("parsing file name as string"), ), - Value::from(match ftype { - FileType::Directory => "directory", - FileType::Regular => "regular", - FileType::Symlink => "symlink", - FileType::Unknown => "unknown", - }), + Value::from(ftype.to_string()), ) }); @@ -87,6 +81,18 @@ mod impure_builtins { } } } + + #[builtin("readFileType")] + async fn builtin_read_file_type(co: GenCo, path: Value) -> Result<Value, ErrorKind> { + match coerce_value_to_path(&co, path).await? { + Err(cek) => Ok(Value::from(cek)), + Ok(path) => Ok(Value::from( + generators::request_read_file_type(&co, path) + .await + .to_string(), + )), + } + } } /// Return all impure builtins, that is all builtins which may perform I/O diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 96e9985747f5..babea3200599 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -6,11 +6,10 @@ use bstr::{ByteSlice, ByteVec}; use builtin_macros::builtins; use genawaiter::rc::Gen; -use imbl::OrdMap; use regex::Regex; use std::cmp::{self, Ordering}; +use std::collections::BTreeMap; use std::collections::VecDeque; -use std::collections::{BTreeMap, HashSet}; use std::path::PathBuf; use crate::arithmetic_op; @@ -85,9 +84,9 @@ mod pure_builtins { use std::ffi::OsString; use bstr::{BString, ByteSlice, B}; - use imbl::Vector; use itertools::Itertools; use os_str_bytes::OsStringBytes; + use rustc_hash::FxHashSet; use crate::{value::PointerEquality, AddContext, NixContext, NixContextElement}; @@ -245,7 +244,7 @@ mod pure_builtins { #[builtin("concatLists")] async fn builtin_concat_lists(co: GenCo, lists: Value) -> Result<Value, ErrorKind> { - let mut out = imbl::Vector::new(); + let mut out = Vec::new(); for value in lists.to_list()? 
{ let list = try_value!(generators::request_force(&co, value).await).to_list()?; @@ -258,7 +257,7 @@ mod pure_builtins { #[builtin("concatMap")] async fn builtin_concat_map(co: GenCo, f: Value, list: Value) -> Result<Value, ErrorKind> { let list = list.to_list()?; - let mut res = imbl::Vector::new(); + let mut res = Vec::new(); for val in list { let out = generators::request_call_with(&co, f.clone(), [val]).await; let out = try_value!(generators::request_force(&co, out).await); @@ -386,13 +385,13 @@ mod pure_builtins { #[builtin("filter")] async fn builtin_filter(co: GenCo, pred: Value, list: Value) -> Result<Value, ErrorKind> { let list: NixList = list.to_list()?; - let mut out = imbl::Vector::new(); + let mut out = Vec::new(); for value in list { let result = generators::request_call_with(&co, pred.clone(), [value.clone()]).await; let verdict = try_value!(generators::request_force(&co, result).await); if verdict.as_bool()? { - out.push_back(value); + out.push(value); } } @@ -443,17 +442,20 @@ mod pure_builtins { #[builtin("fromJSON")] async fn builtin_from_json(co: GenCo, json: Value) -> Result<Value, ErrorKind> { let json_str = json.to_str()?; - serde_json::from_slice(&json_str).map_err(|err| err.into()) } #[builtin("toJSON")] async fn builtin_to_json(co: GenCo, val: Value) -> Result<Value, ErrorKind> { - match val.into_contextful_json(&co).await? { - Err(cek) => Ok(Value::from(cek)), - Ok((json_value, ctx)) => { - let json_str = serde_json::to_string(&json_value)?; - Ok(NixString::new_context_from(ctx, json_str).into()) + match val.into_contextful_json(&co).await { + Err(ErrorKind::CatchableError(catchable)) => Ok(Value::Catchable(Box::new(catchable))), + Err(err) => Err(err), + Ok((json, context)) => { + let json_str = serde_json::to_string(&json) + .map_err(|err| ErrorKind::JsonError(err.to_string()))?; + Ok(Value::String(NixString::new_context_from( + context, json_str, + ))) } } } @@ -480,7 +482,7 @@ mod pure_builtins { let operator = attrs.select_required("operator")?; - let mut res = imbl::Vector::new(); + let mut res = Vec::new(); let mut done_keys: Vec<Value> = vec![]; while let Some(val) = work_set.pop_front() { @@ -498,7 +500,7 @@ mod pure_builtins { continue; } - res.push_back(val.clone()); + res.push(val.clone()); let op_result = generators::request_force( &co, @@ -519,18 +521,18 @@ mod pure_builtins { #[lazy] generator: Value, length: Value, ) -> Result<Value, ErrorKind> { - let mut out = imbl::Vector::<Value>::new(); let len = length.as_int()?; + let mut out = Vec::with_capacity( + len.try_into() + .map_err(|_| ErrorKind::Abort(format!("can not create list of size {}", len)))?, + ); + // the best span we can get… let span = generators::request_span(&co).await; for i in 0..len { - let val = Value::Thunk(Thunk::new_suspended_call( - generator.clone(), - i.into(), - span.clone(), - )); - out.push_back(val); + let val = Value::Thunk(Thunk::new_suspended_call(generator.clone(), i.into(), span)); + out.push(val); } Ok(Value::List(out.into())) @@ -551,7 +553,7 @@ mod pure_builtins { #[builtin("groupBy")] async fn builtin_group_by(co: GenCo, f: Value, list: Value) -> Result<Value, ErrorKind> { - let mut res: BTreeMap<NixString, imbl::Vector<Value>> = BTreeMap::new(); + let mut res: BTreeMap<NixString, Vec<Value>> = BTreeMap::new(); for val in list.to_list()? 
{ let key = try_value!( generators::request_force( @@ -562,7 +564,7 @@ mod pure_builtins { ) .to_str()?; - res.entry(key).or_default().push_back(val); + res.entry(key).or_default().push(val); } Ok(Value::attrs(NixAttrs::from_iter( res.into_iter() @@ -628,7 +630,7 @@ mod pure_builtins { let elements = groups .into_iter() .map(|(key, group)| { - let mut outputs: Vector<NixString> = Vector::new(); + let mut outputs: Vec<NixString> = Vec::new(); let mut is_path = false; let mut all_outputs = false; @@ -641,7 +643,7 @@ mod pure_builtins { NixContextElement::Single { name, derivation } => { debug_assert!(derivation == key, "Unexpected group containing mixed keys, expected: {:?}, encountered {:?}", key, derivation); - outputs.push_back(name.clone().into()); + outputs.push(name.clone().into()); } NixContextElement::Derivation(drv_path) => { @@ -668,7 +670,7 @@ mod pure_builtins { vec_attrs.push(("outputs", Value::List(outputs .into_iter() .map(|s| s.into()) - .collect::<Vector<Value>>() + .collect::<Vec<Value>>() .into() ))); } @@ -704,7 +706,7 @@ mod pure_builtins { // // In this implementation, we do none of that, no syntax checks, no realization. // The next `TODO` are the checks that Nix implements. - let mut ctx_elements: HashSet<NixContextElement> = HashSet::new(); + let mut ctx_elements: FxHashSet<NixContextElement> = FxHashSet::default(); let span = generators::request_span(&co).await; let origin = origin .coerce_to_string( @@ -795,7 +797,7 @@ mod pure_builtins { } let mut right_keys = right_set.keys(); - let mut out: OrdMap<NixString, Value> = OrdMap::new(); + let mut out: BTreeMap<NixString, Value> = BTreeMap::new(); // Both iterators have at least one entry let mut left = left_keys.next().unwrap(); @@ -976,15 +978,16 @@ mod pure_builtins { } #[builtin("map")] - async fn builtin_map(co: GenCo, #[lazy] f: Value, list: Value) -> Result<Value, ErrorKind> { - let mut out = imbl::Vector::<Value>::new(); + async fn builtin_map(co: GenCo, #[lazy] f: Value, list_val: Value) -> Result<Value, ErrorKind> { + let list = list_val.to_list()?; + let mut out = Vec::with_capacity(list.len()); // the best span we can get… let span = generators::request_span(&co).await; - for val in list.to_list()? { - let result = Value::Thunk(Thunk::new_suspended_call(f.clone(), val, span.clone())); - out.push_back(result) + for val in list { + let result = Value::Thunk(Thunk::new_suspended_call(f.clone(), val, span)); + out.push(result) } Ok(Value::List(out.into())) @@ -997,7 +1000,7 @@ mod pure_builtins { attrs: Value, ) -> Result<Value, ErrorKind> { let attrs = attrs.to_attrs()?; - let mut out = imbl::OrdMap::new(); + let mut out: BTreeMap<NixString, Value> = BTreeMap::new(); // the best span we can get… let span = generators::request_span(&co).await; @@ -1006,9 +1009,9 @@ mod pure_builtins { let result = Value::Thunk(Thunk::new_suspended_call( f.clone(), key.clone().into(), - span.clone(), + span, )); - let result = Value::Thunk(Thunk::new_suspended_call(result, value, span.clone())); + let result = Value::Thunk(Thunk::new_suspended_call(result, value, span)); out.insert(key, result); } @@ -1043,7 +1046,7 @@ mod pure_builtins { // and then a compressor name will be extracted from that. 
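Note on the hashing changes above: the new clippy.toml forbids std::collections::HashMap/HashSet, and the builtins now use the rustc-hash replacements (e.g. FxHashSet for context elements and removeAttrs keys). A minimal usage sketch of that pattern, assuming only the rustc-hash dependency added to Cargo.toml in this diff:

```rust
use rustc_hash::{FxHashMap, FxHashSet};

fn main() {
    // The Fx hasher carries no random seed, so the collections are
    // constructed with `default()` rather than `new()`.
    let mut outputs: FxHashSet<&str> = FxHashSet::default();
    outputs.insert("out");

    let mut counts: FxHashMap<String, usize> = FxHashMap::default();
    *counts.entry("drv".to_owned()).or_insert(0) += 1;

    assert!(outputs.contains("out"));
    assert_eq!(counts["drv"], 1);
}
```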
grp.map(|g| Value::from(g.as_str())).unwrap_or(Value::Null) }) - .collect::<imbl::Vector<Value>>() + .collect::<Vec<Value>>() .into(), )), None => Ok(Value::Null), @@ -1086,17 +1089,17 @@ mod pure_builtins { #[builtin("partition")] async fn builtin_partition(co: GenCo, pred: Value, list: Value) -> Result<Value, ErrorKind> { - let mut right: imbl::Vector<Value> = Default::default(); - let mut wrong: imbl::Vector<Value> = Default::default(); + let mut right: Vec<Value> = Default::default(); + let mut wrong: Vec<Value> = Default::default(); let list: NixList = list.to_list()?; for elem in list { let result = generators::request_call_with(&co, pred.clone(), [elem.clone()]).await; if try_value!(generators::request_force(&co, result).await).as_bool()? { - right.push_back(elem); + right.push(elem); } else { - wrong.push_back(elem); + wrong.push(elem); }; } @@ -1119,7 +1122,7 @@ mod pure_builtins { .to_list()? .into_iter() .map(|v| v.to_str()) - .collect::<Result<HashSet<_>, _>>()?; + .collect::<Result<FxHashSet<_>, _>>()?; let res = attrs.iter().filter_map(|(k, v)| { if !keys.contains(k) { Some((k.clone(), v.clone())) @@ -1251,12 +1254,12 @@ mod pure_builtins { let re = Regex::new(re.to_str()?).unwrap(); let mut capture_locations = re.capture_locations(); let num_captures = capture_locations.len(); - let mut ret = imbl::Vector::new(); + let mut ret = Vec::new(); let mut pos = 0; while let Some(thematch) = re.captures_read_at(&mut capture_locations, text, pos) { // push the unmatched characters preceding the match - ret.push_back(Value::from(NixString::new_inherit_context_from( + ret.push(Value::from(NixString::new_inherit_context_from( &s, &text[pos..thematch.start()], ))); @@ -1265,7 +1268,7 @@ mod pure_builtins { // group in the regex, containing the characters // matched by that capture group, or null if no match. // We skip capture 0; it represents the whole match. - let v: imbl::Vector<Value> = (1..num_captures) + let v: Vec<Value> = (1..num_captures) .map(|i| capture_locations.get(i)) .map(|o| { o.map(|(start, end)| { @@ -1276,7 +1279,7 @@ mod pure_builtins { .unwrap_or(Value::Null) }) .collect(); - ret.push_back(Value::List(NixList::from(v))); + ret.push(Value::List(NixList::from(v))); if pos == text.len() { break; } @@ -1286,7 +1289,7 @@ mod pure_builtins { // push the unmatched characters following the last match // Here, a surprising thing happens: we silently discard the original // context. This is as intended, Nix does the same. - ret.push_back(Value::from(&text[pos..])); + ret.push(Value::from(&text[pos..])); Ok(Value::List(NixList::from(ret))) } @@ -1495,17 +1498,7 @@ mod pure_builtins { let mut buf: Vec<u8> = vec![]; let context = to_xml::value_to_xml(&mut buf, &value)?; - Ok(( - buf, - // FUTUREWORK: We have a distinction between an empty context, and - // no context at all. Fix this. 
- if !context.is_empty() { - Some(Box::new(context)) - } else { - None - }, - ) - .into()) + Ok(NixString::new_context_from(context, buf).into()) } #[builtin("trace")] @@ -1594,7 +1587,7 @@ pub fn pure_builtins() -> Vec<(&'static str, Value)> { let mut result = pure_builtins::builtins(); // Pure-value builtins - result.push(("nixVersion", Value::from("2.3-compat-tvix-0.1"))); + result.push(("nixVersion", Value::from("2.3.17-compat-tvix-0.1"))); result.push(("langVersion", Value::Integer(6))); result.push(("null", Value::Null)); result.push(("true", Value::Bool(true))); diff --git a/tvix/eval/src/builtins/to_xml.rs b/tvix/eval/src/builtins/to_xml.rs index 093e127fe25e..785992e457ac 100644 --- a/tvix/eval/src/builtins/to_xml.rs +++ b/tvix/eval/src/builtins/to_xml.rs @@ -122,7 +122,6 @@ fn value_variant_to_xml<W: Write>(w: &mut XmlEmitter<W>, value: &Value) -> Resul | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(..) | Value::FinaliseRequest(_) => { return Err(ErrorKind::TvixBug { msg: "internal value variant encountered in builtins.toXML", diff --git a/tvix/eval/src/chunk.rs b/tvix/eval/src/chunk.rs index f1a35a6ce162..2a5446a782ed 100644 --- a/tvix/eval/src/chunk.rs +++ b/tvix/eval/src/chunk.rs @@ -1,9 +1,10 @@ +use crate::opcode::{CodeIdx, ConstantIdx, Op, OpArg}; +use crate::value::Value; +use crate::{CoercionKind, SourceCode}; use std::io::Write; -use std::ops::{Index, IndexMut}; -use crate::opcode::{CodeIdx, ConstantIdx, OpCode}; -use crate::value::Value; -use crate::SourceCode; +/// Maximum size of a u64 encoded in the vu128 varint encoding. +const U64_VARINT_SIZE: usize = 9; /// Represents a source location from which one or more operations /// were compiled. @@ -30,39 +31,69 @@ struct SourceSpan { /// emitted by the compiler. #[derive(Debug, Default)] pub struct Chunk { - pub code: Vec<OpCode>, + pub code: Vec<u8>, pub constants: Vec<Value>, spans: Vec<SourceSpan>, + + /// Index of the last operation (i.e. not data) written to the code vector. + /// Some operations (e.g. jump patching) need to know this. 
+ last_op: usize, } -impl Index<ConstantIdx> for Chunk { - type Output = Value; +impl Chunk { + pub fn push_op(&mut self, op: Op, span: codemap::Span) -> usize { + self.last_op = self.code.len(); + self.code.push(op as u8); + self.push_span(span, self.last_op); + self.last_op + } - fn index(&self, index: ConstantIdx) -> &Self::Output { - &self.constants[index.0] + pub fn push_uvarint(&mut self, data: u64) { + let mut encoded = [0u8; U64_VARINT_SIZE]; + let bytes_written = vu128::encode_u64(&mut encoded, data); + self.code.extend_from_slice(&encoded[..bytes_written]); } -} -impl Index<CodeIdx> for Chunk { - type Output = OpCode; + pub fn read_uvarint(&self, idx: usize) -> (u64, usize) { + debug_assert!( + idx < self.code.len(), + "invalid bytecode (missing varint operand)", + ); - fn index(&self, index: CodeIdx) -> &Self::Output { - &self.code[index.0] + if self.code.len() - idx >= U64_VARINT_SIZE { + vu128::decode_u64( + &self.code[idx..idx + U64_VARINT_SIZE] + .try_into() + .expect("size statically checked"), + ) + } else { + let mut tmp = [0u8; U64_VARINT_SIZE]; + tmp[..self.code.len() - idx].copy_from_slice(&self.code[idx..]); + vu128::decode_u64(&tmp) + } } -} -impl IndexMut<CodeIdx> for Chunk { - fn index_mut(&mut self, index: CodeIdx) -> &mut Self::Output { - &mut self.code[index.0] + pub fn push_u16(&mut self, data: u16) { + self.code.extend_from_slice(&data.to_le_bytes()) } -} -impl Chunk { - pub fn push_op(&mut self, data: OpCode, span: codemap::Span) -> CodeIdx { - let idx = self.code.len(); - self.code.push(data); - self.push_span(span, idx); - CodeIdx(idx) + /// Patches the argument to the jump operand of the jump at the given index + /// to point to the *next* instruction that will be emitted. + pub fn patch_jump(&mut self, idx: usize) { + let offset = (self.code.len() - idx - /* arg idx = */ 1 - /* jump arg size = */ 2) as u16; + self.code[idx + 1..idx + 3].copy_from_slice(&offset.to_le_bytes()) + } + + pub fn read_u16(&self, idx: usize) -> u16 { + if idx + 2 > self.code.len() { + panic!("Tvix bug: invalid bytecode (expected u16 operand not found)") + } + + let byte_array: &[u8; 2] = &self.code[idx..idx + 2] + .try_into() + .expect("fixed-size slice can not fail to convert to array"); + + u16::from_le_bytes(*byte_array) } /// Get the first span of a chunk, no questions asked. @@ -70,23 +101,13 @@ impl Chunk { self.spans[0].span } - /// Return a reference to the last op in the chunk, if any - pub fn last_op(&self) -> Option<&OpCode> { - self.code.last() - } - - /// Pop the last operation from the chunk and clean up its tracked - /// span. Used when the compiler backtracks. - pub fn pop_op(&mut self) { - // Simply drop the last op. - self.code.pop(); - - if let Some(span) = self.spans.last() { - // If the last span started at this op, drop it. - if span.start == self.code.len() { - self.spans.pop(); - } + /// Return the last op in the chunk together with its index, if any. + pub fn last_op(&self) -> Option<(Op, usize)> { + if self.code.is_empty() { + return None; } + + Some((self.code[self.last_op].into(), self.last_op)) } pub fn push_constant(&mut self, data: Value) -> ConstantIdx { @@ -100,8 +121,6 @@ impl Chunk { self.constants.get(constant.0) } - // Span tracking implementation - fn push_span(&mut self, span: codemap::Span, start: usize) { match self.spans.last_mut() { // We do not need to insert the same span again, as this @@ -136,66 +155,88 @@ impl Chunk { } /// Write the disassembler representation of the operation at - /// `idx` to the specified writer. 
+ /// `idx` to the specified writer, and return how many bytes in the code to + /// skip for the next op. pub fn disassemble_op<W: Write>( &self, writer: &mut W, source: &SourceCode, width: usize, idx: CodeIdx, - ) -> Result<(), std::io::Error> { + ) -> Result<usize, std::io::Error> { write!(writer, "{:#width$x}\t ", idx.0, width = width)?; // Print continuation character if the previous operation was at // the same line, otherwise print the line. let line = source.get_line(self.get_span(idx)); - if idx.0 > 0 && source.get_line(self.get_span(CodeIdx(idx.0 - 1))) == line { + if idx.0 > 0 && source.get_line(self.get_span(idx - 1)) == line { write!(writer, " |\t")?; } else { write!(writer, "{:4}\t", line)?; } - match self[idx] { - OpCode::OpConstant(idx) => { - let val_str = match &self[idx] { - Value::Thunk(t) => t.debug_repr(), - Value::Closure(c) => format!("closure({:p})", c.lambda), - val => format!("{}", val), - }; + let _fmt_constant = |idx: ConstantIdx| match &self.constants[idx.0] { + Value::Thunk(t) => t.debug_repr(), + Value::Closure(c) => format!("closure({:p})", c.lambda), + Value::Blueprint(b) => format!("blueprint({:p})", b), + val => format!("{}", val), + }; - writeln!(writer, "OpConstant({}@{})", val_str, idx.0) + let op: Op = self.code[idx.0].into(); + + match op.arg_type() { + OpArg::None => { + writeln!(writer, "Op{:?}", op)?; + Ok(1) } - op => writeln!(writer, "{:?}", op), - }?; - Ok(()) - } + OpArg::Fixed => { + let arg = self.read_u16(idx.0 + 1); + writeln!(writer, "Op{:?}({})", op, arg)?; + Ok(3) + } - /// Extend this chunk with the content of another, moving out of the other - /// in the process. - /// - /// This is used by the compiler when it detects that it unnecessarily - /// thunked a nested expression. - pub fn extend(&mut self, other: Self) { - // Some operations need to be modified in certain ways before being - // valid as part of the new chunk. - let const_count = self.constants.len(); - for (idx, op) in other.code.iter().enumerate() { - let span = other.get_span(CodeIdx(idx)); - match op { - // As the constants shift, the index needs to be moved relatively. - OpCode::OpConstant(ConstantIdx(idx)) => { - self.push_op(OpCode::OpConstant(ConstantIdx(idx + const_count)), span) + OpArg::Uvarint => { + let (arg, size) = self.read_uvarint(idx.0 + 1); + writeln!(writer, "Op{:?}({})", op, arg)?; + Ok(1 + size) + } + + _ => match op { + Op::CoerceToString => { + let kind: CoercionKind = self.code[idx.0 + 1].into(); + writeln!(writer, "Op{:?}({:?})", op, kind)?; + Ok(2) } - // Other operations either operate on relative offsets, or no - // offsets, and are safe to keep as-is. 
- _ => self.push_op(*op, span), - }; - } + Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => { + let mut cidx = idx.0 + 1; - self.constants.extend(other.constants); - self.spans.extend(other.spans); + let (bp_idx, size) = self.read_uvarint(cidx); + cidx += size; + + let (packed_count, size) = self.read_uvarint(cidx); + cidx += size; + + let captures_with = packed_count & 0b1 == 1; + let count = packed_count >> 1; + + write!(writer, "Op{:?}(BP @ {}, ", op, bp_idx)?; + if captures_with { + write!(writer, "captures with, ")?; + } + writeln!(writer, "{} upvalues)", count)?; + + for _ in 0..count { + let (_, size) = self.read_uvarint(cidx); + cidx += size; + } + + Ok(cidx - idx.0) + } + _ => panic!("Tvix bug: don't know how to format argument for Op{:?}", op), + }, + } } } @@ -211,79 +252,49 @@ mod tests { #[test] fn push_op() { let mut chunk = Chunk::default(); - chunk.push_op(OpCode::OpAdd, dummy_span()); - assert_eq!(chunk.code.last().unwrap(), &OpCode::OpAdd); + let idx = chunk.push_op(Op::Add, dummy_span()); + assert_eq!(*chunk.code.last().unwrap(), Op::Add as u8); + assert_eq!(chunk.code[idx], Op::Add as u8); } #[test] - fn extend_empty() { + fn push_op_with_arg() { let mut chunk = Chunk::default(); - chunk.push_op(OpCode::OpAdd, dummy_span()); + let mut idx = chunk.push_op(Op::Constant, dummy_span()); + chunk.push_uvarint(42); - let other = Chunk::default(); - chunk.extend(other); + assert_eq!(chunk.code[idx], Op::Constant as u8); - assert_eq!( - chunk.code, - vec![OpCode::OpAdd], - "code should not have changed" - ); + idx += 1; + let (arg, size) = chunk.read_uvarint(idx); + assert_eq!(idx + size, chunk.code.len()); + assert_eq!(arg, 42); } #[test] - fn extend_simple() { - let span = dummy_span(); + fn push_jump() { let mut chunk = Chunk::default(); - chunk.push_op(OpCode::OpAdd, span); - let mut other = Chunk::default(); - other.push_op(OpCode::OpSub, span); - other.push_op(OpCode::OpMul, span); + chunk.push_op(Op::Constant, dummy_span()); + chunk.push_uvarint(0); - let expected_code = vec![OpCode::OpAdd, OpCode::OpSub, OpCode::OpMul]; + let idx = chunk.push_op(Op::Jump, dummy_span()); + chunk.push_u16(0); - chunk.extend(other); + chunk.push_op(Op::Constant, dummy_span()); + chunk.push_uvarint(1); - assert_eq!(chunk.code, expected_code, "code should have been extended"); - } + chunk.patch_jump(idx); + chunk.push_op(Op::Return, dummy_span()); - #[test] - fn extend_with_constant() { - let span = dummy_span(); - let mut chunk = Chunk::default(); - chunk.push_op(OpCode::OpAdd, span); - let cidx = chunk.push_constant(Value::Integer(0)); - assert_eq!( - cidx.0, 0, - "first constant in main chunk should have index 0" - ); - chunk.push_op(OpCode::OpConstant(cidx), span); - - let mut other = Chunk::default(); - other.push_op(OpCode::OpSub, span); - let other_cidx = other.push_constant(Value::Integer(1)); - assert_eq!( - other_cidx.0, 0, - "first constant in other chunk should have index 0" - ); - other.push_op(OpCode::OpConstant(other_cidx), span); - - chunk.extend(other); - - let expected_code = vec![ - OpCode::OpAdd, - OpCode::OpConstant(ConstantIdx(0)), - OpCode::OpSub, - OpCode::OpConstant(ConstantIdx(1)), // <- note: this was rewritten + #[rustfmt::skip] + let expected: Vec<u8> = vec![ + Op::Constant as u8, 0, + Op::Jump as u8, 2, 0, + Op::Constant as u8, 1, + Op::Return as u8, ]; - assert_eq!( - chunk.code, expected_code, - "code should have been extended and rewritten" - ); - - assert_eq!(chunk.constants.len(), 2); - assert!(matches!(chunk.constants[0], Value::Integer(0))); - 
assert!(matches!(chunk.constants[1], Value::Integer(1))); + assert_eq!(chunk.code, expected); } } diff --git a/tvix/eval/src/compiler/bindings.rs b/tvix/eval/src/compiler/bindings.rs index 634cc5402247..6a3ae485936c 100644 --- a/tvix/eval/src/compiler/bindings.rs +++ b/tvix/eval/src/compiler/bindings.rs @@ -9,6 +9,8 @@ use std::iter::Peekable; use rnix::ast::HasEntry; use rowan::ast::AstChildren; +use crate::spans::{EntireFile, OrEntireFile}; + use super::*; type PeekableAttrs = Peekable<AstChildren<ast::Attr>>; @@ -556,6 +558,15 @@ impl Compiler<'_, '_> { self.scope_mut().end_scope(); } + /// Emit definitions for all variables in the top-level global env passed to the evaluation (eg + /// local variables in the REPL) + pub(super) fn compile_env(&mut self, env: &FxHashMap<SmolStr, Value>) { + for (name, value) in env { + self.scope_mut().declare_constant(name.to_string()); + self.emit_constant(value.clone(), &EntireFile); + } + } + /// Actually binds all tracked bindings by emitting the bytecode that places /// them in their stack slots. fn bind_values(&mut self, bindings: TrackedBindings) { @@ -569,7 +580,7 @@ impl Compiler<'_, '_> { KeySlot::Static { slot, name } => { let span = self.scope()[slot].span; - self.emit_constant(name.as_str().into(), &span); + self.emit_constant(name.as_str().into(), &OrEntireFile(span)); self.scope_mut().mark_initialised(slot); } @@ -594,7 +605,7 @@ impl Compiler<'_, '_> { c.emit_force(&namespace); c.emit_constant(name.as_str().into(), &span); - c.push_op(OpCode::OpAttrsSelect, &span); + c.push_op(Op::AttrsSelect, &span); }) } @@ -621,7 +632,8 @@ impl Compiler<'_, '_> { if self.scope()[idx].needs_finaliser { let stack_idx = self.scope().stack_index(idx); let span = self.scope()[idx].span; - self.push_op(OpCode::OpFinalise(stack_idx), &span); + self.push_op(Op::Finalise, &OrEntireFile(span)); + self.push_uvarint(stack_idx.0 as u64) } } } @@ -656,11 +668,8 @@ impl Compiler<'_, '_> { self.bind_values(bindings); if kind.is_attrs() { - self.push_op(OpCode::OpAttrs(Count(count)), node); - } - - if count == 0 { - self.unthunk(); + self.push_op(Op::Attrs, node); + self.push_uvarint(count as u64); } } @@ -686,7 +695,7 @@ impl Compiler<'_, '_> { self.scope_mut().end_scope(); self.emit_constant("body".into(), node); - self.push_op(OpCode::OpAttrsSelect, node); + self.push_op(Op::AttrsSelect, node); } /// Is the given identifier defined *by the user* in any current scope? @@ -707,8 +716,9 @@ impl Compiler<'_, '_> { match self.scope_mut().resolve_local(ident) { LocalPosition::Unknown => { // Are we possibly dealing with an upvalue? - if let Some(idx) = self.resolve_upvalue(self.contexts.len() - 1, ident, node) { - self.push_op(OpCode::OpGetUpvalue(idx), node); + if let Some(idx) = self.resolve_upvalue(self.contexts.len() - 1, ident) { + self.push_op(Op::GetUpvalue, node); + self.push_uvarint(idx.0 as u64); return; } @@ -731,7 +741,7 @@ impl Compiler<'_, '_> { self.thunk(slot, node, |c, _| { c.context_mut().captures_with_stack = true; c.emit_constant(ident.into(), node); - c.push_op(OpCode::OpResolveWith, node); + c.push_op(Op::ResolveWith, node); }); return; } @@ -742,18 +752,17 @@ impl Compiler<'_, '_> { LocalPosition::Known(idx) => { let stack_idx = self.scope().stack_index(idx); - self.push_op(OpCode::OpGetLocal(stack_idx), node); + self.push_op(Op::GetLocal, node); + self.push_uvarint(stack_idx.0 as u64); } // This identifier is referring to a value from the same scope which // is not yet defined. This identifier access must be thunked. 
LocalPosition::Recursive(idx) => self.thunk(slot, node, move |compiler, _| { - let upvalue_idx = compiler.add_upvalue( - compiler.contexts.len() - 1, - node, - UpvalueKind::Local(idx), - ); - compiler.push_op(OpCode::OpGetUpvalue(upvalue_idx), node); + let upvalue_idx = + compiler.add_upvalue(compiler.contexts.len() - 1, UpvalueKind::Local(idx)); + compiler.push_op(Op::GetUpvalue, node); + compiler.push_uvarint(upvalue_idx.0 as u64); }), }; } @@ -766,12 +775,7 @@ impl Compiler<'_, '_> { /// Private compiler helpers related to bindings. impl Compiler<'_, '_> { - fn resolve_upvalue<N: ToSpan>( - &mut self, - ctx_idx: usize, - name: &str, - node: &N, - ) -> Option<UpvalueIdx> { + fn resolve_upvalue(&mut self, ctx_idx: usize, name: &str) -> Option<UpvalueIdx> { if ctx_idx == 0 { // There can not be any upvalue at the outermost context. return None; @@ -784,7 +788,7 @@ impl Compiler<'_, '_> { // stack (i.e. in the right position) *during* their runtime // construction LocalPosition::Known(idx) | LocalPosition::Recursive(idx) => { - return Some(self.add_upvalue(ctx_idx, node, UpvalueKind::Local(idx))) + return Some(self.add_upvalue(ctx_idx, UpvalueKind::Local(idx))) } LocalPosition::Unknown => { /* continue below */ } @@ -792,19 +796,14 @@ impl Compiler<'_, '_> { // If the upvalue comes from even further up, we need to recurse to make // sure that the upvalues are created at each level. - if let Some(idx) = self.resolve_upvalue(ctx_idx - 1, name, node) { - return Some(self.add_upvalue(ctx_idx, node, UpvalueKind::Upvalue(idx))); + if let Some(idx) = self.resolve_upvalue(ctx_idx - 1, name) { + return Some(self.add_upvalue(ctx_idx, UpvalueKind::Upvalue(idx))); } None } - fn add_upvalue<N: ToSpan>( - &mut self, - ctx_idx: usize, - node: &N, - kind: UpvalueKind, - ) -> UpvalueIdx { + fn add_upvalue(&mut self, ctx_idx: usize, kind: UpvalueKind) -> UpvalueIdx { // If there is already an upvalue closing over the specified index, // retrieve that instead. 
for (idx, existing) in self.contexts[ctx_idx].scope.upvalues.iter().enumerate() { @@ -813,11 +812,7 @@ impl Compiler<'_, '_> { } } - let span = self.span_for(node); - self.contexts[ctx_idx] - .scope - .upvalues - .push(Upvalue { kind, span }); + self.contexts[ctx_idx].scope.upvalues.push(Upvalue { kind }); let idx = UpvalueIdx(self.contexts[ctx_idx].lambda.upvalue_count); self.contexts[ctx_idx].lambda.upvalue_count += 1; diff --git a/tvix/eval/src/compiler/import.rs b/tvix/eval/src/compiler/import.rs index 9036eec81731..862e792df566 100644 --- a/tvix/eval/src/compiler/import.rs +++ b/tvix/eval/src/compiler/import.rs @@ -65,6 +65,7 @@ async fn import_impl( globals .upgrade() .expect("globals dropped while still in use"), + None, &source, &file, &mut NoOpObserver::default(), diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index 60c55dda27b4..33b70b87ce84 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -20,16 +20,16 @@ mod scope; use codemap::Span; use rnix::ast::{self, AstToken}; +use rustc_hash::FxHashMap; use smol_str::SmolStr; -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; use std::path::{Path, PathBuf}; use std::rc::{Rc, Weak}; use crate::chunk::Chunk; use crate::errors::{CatchableErrorKind, Error, ErrorKind, EvalResult}; use crate::observer::CompilerObserver; -use crate::opcode::{CodeIdx, ConstantIdx, Count, JumpOffset, OpCode, UpvalueIdx}; -use crate::spans::LightSpan; +use crate::opcode::{CodeIdx, Op, Position, UpvalueIdx}; use crate::spans::ToSpan; use crate::value::{Closure, Formals, Lambda, NixAttrs, Thunk, Value}; use crate::warnings::{EvalWarning, WarningKind}; @@ -45,11 +45,6 @@ pub struct CompilationOutput { pub lambda: Rc<Lambda>, pub warnings: Vec<EvalWarning>, pub errors: Vec<Error>, - - // This field must outlive the rc::Weak reference which breaks the - // builtins -> import -> builtins reference cycle. For this - // reason, it must be passed to the VM. - pub globals: Rc<GlobalsMap>, } /// Represents the lambda currently being compiled. @@ -57,7 +52,6 @@ struct LambdaCtx { lambda: Lambda, scope: Scope, captures_with_stack: bool, - unthunk: bool, } impl LambdaCtx { @@ -66,7 +60,6 @@ impl LambdaCtx { lambda: Lambda::default(), scope: Default::default(), captures_with_stack: false, - unthunk: false, } } @@ -75,7 +68,6 @@ impl LambdaCtx { lambda: Lambda::default(), scope: self.scope.inherit(), captures_with_stack: false, - unthunk: false, } } } @@ -123,7 +115,7 @@ impl TrackedFormal { /// The map of globally available functions and other values that /// should implicitly be resolvable in the global scope. 
-pub(crate) type GlobalsMap = HashMap<&'static str, Value>; +pub type GlobalsMap = FxHashMap<&'static str, Value>; /// Set of builtins that (if they exist) should be made available in /// the global scope, meaning that they can be accessed not just @@ -193,6 +185,7 @@ impl<'source, 'observer> Compiler<'source, 'observer> { pub(crate) fn new( location: Option<PathBuf>, globals: Rc<GlobalsMap>, + env: Option<&FxHashMap<SmolStr, Value>>, source: &'source SourceCode, file: &'source codemap::File, observer: &'observer mut dyn CompilerObserver, @@ -228,7 +221,7 @@ impl<'source, 'observer> Compiler<'source, 'observer> { #[cfg(not(target_arch = "wasm32"))] debug_assert!(root_dir.is_absolute()); - Ok(Self { + let mut compiler = Self { root_dir, source, file, @@ -238,7 +231,13 @@ impl<'source, 'observer> Compiler<'source, 'observer> { warnings: vec![], errors: vec![], dead_scope: 0, - }) + }; + + if let Some(env) = env { + compiler.compile_env(env); + } + + Ok(compiler) } } @@ -268,13 +267,37 @@ impl Compiler<'_, '_> { /// Push a single instruction to the current bytecode chunk and /// track the source span from which it was compiled. - fn push_op<T: ToSpan>(&mut self, data: OpCode, node: &T) -> CodeIdx { + fn push_op<T: ToSpan>(&mut self, data: Op, node: &T) -> CodeIdx { if self.dead_scope > 0 { return CodeIdx(0); } let span = self.span_for(node); - self.chunk().push_op(data, span) + CodeIdx(self.chunk().push_op(data, span)) + } + + fn push_u8(&mut self, data: u8) { + if self.dead_scope > 0 { + return; + } + + self.chunk().code.push(data); + } + + fn push_uvarint(&mut self, data: u64) { + if self.dead_scope > 0 { + return; + } + + self.chunk().push_uvarint(data); + } + + fn push_u16(&mut self, data: u16) { + if self.dead_scope > 0 { + return; + } + + self.chunk().push_u16(data); } /// Emit a single constant to the current bytecode chunk and track @@ -285,7 +308,8 @@ impl Compiler<'_, '_> { } let idx = self.chunk().push_constant(value); - self.push_op(OpCode::OpConstant(idx), node); + self.push_op(Op::Constant, node); + self.push_uvarint(idx.0 as u64); } } @@ -398,7 +422,7 @@ impl Compiler<'_, '_> { Value::UnresolvedPath(Box::new(home_relative_path.into())), node, ); - self.push_op(OpCode::OpResolveHomePath, node); + self.push_op(Op::ResolveHomePath, node); return; } else if raw_path.starts_with('<') { // TODO: decide what to do with findFile @@ -414,7 +438,7 @@ impl Compiler<'_, '_> { // Make a thunk to resolve the path (without using `findFile`, at least for now?) 
return self.thunk(slot, node, move |c, _| { c.emit_constant(Value::UnresolvedPath(Box::new(path.into())), node); - c.push_op(OpCode::OpFindFile, node); + c.push_op(Op::FindFile, node); }); } else { let mut buf = self.root_dir.clone(); @@ -450,13 +474,15 @@ impl Compiler<'_, '_> { ast::InterpolPart::Interpolation(ipol) => { self.compile(slot, ipol.expr().unwrap()); // implicitly forces as well - self.push_op( - OpCode::OpCoerceToString(CoercionKind { - strong: false, - import_paths: true, - }), - ipol, - ); + self.push_op(Op::CoerceToString, ipol); + + let encoded: u8 = CoercionKind { + strong: false, + import_paths: true, + } + .into(); + + self.push_u8(encoded); } ast::InterpolPart::Literal(lit) => { @@ -466,7 +492,8 @@ impl Compiler<'_, '_> { } if parts.len() != 1 { - self.push_op(OpCode::OpInterpolate(Count(parts.len())), parent_node); + self.push_op(Op::Interpolate, parent_node); + self.push_uvarint(parts.len() as u64); } } @@ -492,8 +519,8 @@ impl Compiler<'_, '_> { self.emit_force(op); let opcode = match op.operator().unwrap() { - ast::UnaryOpKind::Invert => OpCode::OpInvert, - ast::UnaryOpKind::Negate => OpCode::OpNegate, + ast::UnaryOpKind::Invert => Op::Invert, + ast::UnaryOpKind::Negate => Op::Negate, }; self.push_op(opcode, op); @@ -524,21 +551,21 @@ impl Compiler<'_, '_> { self.emit_force(&op.rhs().unwrap()); match op.operator().unwrap() { - BinOpKind::Add => self.push_op(OpCode::OpAdd, op), - BinOpKind::Sub => self.push_op(OpCode::OpSub, op), - BinOpKind::Mul => self.push_op(OpCode::OpMul, op), - BinOpKind::Div => self.push_op(OpCode::OpDiv, op), - BinOpKind::Update => self.push_op(OpCode::OpAttrsUpdate, op), - BinOpKind::Equal => self.push_op(OpCode::OpEqual, op), - BinOpKind::Less => self.push_op(OpCode::OpLess, op), - BinOpKind::LessOrEq => self.push_op(OpCode::OpLessOrEq, op), - BinOpKind::More => self.push_op(OpCode::OpMore, op), - BinOpKind::MoreOrEq => self.push_op(OpCode::OpMoreOrEq, op), - BinOpKind::Concat => self.push_op(OpCode::OpConcat, op), + BinOpKind::Add => self.push_op(Op::Add, op), + BinOpKind::Sub => self.push_op(Op::Sub, op), + BinOpKind::Mul => self.push_op(Op::Mul, op), + BinOpKind::Div => self.push_op(Op::Div, op), + BinOpKind::Update => self.push_op(Op::AttrsUpdate, op), + BinOpKind::Equal => self.push_op(Op::Equal, op), + BinOpKind::Less => self.push_op(Op::Less, op), + BinOpKind::LessOrEq => self.push_op(Op::LessOrEq, op), + BinOpKind::More => self.push_op(Op::More, op), + BinOpKind::MoreOrEq => self.push_op(Op::MoreOrEq, op), + BinOpKind::Concat => self.push_op(Op::Concat, op), BinOpKind::NotEqual => { - self.push_op(OpCode::OpEqual, op); - self.push_op(OpCode::OpInvert, op) + self.push_op(Op::Equal, op); + self.push_op(Op::Invert, op) } // Handled by separate branch above. @@ -559,20 +586,22 @@ impl Compiler<'_, '_> { self.compile(slot, node.lhs().unwrap()); self.emit_force(&node.lhs().unwrap()); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), node); + let throw_idx = self.push_op(Op::JumpIfCatchable, node); + self.push_u16(0); // If this value is false, jump over the right-hand side - the // whole expression is false. - let end_idx = self.push_op(OpCode::OpJumpIfFalse(JumpOffset(0)), node); + let end_idx = self.push_op(Op::JumpIfFalse, node); + self.push_u16(0); // Otherwise, remove the previous value and leave the // right-hand side on the stack. Its result is now the value // of the whole expression. 
- self.push_op(OpCode::OpPop, node); + self.push_op(Op::Pop, node); self.compile(slot, node.rhs().unwrap()); self.emit_force(&node.rhs().unwrap()); self.patch_jump(end_idx); - self.push_op(OpCode::OpAssertBool, node); + self.push_op(Op::AssertBool, node); self.patch_jump(throw_idx); } @@ -587,16 +616,18 @@ impl Compiler<'_, '_> { self.compile(slot, node.lhs().unwrap()); self.emit_force(&node.lhs().unwrap()); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), node); + let throw_idx = self.push_op(Op::JumpIfCatchable, node); + self.push_u16(0); // Opposite of above: If this value is **true**, we can // short-circuit the right-hand side. - let end_idx = self.push_op(OpCode::OpJumpIfTrue(JumpOffset(0)), node); - self.push_op(OpCode::OpPop, node); + let end_idx = self.push_op(Op::JumpIfTrue, node); + self.push_u16(0); + self.push_op(Op::Pop, node); self.compile(slot, node.rhs().unwrap()); self.emit_force(&node.rhs().unwrap()); self.patch_jump(end_idx); - self.push_op(OpCode::OpAssertBool, node); + self.push_op(Op::AssertBool, node); self.patch_jump(throw_idx); } @@ -610,17 +641,20 @@ impl Compiler<'_, '_> { // Leave left-hand side value on the stack and invert it. self.compile(slot, node.lhs().unwrap()); self.emit_force(&node.lhs().unwrap()); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), node); - self.push_op(OpCode::OpInvert, node); + let throw_idx = self.push_op(Op::JumpIfCatchable, node); + self.push_u16(0); + self.push_op(Op::Invert, node); // Exactly as `||` (because `a -> b` = `!a || b`). - let end_idx = self.push_op(OpCode::OpJumpIfTrue(JumpOffset(0)), node); - self.push_op(OpCode::OpPop, node); + let end_idx = self.push_op(Op::JumpIfTrue, node); + self.push_u16(0); + + self.push_op(Op::Pop, node); self.compile(slot, node.rhs().unwrap()); self.emit_force(&node.rhs().unwrap()); self.patch_jump(end_idx); - self.push_op(OpCode::OpAssertBool, node); + self.push_op(Op::AssertBool, node); self.patch_jump(throw_idx); } @@ -655,11 +689,8 @@ impl Compiler<'_, '_> { self.scope_mut().mark_initialised(item_slot); } - if count == 0 { - self.unthunk(); - } - - self.push_op(OpCode::OpList(Count(count)), node); + self.push_op(Op::List, node); + self.push_uvarint(count as u64); self.scope_mut().end_scope(); } @@ -688,7 +719,7 @@ impl Compiler<'_, '_> { // next nested element, for all fragments except the last one. for (count, fragment) in node.attrpath().unwrap().attrs().enumerate() { if count > 0 { - self.push_op(OpCode::OpAttrsTrySelect, &fragment); + self.push_op(Op::AttrsTrySelect, &fragment); self.emit_force(&fragment); } @@ -697,7 +728,7 @@ impl Compiler<'_, '_> { // After the last fragment, emit the actual instruction that // leaves a boolean on the stack. - self.push_op(OpCode::OpHasAttr, node); + self.push_op(Op::HasAttr, node); } /// When compiling select or select_or expressions, an optimisation is @@ -721,8 +752,9 @@ impl Compiler<'_, '_> { // set that is lacking a key, because that thunk is never // evaluated). If anything is missing, just move on. We may // want to emit warnings here in the future. 
- if let Some(OpCode::OpConstant(ConstantIdx(idx))) = self.chunk().code.last().cloned() { - let constant = &mut self.chunk().constants[idx]; + if let Some((Op::Constant, op_idx)) = self.chunk().last_op() { + let (idx, _) = self.chunk().read_uvarint(op_idx + 1); + let constant = &mut self.chunk().constants[idx as usize]; if let Value::Attrs(attrs) = constant { let mut path_iter = path.attrs(); @@ -734,10 +766,6 @@ impl Compiler<'_, '_> { if let Some(ident) = expr_static_attr_str(&attr) { if let Some(selected_value) = attrs.select(ident.as_bytes()) { *constant = selected_value.clone(); - - // If this worked, we can unthunk the current thunk. - self.unthunk(); - return true; } } @@ -771,7 +799,7 @@ impl Compiler<'_, '_> { self.emit_force(&set); self.compile_attr(slot, &fragment); - self.push_op(OpCode::OpAttrsSelect, &fragment); + self.push_op(Op::AttrsSelect, &fragment); } } @@ -821,11 +849,13 @@ impl Compiler<'_, '_> { for fragment in path.attrs() { self.emit_force(&fragment); self.compile_attr(slot, &fragment.clone()); - self.push_op(OpCode::OpAttrsTrySelect, &fragment); - jumps.push(self.push_op(OpCode::OpJumpIfNotFound(JumpOffset(0)), &fragment)); + self.push_op(Op::AttrsTrySelect, &fragment); + jumps.push(self.push_op(Op::JumpIfNotFound, &fragment)); + self.push_u16(0); } - let final_jump = self.push_op(OpCode::OpJump(JumpOffset(0)), &path); + let final_jump = self.push_op(Op::Jump, &path); + self.push_u16(0); for jump in jumps { self.patch_jump(jump); @@ -853,17 +883,22 @@ impl Compiler<'_, '_> { // Compile the assertion condition to leave its value on the stack. self.compile(slot, node.condition().unwrap()); self.emit_force(&node.condition().unwrap()); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), node); - let then_idx = self.push_op(OpCode::OpJumpIfFalse(JumpOffset(0)), node); - self.push_op(OpCode::OpPop, node); + let throw_idx = self.push_op(Op::JumpIfCatchable, node); + self.push_u16(0); + + let then_idx = self.push_op(Op::JumpIfFalse, node); + self.push_u16(0); + + self.push_op(Op::Pop, node); self.compile(slot, node.body().unwrap()); - let else_idx = self.push_op(OpCode::OpJump(JumpOffset(0)), node); + let else_idx = self.push_op(Op::Jump, node); + self.push_u16(0); self.patch_jump(then_idx); - self.push_op(OpCode::OpPop, node); - self.push_op(OpCode::OpAssertFail, &node.condition().unwrap()); + self.push_op(Op::Pop, node); + self.push_op(Op::AssertFail, &node.condition().unwrap()); self.patch_jump(else_idx); self.patch_jump(throw_idx); @@ -885,22 +920,20 @@ impl Compiler<'_, '_> { self.compile(slot, node.condition().unwrap()); self.emit_force(&node.condition().unwrap()); - let throw_idx = self.push_op( - OpCode::OpJumpIfCatchable(JumpOffset(0)), - &node.condition().unwrap(), - ); - let then_idx = self.push_op( - OpCode::OpJumpIfFalse(JumpOffset(0)), - &node.condition().unwrap(), - ); + let throw_idx = self.push_op(Op::JumpIfCatchable, &node.condition().unwrap()); + self.push_u16(0); - self.push_op(OpCode::OpPop, node); // discard condition value + let then_idx = self.push_op(Op::JumpIfFalse, &node.condition().unwrap()); + self.push_u16(0); + + self.push_op(Op::Pop, node); // discard condition value self.compile(slot, node.body().unwrap()); - let else_idx = self.push_op(OpCode::OpJump(JumpOffset(0)), node); + let else_idx = self.push_op(Op::Jump, node); + self.push_u16(0); self.patch_jump(then_idx); // patch jump *to* else_body - self.push_op(OpCode::OpPop, node); // discard condition value + self.push_op(Op::Pop, node); // discard condition value 
self.compile(slot, node.else_body().unwrap()); self.patch_jump(else_idx); // patch jump *over* else body @@ -929,11 +962,12 @@ impl Compiler<'_, '_> { self.scope_mut().push_with(); - self.push_op(OpCode::OpPushWith(with_idx), &node.namespace().unwrap()); + self.push_op(Op::PushWith, &node.namespace().unwrap()); + self.push_uvarint(with_idx.0 as u64); self.compile(slot, node.body().unwrap()); - self.push_op(OpCode::OpPopWith, node); + self.push_op(Op::PopWith, node); self.scope_mut().pop_with(); self.cleanup_scope(node); } @@ -993,13 +1027,15 @@ impl Compiler<'_, '_> { // At call time, the attribute set is already at the top of the stack. self.scope_mut().mark_initialised(set_idx); self.emit_force(pattern); - let throw_idx = self.push_op(OpCode::OpJumpIfCatchable(JumpOffset(0)), pattern); + let throw_idx = self.push_op(Op::JumpIfCatchable, pattern); + self.push_u16(0); + // Evaluation fails on a type error, even if the argument(s) are unused. - self.push_op(OpCode::OpAssertAttrs, pattern); + self.push_op(Op::AssertAttrs, pattern); let ellipsis = pattern.ellipsis_token().is_some(); if !ellipsis { - self.push_op(OpCode::OpValidateClosedFormals, pattern); + self.push_op(Op::ValidateClosedFormals, pattern); } // Similar to `let ... in ...`, we now do multiple passes over @@ -1039,7 +1075,8 @@ impl Compiler<'_, '_> { // attempt to select from it. let stack_idx = self.scope().stack_index(set_idx); for tracked_formal in entries.iter() { - self.push_op(OpCode::OpGetLocal(stack_idx), pattern); + self.push_op(Op::GetLocal, pattern); + self.push_uvarint(stack_idx.0 as u64); self.emit_literal_ident(&tracked_formal.pattern_entry().ident().unwrap()); let idx = tracked_formal.local_idx(); @@ -1068,14 +1105,14 @@ impl Compiler<'_, '_> { // we only know better after compiling the default expression, so // avoiding unnecessary locals would mean we'd need to modify the chunk // after the fact. - self.push_op(OpCode::OpAttrsTrySelect, &pattern_entry.ident().unwrap()); - let jump_to_default = - self.push_op(OpCode::OpJumpIfNotFound(JumpOffset(0)), default_expr); + self.push_op(Op::AttrsTrySelect, &pattern_entry.ident().unwrap()); + let jump_to_default = self.push_op(Op::JumpIfNotFound, default_expr); + self.push_u16(0); self.emit_constant(Value::FinaliseRequest(false), default_expr); - let jump_over_default = - self.push_op(OpCode::OpJump(JumpOffset(0)), default_expr); + let jump_over_default = self.push_op(Op::Jump, default_expr); + self.push_u16(0); self.patch_jump(jump_to_default); @@ -1087,7 +1124,7 @@ impl Compiler<'_, '_> { self.patch_jump(jump_over_default); } TrackedFormal::NoDefault { pattern_entry, .. 
} => { - self.push_op(OpCode::OpAttrsSelect, &pattern_entry.ident().unwrap()); + self.push_op(Op::AttrsSelect, &pattern_entry.ident().unwrap()); } } @@ -1111,23 +1148,16 @@ impl Compiler<'_, '_> { let finalise_request_stack_idx = self.scope().stack_index(*finalise_request_idx); // TODO(sterni): better spans - self.push_op( - OpCode::OpGetLocal(finalise_request_stack_idx), - pattern - ); + self.push_op(Op::GetLocal, pattern); + self.push_uvarint(finalise_request_stack_idx.0 as u64); let jump_over_finalise = - self.push_op( - OpCode::OpJumpIfNoFinaliseRequest( - JumpOffset(0)), - pattern - ); - self.push_op( - OpCode::OpFinalise(stack_idx), - pattern, - ); + self.push_op(Op::JumpIfNoFinaliseRequest, pattern); + self.push_u16(0); + self.push_op(Op::Finalise, pattern); + self.push_uvarint(stack_idx.0 as u64); self.patch_jump(jump_over_finalise); // Get rid of finaliser request value on the stack - self.push_op(OpCode::OpPop, pattern); + self.push_op(Op::Pop, pattern); } } } @@ -1186,12 +1216,6 @@ impl Compiler<'_, '_> { }) } - /// Mark the current thunk as redundant, i.e. possible to merge directly - /// into its parent lambda context without affecting runtime behaviour. - fn unthunk(&mut self) { - self.context_mut().unthunk = true; - } - /// Compile an expression into a runtime closure or thunk fn compile_lambda_or_thunk<N, F>( &mut self, @@ -1220,31 +1244,15 @@ impl Compiler<'_, '_> { self.patch_jump(throw_idx); } - // TODO: determine and insert enclosing name, if available. - // Pop the lambda context back off, and emit the finished // lambda as a constant. let mut compiled = self.contexts.pop().unwrap(); - // The compiler might have decided to unthunk, i.e. raise the compiled - // code to the parent context. In that case we do so and return right - // away. - if compiled.unthunk && is_suspended_thunk { - self.chunk().extend(compiled.lambda.chunk); - return; - } - // Emit an instruction to inform the VM that the chunk has ended. compiled .lambda .chunk - .push_op(OpCode::OpReturn, self.span_for(node)); - - // Capturing the with stack counts as an upvalue, as it is - // emitted as an upvalue data instruction. - if compiled.captures_with_stack { - compiled.lambda.upvalue_count += 1; - } + .push_op(Op::Return, self.span_for(node)); let lambda = Rc::new(compiled.lambda); if is_suspended_thunk { @@ -1254,10 +1262,10 @@ impl Compiler<'_, '_> { } // If no upvalues are captured, emit directly and move on. - if lambda.upvalue_count == 0 { + if lambda.upvalue_count == 0 && !compiled.captures_with_stack { self.emit_constant( if is_suspended_thunk { - Value::Thunk(Thunk::new_suspended(lambda, LightSpan::new_actual(span))) + Value::Thunk(Thunk::new_suspended(lambda, span)) } else { Value::Closure(Rc::new(Closure::new(lambda))) }, @@ -1274,12 +1282,13 @@ impl Compiler<'_, '_> { let code_idx = self.push_op( if is_suspended_thunk { - OpCode::OpThunkSuspended(blueprint_idx) + Op::ThunkSuspended } else { - OpCode::OpThunkClosure(blueprint_idx) + Op::ThunkClosure }, node, ); + self.push_uvarint(blueprint_idx.0 as u64); self.emit_upvalue_data( outer_slot, @@ -1290,18 +1299,21 @@ impl Compiler<'_, '_> { if !is_suspended_thunk && !self.scope()[outer_slot].needs_finaliser { if !self.scope()[outer_slot].must_thunk { - // The closure has upvalues, but is not recursive. Therefore no thunk is required, - // which saves us the overhead of Rc<RefCell<>> - self.chunk()[code_idx] = OpCode::OpClosure(blueprint_idx); + // The closure has upvalues, but is not recursive. 
Therefore no + // thunk is required, which saves us the overhead of + // Rc<RefCell<>> + self.chunk().code[code_idx.0] = Op::Closure as u8; } else { - // This case occurs when a closure has upvalue-references to itself but does not need a - // finaliser. Since no OpFinalise will be emitted later on we synthesize one here. - // It is needed here only to set [`Closure::is_finalised`] which is used for sanity checks. + // This case occurs when a closure has upvalue-references to + // itself but does not need a finaliser. Since no OpFinalise + // will be emitted later on we synthesize one here. It is needed + // here only to set [`Closure::is_finalised`] which is used for + // sanity checks. #[cfg(debug_assertions)] - self.push_op( - OpCode::OpFinalise(self.scope().stack_index(outer_slot)), - &self.span_for(node), - ); + { + self.push_op(Op::Finalise, &self.span_for(node)); + self.push_uvarint(self.scope().stack_index(outer_slot).0 as u64); + } } } } @@ -1314,7 +1326,7 @@ impl Compiler<'_, '_> { self.compile(slot, node.argument().unwrap()); self.compile(slot, node.lambda().unwrap()); self.emit_force(&node.lambda().unwrap()); - self.push_op(OpCode::OpCall, node); + self.push_op(Op::Call, node); } /// Emit the data instructions that the runtime needs to correctly @@ -1322,10 +1334,18 @@ impl Compiler<'_, '_> { fn emit_upvalue_data<T: ToSpan>( &mut self, slot: LocalIdx, - node: &T, + _: &T, // TODO upvalues: Vec<Upvalue>, capture_with: bool, ) { + // Push the count of arguments to be expected, with one bit set to + // indicate whether the with stack needs to be captured. + let mut count = (upvalues.len() as u64) << 1; + if capture_with { + count |= 1; + } + self.push_uvarint(count); + for upvalue in upvalues { match upvalue.kind { UpvalueKind::Local(idx) => { @@ -1335,27 +1355,22 @@ impl Compiler<'_, '_> { // If the target is not yet initialised, we need to defer // the local access if !target.initialised { - self.push_op(OpCode::DataDeferredLocal(stack_idx), &upvalue.span); + self.push_uvarint(Position::deferred_local(stack_idx).0); self.scope_mut().mark_needs_finaliser(slot); } else { // a self-reference if slot == idx { self.scope_mut().mark_must_thunk(slot); } - self.push_op(OpCode::DataStackIdx(stack_idx), &upvalue.span); + self.push_uvarint(Position::stack_index(stack_idx).0); } } UpvalueKind::Upvalue(idx) => { - self.push_op(OpCode::DataUpvalueIdx(idx), &upvalue.span); + self.push_uvarint(Position::upvalue_index(idx).0); } }; } - - if capture_with { - // TODO(tazjin): probably better to emit span for the ident that caused this - self.push_op(OpCode::DataCaptureWith, node); - } } /// Emit the literal string value of an identifier. Required for @@ -1372,20 +1387,7 @@ impl Compiler<'_, '_> { /// not known at the time when the jump operation itself is /// emitted. 
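// A minimal, self-contained sketch (not part of this patch) of the packed
// upvalue count pushed by `emit_upvalue_data` above: the number of upvalues is
// shifted left by one bit and the low bit records whether the with-stack is
// captured, so both fit in a single uvarint operand.
fn packed_upvalue_count(upvalues: u64, capture_with: bool) -> u64 {
    (upvalues << 1) | (capture_with as u64)
}

#[test]
fn packed_upvalue_count_roundtrip() {
    let packed = packed_upvalue_count(3, true); // 0b111 == 7
    assert_eq!(packed >> 1, 3); // number of upvalues
    assert_eq!(packed & 1, 1); // with-stack capture flag
}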
fn patch_jump(&mut self, idx: CodeIdx) { - let offset = JumpOffset(self.chunk().code.len() - 1 - idx.0); - - match &mut self.chunk().code[idx.0] { - OpCode::OpJump(n) - | OpCode::OpJumpIfFalse(n) - | OpCode::OpJumpIfTrue(n) - | OpCode::OpJumpIfCatchable(n) - | OpCode::OpJumpIfNotFound(n) - | OpCode::OpJumpIfNoFinaliseRequest(n) => { - *n = offset; - } - - op => panic!("attempted to patch unsupported op: {:?}", op), - } + self.chunk().patch_jump(idx.0); } /// Decrease scope depth of the current function and emit @@ -1401,7 +1403,8 @@ impl Compiler<'_, '_> { } if popcount > 0 { - self.push_op(OpCode::OpCloseScope(Count(popcount)), node); + self.push_op(Op::CloseScope, node); + self.push_uvarint(popcount as u64); } } @@ -1459,16 +1462,7 @@ impl Compiler<'_, '_> { } fn emit_force<N: ToSpan>(&mut self, node: &N) { - if let Some(&OpCode::OpConstant(c)) = self.chunk().last_op() { - if !self.chunk().get_constant(c).unwrap().is_thunk() { - // Optimization: Don't emit a force op for non-thunk constants, since they don't - // need one! - // TODO: this is probably doable for more ops (?) - return; - } - } - - self.push_op(OpCode::OpForce, node); + self.push_op(Op::Force, node); } fn emit_warning<N: ToSpan>(&mut self, node: &N, kind: WarningKind) { @@ -1549,6 +1543,7 @@ fn compile_src_builtin( &parsed.tree().expr().unwrap(), None, weak.upgrade().unwrap(), + None, &source, &file, &mut crate::observer::NoOpObserver {}, @@ -1565,10 +1560,7 @@ fn compile_src_builtin( }); } - Ok(Value::Thunk(Thunk::new_suspended( - result.lambda, - LightSpan::Actual { span: file.span }, - ))) + Ok(Value::Thunk(Thunk::new_suspended(result.lambda, file.span))) }))) } @@ -1589,7 +1581,7 @@ pub fn prepare_globals( Rc::new_cyclic(Box::new(move |weak: &Weak<GlobalsMap>| { // First step is to construct the builtins themselves as // `NixAttrs`. - let mut builtins: GlobalsMap = HashMap::from_iter(builtins); + let mut builtins: GlobalsMap = FxHashMap::from_iter(builtins); // At this point, optionally insert `import` if enabled. To // "tie the knot" of `import` needing the full set of globals @@ -1602,7 +1594,7 @@ pub fn prepare_globals( // Next, the actual map of globals which the compiler will use // to resolve identifiers is constructed. - let mut globals: GlobalsMap = HashMap::new(); + let mut globals: GlobalsMap = FxHashMap::default(); // builtins contain themselves (`builtins.builtins`), which we // can resolve by manually constructing a suspended thunk that @@ -1655,11 +1647,12 @@ pub fn compile( expr: &ast::Expr, location: Option<PathBuf>, globals: Rc<GlobalsMap>, + env: Option<&FxHashMap<SmolStr, Value>>, source: &SourceCode, file: &codemap::File, observer: &mut dyn CompilerObserver, ) -> EvalResult<CompilationOutput> { - let mut c = Compiler::new(location, globals.clone(), source, file, observer)?; + let mut c = Compiler::new(location, globals.clone(), env, source, file, observer)?; let root_span = c.span_for(expr); let root_slot = c.scope_mut().declare_phantom(root_span, false); @@ -1670,7 +1663,13 @@ pub fn compile( // unevaluated state (though in practice, a value *containing* a // thunk might be returned). 
c.emit_force(expr); - c.push_op(OpCode::OpReturn, &root_span); + if let Some(env) = env { + if !env.is_empty() { + c.push_op(Op::CloseScope, &root_span); + c.push_uvarint(env.len() as u64); + } + } + c.push_op(Op::Return, &root_span); let lambda = Rc::new(c.contexts.pop().unwrap().lambda); c.observer.observe_compiled_toplevel(&lambda); @@ -1679,6 +1678,5 @@ pub fn compile( lambda, warnings: c.warnings, errors: c.errors, - globals, }) } diff --git a/tvix/eval/src/compiler/scope.rs b/tvix/eval/src/compiler/scope.rs index 892727c107c9..b8cd855ecde5 100644 --- a/tvix/eval/src/compiler/scope.rs +++ b/tvix/eval/src/compiler/scope.rs @@ -10,10 +10,8 @@ //! stack indices. To do this, the compiler simulates where locals //! will be at runtime using the data structures implemented here. -use std::{ - collections::{hash_map, HashMap}, - ops::Index, -}; +use rustc_hash::FxHashMap; +use std::{collections::hash_map, ops::Index}; use smol_str::SmolStr; @@ -38,7 +36,7 @@ pub struct Local { name: LocalName, /// Source span at which this local was declared. - pub span: codemap::Span, + pub span: Option<codemap::Span>, /// Scope depth of this local. pub depth: usize, @@ -73,6 +71,10 @@ impl Local { LocalName::Phantom => false, } } + + pub fn is_used(&self) -> bool { + self.depth == 0 || self.used || self.is_ignored() + } } /// Represents the current position of an identifier as resolved in a scope. @@ -103,7 +105,6 @@ pub enum UpvalueKind { #[derive(Clone, Debug)] pub struct Upvalue { pub kind: UpvalueKind, - pub span: codemap::Span, } /// The index of a local in the scope's local array at compile time. @@ -164,7 +165,7 @@ pub struct Scope { pub upvalues: Vec<Upvalue>, /// Secondary by-name index over locals. - by_name: HashMap<String, ByName>, + by_name: FxHashMap<String, ByName>, /// How many scopes "deep" are these locals? scope_depth: usize, @@ -240,7 +241,7 @@ impl Scope { let idx = self.locals.len(); self.locals.push(Local { initialised, - span, + span: Some(span), name: LocalName::Phantom, depth: self.scope_depth, needs_finaliser: false, @@ -263,7 +264,7 @@ impl Scope { let idx = LocalIdx(self.locals.len()); self.locals.push(Local { name: LocalName::Ident(name.clone()), - span, + span: Some(span), depth: self.scope_depth, initialised: false, needs_finaliser: false, @@ -286,6 +287,23 @@ impl Scope { (idx, shadowed) } + pub fn declare_constant(&mut self, name: String) -> LocalIdx { + let idx = LocalIdx(self.locals.len()); + self.locals.push(Local { + name: LocalName::Ident(name.clone()), + span: None, + depth: 0, + initialised: true, + used: false, + needs_finaliser: false, + must_thunk: false, + }); + // We don't need to worry about shadowing for constants; they're defined at the toplevel + // always + self.by_name.insert(name, ByName::Single(idx)); + idx + } + /// Mark local as initialised after compiling its expression. 
pub fn mark_initialised(&mut self, idx: LocalIdx) { self.locals[idx.0].initialised = true; @@ -348,8 +366,8 @@ impl Scope { // lifetime, and emit a warning otherwise (unless the // user explicitly chose to ignore it by prefixing the // identifier with `_`) - if !local.used && !local.is_ignored() { - unused_spans.push(local.span); + if !local.is_used() { + unused_spans.extend(local.span); } // remove the by-name index if this was a named local diff --git a/tvix/eval/src/errors.rs b/tvix/eval/src/errors.rs index e32cfa04ed40..9b5384690e46 100644 --- a/tvix/eval/src/errors.rs +++ b/tvix/eval/src/errors.rs @@ -35,96 +35,109 @@ use crate::{SourceCode, Value}; /// because Rust's magic `?`-syntax does not work on nested Result /// values like this. // TODO(amjoseph): investigate result<T,Either<CatchableErrorKind,ErrorKind>> -#[derive(Clone, Debug)] +#[derive(thiserror::Error, Clone, Debug)] pub enum CatchableErrorKind { + #[error("error thrown: {0}")] Throw(Box<str>), + + #[error("assertion failed")] AssertionFailed, + + #[error("feature {0} is not implemented yet")] UnimplementedFeature(Box<str>), + /// Resolving a user-supplied angle brackets path literal failed in some way. + #[error("Nix path entry could not be resolved: {0}")] NixPathResolution(Box<str>), } -impl Display for CatchableErrorKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - CatchableErrorKind::Throw(s) => write!(f, "error thrown: {}", s), - CatchableErrorKind::AssertionFailed => write!(f, "assertion failed"), - CatchableErrorKind::UnimplementedFeature(s) => { - write!(f, "feature {} is not implemented yet", s) - } - CatchableErrorKind::NixPathResolution(s) => { - write!(f, "Nix path entry could not be resolved: {}", s) - } - } - } -} - -#[derive(Clone, Debug)] +#[derive(thiserror::Error, Clone, Debug)] pub enum ErrorKind { /// These are user-generated errors through builtins. + #[error("evaluation aborted: {0}")] Abort(String), + #[error("division by zero")] DivisionByZero, - DuplicateAttrsKey { - key: String, - }, + #[error("attribute key '{key}' already defined")] + DuplicateAttrsKey { key: String }, /// Attempted to specify an invalid key type (e.g. integer) in a /// dynamic attribute name. + #[error( + "found attribute name '{0}' of type '{}', but attribute names must be strings", + .0.type_of() + )] InvalidAttributeName(Value), - AttributeNotFound { - name: String, - }, + #[error("attribute with name '{name}' could not be found in the set")] + AttributeNotFound { name: String }, /// Attempted to index into a list beyond its boundaries. - IndexOutOfBounds { - index: i64, - }, + #[error("list index '{index}' is out of bounds")] + IndexOutOfBounds { index: i64 }, /// Attempted to call `builtins.tail` on an empty list. + #[error("'tail' called on an empty list")] TailEmptyList, + #[error("expected value of type '{expected}', but found a '{actual}'")] TypeError { expected: &'static str, actual: &'static str, }, + #[error("can not compare a {lhs} with a {rhs}")] Incomparable { lhs: &'static str, rhs: &'static str, }, /// Resolving a user-supplied relative or home-relative path literal failed in some way. + #[error("could not resolve path: {0}")] RelativePathResolution(String), /// Dynamic keys are not allowed in some scopes. + #[error("dynamically evaluated keys are not allowed in {0}")] DynamicKeyInScope(&'static str), /// Unknown variable in statically known scope. + #[error("variable not found")] UnknownStaticVariable, /// Unknown variable in dynamic scope (with, rec, ...). 
+ #[error( + r#"variable '{0}' could not be found + +Note that this occured within a `with`-expression. The problem may be related +to a missing value in the attribute set(s) included via `with`."# + )] UnknownDynamicVariable(String), /// User is defining the same variable twice at the same depth. - VariableAlreadyDefined(Span), + #[error("variable has already been defined")] + VariableAlreadyDefined(Option<Span>), /// Attempt to call something that is not callable. + #[error("only functions and builtins can be called, but this is a '{0}'")] NotCallable(&'static str), /// Infinite recursion encountered while forcing thunks. + #[error("infinite recursion encountered")] InfiniteRecursion { first_force: Span, suspended_at: Option<Span>, content_span: Option<Span>, }, + // Errors themselves ignored here & handled in Self::spans instead + #[error("failed to parse Nix code:")] ParseErrors(Vec<rnix::parser::ParseError>), /// An error occured while executing some native code (e.g. a /// builtin), and needs to be chained up. + #[error("while evaluating this as native code ({gen_type})")] NativeError { gen_type: &'static str, err: Box<Error>, @@ -132,31 +145,44 @@ pub enum ErrorKind { /// An error occured while executing Tvix bytecode, but needs to /// be chained up. + #[error("while evaluating this Nix code")] BytecodeError(Box<Error>), /// Given type can't be coerced to a string in the respective context + #[error("cannot ({}) coerce {from} to a string{}", + (if .kind.strong { "strongly" } else { "weakly" }), + (if *.from == "set" { + ", missing a `__toString` or `outPath` attribute" + } else { + "" + }) + )] NotCoercibleToString { from: &'static str, kind: CoercionKind, }, /// The given string doesn't represent an absolute path + #[error("string '{}' does not represent an absolute path", .0.to_string_lossy())] NotAnAbsolutePath(PathBuf), /// An error occurred when parsing an integer + #[error("invalid integer: {0}")] ParseIntError(ParseIntError), // Errors specific to nested attribute sets and merges thereof. /// Nested attributes can not be merged with an inherited value. - UnmergeableInherit { - name: SmolStr, - }, + #[error("cannot merge a nested attribute set into the inherited entry '{name}'")] + UnmergeableInherit { name: SmolStr }, /// Nested attributes can not be merged with values that are not /// literal attribute sets. + #[error("nested attribute sets or keys can only be merged with literal attribute sets")] UnmergeableValue, + // Errors themselves ignored here & handled in Self::spans instead /// Parse errors occured while importing a file. + #[error("parse errors occured while importing '{}'", .path.to_string_lossy())] ImportParseError { path: PathBuf, file: Arc<File>, @@ -164,44 +190,73 @@ pub enum ErrorKind { }, /// Compilation errors occured while importing a file. - ImportCompilerError { - path: PathBuf, - errors: Vec<Error>, - }, + #[error("compiler errors occured while importing '{}'", .path.to_string_lossy())] + ImportCompilerError { path: PathBuf, errors: Vec<Error> }, /// I/O errors + #[error("I/O error: {}", + ({ + let mut msg = String::new(); + + if let Some(path) = .path { + msg.push_str(&format!("{}: ", path.display())); + } + + msg.push_str(&.error.to_string()); + + msg + }) + )] IO { path: Option<PathBuf>, error: Rc<io::Error>, }, /// Errors parsing JSON, or serializing as JSON. + #[error("Error converting JSON to a Nix value or back: {0}")] JsonError(String), /// Nix value that can not be serialised to JSON. 
+ #[error("a {0} cannot be converted to JSON")] NotSerialisableToJson(&'static str), /// Errors converting TOML to a value + #[error("Error converting TOML to a Nix value: {0}")] FromTomlError(String), /// An unexpected argument was supplied to a builtin + #[error("Unexpected agrument `{0}` passed to builtin")] UnexpectedArgumentBuiltin(NixString), /// An unexpected argument was supplied to a function that takes formal parameters - UnexpectedArgumentFormals { - arg: NixString, - formals_span: Span, - }, + #[error("Unexpected argument `{arg}` supplied to function")] + UnexpectedArgumentFormals { arg: NixString, formals_span: Span }, /// Invalid UTF-8 was encoutered somewhere + #[error("Invalid UTF-8 in string")] Utf8, + #[error("Invalid hash: {0}")] + InvalidHash(String), + /// Variant for errors that bubble up to eval from other Tvix /// components. + #[error("{0}")] TvixError(Rc<dyn error::Error>), /// Variant for code paths that are known bugs in Tvix (usually /// issues with the compiler/VM interaction). + #[error("{}", + ({ + let mut disp = format!("Tvix bug: {}", .msg); + + if let Some(metadata) = .metadata { + disp.push_str(&format!("; metadata: {:?}", metadata)); + } + + disp + }) + )] TvixBug { msg: &'static str, metadata: Option<Rc<dyn Debug>>, @@ -210,15 +265,18 @@ pub enum ErrorKind { /// Tvix internal warning for features triggered by users that are /// not actually implemented yet, and without which eval can not /// proceed. + #[error("feature not yet implemented in Tvix: {0}")] NotImplemented(&'static str), /// Internal variant which should disappear during error construction. + #[error("internal ErrorKind::WithContext variant leaked")] WithContext { context: String, underlying: Box<ErrorKind>, }, /// Unexpected context string + #[error("unexpected context string")] UnexpectedContext, /// Top-level evaluation result was a catchable Nix error, and @@ -227,10 +285,12 @@ pub enum ErrorKind { /// This variant **must** only be used at the top-level of /// tvix-eval when returning a result to the user, never inside of /// eval code. 
+ #[error("{0}")] CatchableError(CatchableErrorKind), /// Invalid hash type specified, must be one of "md5", "sha1", "sha256" /// or "sha512" + #[error("unknown hash type '{0}'")] UnknownHashType(String), } @@ -334,211 +394,6 @@ impl Error { } } -impl Display for ErrorKind { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match &self { - ErrorKind::Abort(msg) => write!(f, "evaluation aborted: {}", msg), - - ErrorKind::DivisionByZero => write!(f, "division by zero"), - - ErrorKind::DuplicateAttrsKey { key } => { - write!(f, "attribute key '{}' already defined", key) - } - - ErrorKind::InvalidAttributeName(val) => write!( - f, - "found attribute name '{}' of type '{}', but attribute names must be strings", - val, - val.type_of() - ), - - ErrorKind::AttributeNotFound { name } => write!( - f, - "attribute with name '{}' could not be found in the set", - name - ), - - ErrorKind::IndexOutOfBounds { index } => { - write!(f, "list index '{}' is out of bounds", index) - } - - ErrorKind::TailEmptyList => write!(f, "'tail' called on an empty list"), - - ErrorKind::TypeError { expected, actual } => write!( - f, - "expected value of type '{}', but found a '{}'", - expected, actual - ), - - ErrorKind::Incomparable { lhs, rhs } => { - write!(f, "can not compare a {} with a {}", lhs, rhs) - } - - ErrorKind::RelativePathResolution(err) => { - write!(f, "could not resolve path: {}", err) - } - - ErrorKind::DynamicKeyInScope(scope) => { - write!(f, "dynamically evaluated keys are not allowed in {}", scope) - } - - ErrorKind::UnknownStaticVariable => write!(f, "variable not found"), - - ErrorKind::UnknownDynamicVariable(name) => write!( - f, - r#"variable '{}' could not be found - -Note that this occured within a `with`-expression. The problem may be related -to a missing value in the attribute set(s) included via `with`."#, - name - ), - - ErrorKind::VariableAlreadyDefined(_) => write!(f, "variable has already been defined"), - - ErrorKind::NotCallable(other_type) => { - write!( - f, - "only functions and builtins can be called, but this is a '{}'", - other_type - ) - } - - ErrorKind::InfiniteRecursion { .. } => write!(f, "infinite recursion encountered"), - - // Errors themselves ignored here & handled in Self::spans instead - ErrorKind::ParseErrors(_) => write!(f, "failed to parse Nix code:"), - - ErrorKind::NativeError { gen_type, .. } => { - write!(f, "while evaluating this as native code ({})", gen_type) - } - - ErrorKind::BytecodeError(_) => write!(f, "while evaluating this Nix code"), - - ErrorKind::NotCoercibleToString { kind, from } => { - let kindly = if kind.strong { "strongly" } else { "weakly" }; - - let hint = if *from == "set" { - ", missing a `__toString` or `outPath` attribute" - } else { - "" - }; - - write!(f, "cannot ({kindly}) coerce {from} to a string{hint}") - } - - ErrorKind::NotAnAbsolutePath(given) => { - write!( - f, - "string '{}' does not represent an absolute path", - given.to_string_lossy() - ) - } - - ErrorKind::ParseIntError(err) => { - write!(f, "invalid integer: {}", err) - } - - ErrorKind::UnmergeableInherit { name } => { - write!( - f, - "cannot merge a nested attribute set into the inherited entry '{}'", - name - ) - } - - ErrorKind::UnmergeableValue => { - write!( - f, - "nested attribute sets or keys can only be merged with literal attribute sets" - ) - } - - // Errors themselves ignored here & handled in Self::spans instead - ErrorKind::ImportParseError { path, .. 
} => { - write!( - f, - "parse errors occured while importing '{}'", - path.to_string_lossy() - ) - } - - ErrorKind::ImportCompilerError { path, .. } => { - writeln!( - f, - "compiler errors occured while importing '{}'", - path.to_string_lossy() - ) - } - - ErrorKind::IO { path, error } => { - write!(f, "I/O error: ")?; - if let Some(path) = path { - write!(f, "{}: ", path.display())?; - } - write!(f, "{error}") - } - - ErrorKind::JsonError(msg) => { - write!(f, "Error converting JSON to a Nix value or back: {msg}") - } - - ErrorKind::NotSerialisableToJson(_type) => { - write!(f, "a {} cannot be converted to JSON", _type) - } - - ErrorKind::FromTomlError(msg) => { - write!(f, "Error converting TOML to a Nix value: {msg}") - } - - ErrorKind::UnexpectedArgumentBuiltin(arg) => { - write!(f, "Unexpected agrument `{arg}` passed to builtin",) - } - - ErrorKind::UnexpectedArgumentFormals { arg, .. } => { - write!(f, "Unexpected argument `{arg}` supplied to function",) - } - - ErrorKind::Utf8 => { - write!(f, "Invalid UTF-8 in string") - } - - ErrorKind::TvixError(inner_error) => { - write!(f, "{inner_error}") - } - - ErrorKind::TvixBug { msg, metadata } => { - write!(f, "Tvix bug: {}", msg)?; - - if let Some(metadata) = metadata { - write!(f, "; metadata: {:?}", metadata)?; - } - - Ok(()) - } - - ErrorKind::NotImplemented(feature) => { - write!(f, "feature not yet implemented in Tvix: {}", feature) - } - - ErrorKind::WithContext { .. } => { - panic!("internal ErrorKind::WithContext variant leaked") - } - - ErrorKind::UnexpectedContext => { - write!(f, "unexpected context string") - } - - ErrorKind::CatchableError(inner) => { - write!(f, "{}", inner) - } - - ErrorKind::UnknownHashType(hash_type) => { - write!(f, "unknown hash type '{}'", hash_type) - } - } - } -} - impl Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.kind) @@ -824,6 +679,7 @@ impl Error { | ErrorKind::NotImplemented(_) | ErrorKind::WithContext { .. } | ErrorKind::UnknownHashType(_) + | ErrorKind::InvalidHash(_) | ErrorKind::CatchableError(_) => return None, }; @@ -870,6 +726,7 @@ impl Error { ErrorKind::Utf8 => "E038", ErrorKind::UnknownHashType(_) => "E039", ErrorKind::UnexpectedArgumentBuiltin { .. } => "E040", + ErrorKind::InvalidHash(_) => "E041", // Special error code for errors from other Tvix // components. We may want to introduce a code namespacing diff --git a/tvix/eval/src/io.rs b/tvix/eval/src/io.rs index 9acfd6eeba02..f30ef164c41a 100644 --- a/tvix/eval/src/io.rs +++ b/tvix/eval/src/io.rs @@ -26,7 +26,7 @@ use std::os::unix::ffi::OsStringExt; #[cfg(feature = "impure")] use std::fs::File; -/// Types of files as represented by `builtins.readDir` in Nix. +/// Types of files as represented by `builtins.readFileType` and `builtins.readDir` in Nix. 
#[derive(Debug)] pub enum FileType { Directory, @@ -35,6 +35,33 @@ pub enum FileType { Unknown, } +impl std::fmt::Display for FileType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let type_as_str = match &self { + FileType::Directory => "directory", + FileType::Regular => "regular", + FileType::Symlink => "symlink", + FileType::Unknown => "unknown", + }; + + write!(f, "{}", type_as_str) + } +} + +impl From<std::fs::FileType> for FileType { + fn from(value: std::fs::FileType) -> Self { + if value.is_file() { + Self::Regular + } else if value.is_dir() { + Self::Directory + } else if value.is_symlink() { + Self::Symlink + } else { + Self::Unknown + } + } +} + /// Represents all possible filesystem interactions that exist in the Nix /// language, and that need to be executed somehow. /// @@ -98,7 +125,8 @@ pub struct StdIO; #[cfg(feature = "impure")] impl EvalIO for StdIO { fn path_exists(&self, path: &Path) -> io::Result<bool> { - path.try_exists() + // In general, an IO error indicates the path doesn't exist + Ok(path.try_exists().unwrap_or(false)) } fn open(&self, path: &Path) -> io::Result<Box<dyn io::Read>> { @@ -106,7 +134,7 @@ impl EvalIO for StdIO { } fn file_type(&self, path: &Path) -> io::Result<FileType> { - let file_type = File::open(path)?.metadata()?.file_type(); + let file_type = std::fs::symlink_metadata(path)?; Ok(if file_type.is_dir() { FileType::Directory diff --git a/tvix/eval/src/lib.rs b/tvix/eval/src/lib.rs index 398da4d6e22e..a4ef3f01e40a 100644 --- a/tvix/eval/src/lib.rs +++ b/tvix/eval/src/lib.rs @@ -36,80 +36,342 @@ mod test_utils; #[cfg(test)] mod tests; +use rustc_hash::FxHashMap; use std::path::PathBuf; use std::rc::Rc; use std::str::FromStr; use std::sync::Arc; -use crate::compiler::GlobalsMap; use crate::observer::{CompilerObserver, RuntimeObserver}; use crate::value::Lambda; use crate::vm::run_lambda; // Re-export the public interface used by other crates. -pub use crate::compiler::{compile, prepare_globals, CompilationOutput}; +pub use crate::compiler::{compile, prepare_globals, CompilationOutput, GlobalsMap}; pub use crate::errors::{AddContext, CatchableErrorKind, Error, ErrorKind, EvalResult}; pub use crate::io::{DummyIO, EvalIO, FileType}; pub use crate::pretty_ast::pretty_print_expr; pub use crate::source::SourceCode; pub use crate::value::{NixContext, NixContextElement}; -pub use crate::vm::generators; +pub use crate::vm::{generators, EvalMode}; pub use crate::warnings::{EvalWarning, WarningKind}; pub use builtin_macros; +use smol_str::SmolStr; pub use crate::value::{Builtin, CoercionKind, NixAttrs, NixList, NixString, Value}; #[cfg(feature = "impure")] pub use crate::io::StdIO; +struct BuilderBuiltins { + builtins: Vec<(&'static str, Value)>, + src_builtins: Vec<(&'static str, &'static str)>, +} + +enum BuilderGlobals { + Builtins(BuilderBuiltins), + Globals(Rc<GlobalsMap>), +} + +/// Builder for building an [`Evaluation`]. 
+/// +/// Construct an [`EvaluationBuilder`] by calling one of: +/// +/// - [`Evaluation::builder`] / [`EvaluationBuilder::new`] +/// - [`Evaluation::builder_impure`] [`EvaluationBuilder::new_impure`] +/// - [`Evaluation::builder_pure`] [`EvaluationBuilder::new_pure`] +/// +/// Then configure the fields by calling the various methods on [`EvaluationBuilder`], and finally +/// call [`build`](Self::build) to construct an [`Evaluation`] +pub struct EvaluationBuilder<'co, 'ro, 'env, IO> { + source_map: Option<SourceCode>, + globals: BuilderGlobals, + env: Option<&'env FxHashMap<SmolStr, Value>>, + io_handle: IO, + enable_import: bool, + mode: EvalMode, + nix_path: Option<String>, + compiler_observer: Option<&'co mut dyn CompilerObserver>, + runtime_observer: Option<&'ro mut dyn RuntimeObserver>, +} + +impl<'co, 'ro, 'env, IO> EvaluationBuilder<'co, 'ro, 'env, IO> +where + IO: AsRef<dyn EvalIO> + 'static, +{ + /// Build an [`Evaluation`] based on the configuration in this builder. + /// + /// This: + /// + /// - Adds a `"storeDir"` builtin containing the store directory of the configured IO handle + /// - Sets up globals based on the configured builtins + /// - Copies all other configured fields to the [`Evaluation`] + pub fn build(self) -> Evaluation<'co, 'ro, 'env, IO> { + let source_map = self.source_map.unwrap_or_default(); + + let globals = match self.globals { + BuilderGlobals::Globals(globals) => globals, + BuilderGlobals::Builtins(BuilderBuiltins { + mut builtins, + src_builtins, + }) => { + // Insert a storeDir builtin *iff* a store directory is present. + if let Some(store_dir) = self.io_handle.as_ref().store_dir() { + builtins.push(("storeDir", store_dir.into())); + } + + crate::compiler::prepare_globals( + builtins, + src_builtins, + source_map.clone(), + self.enable_import, + ) + } + }; + + Evaluation { + source_map, + globals, + env: self.env, + io_handle: self.io_handle, + mode: self.mode, + nix_path: self.nix_path, + compiler_observer: self.compiler_observer, + runtime_observer: self.runtime_observer, + } + } +} + +// NOTE(aspen): The methods here are intentionally incomplete; feel free to add new ones (ideally +// with similar naming conventions to the ones already present) but don't expose fields publically! 
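// A minimal usage sketch of the new builder API, mirroring the updated tests
// further down in this diff; the builtin name and source below are
// illustrative, and only public methods introduced here are used.
fn eval_with_builder() -> tvix_eval::EvaluationResult {
    let eval = tvix_eval::Evaluation::builder_pure()
        .add_src_builtin("testSourceBuiltin", "42")
        .mode(tvix_eval::EvalMode::Strict)
        .build();
    eval.evaluate("builtins.testSourceBuiltin", None)
}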
+impl<'co, 'ro, 'env, IO> EvaluationBuilder<'co, 'ro, 'env, IO> { + pub fn new(io_handle: IO) -> Self { + let mut builtins = builtins::pure_builtins(); + builtins.extend(builtins::placeholders()); // these are temporary + + Self { + source_map: None, + enable_import: false, + io_handle, + globals: BuilderGlobals::Builtins(BuilderBuiltins { + builtins, + src_builtins: vec![], + }), + env: None, + mode: Default::default(), + nix_path: None, + compiler_observer: None, + runtime_observer: None, + } + } + + pub fn io_handle<IO2>(self, io_handle: IO2) -> EvaluationBuilder<'co, 'ro, 'env, IO2> { + EvaluationBuilder { + io_handle, + source_map: self.source_map, + globals: self.globals, + env: self.env, + enable_import: self.enable_import, + mode: self.mode, + nix_path: self.nix_path, + compiler_observer: self.compiler_observer, + runtime_observer: self.runtime_observer, + } + } + + pub fn with_enable_import(self, enable_import: bool) -> Self { + Self { + enable_import, + ..self + } + } + + pub fn disable_import(self) -> Self { + self.with_enable_import(false) + } + + pub fn enable_import(self) -> Self { + self.with_enable_import(true) + } + + fn builtins_mut(&mut self) -> &mut BuilderBuiltins { + match &mut self.globals { + BuilderGlobals::Builtins(builtins) => builtins, + BuilderGlobals::Globals(_) => { + panic!("Cannot modify builtins on an EvaluationBuilder with globals configured") + } + } + } + + /// Add additional builtins (represented as tuples of name and [`Value`]) to this evaluation + /// builder. + /// + /// # Panics + /// + /// Panics if this evaluation builder has had globals set via [`with_globals`] + pub fn add_builtins<I>(mut self, builtins: I) -> Self + where + I: IntoIterator<Item = (&'static str, Value)>, + { + self.builtins_mut().builtins.extend(builtins); + self + } + + /// Add additional builtins that are implemented in Nix source code (represented as tuples of + /// name and nix source) to this evaluation builder. + /// + /// # Panics + /// + /// Panics if this evaluation builder has had globals set via [`with_globals`] + pub fn add_src_builtin(mut self, name: &'static str, src: &'static str) -> Self { + self.builtins_mut().src_builtins.push((name, src)); + self + } + + /// Set the globals for this evaluation builder to a previously-constructed globals map. + /// Intended to allow sharing globals across multiple evaluations (eg for the REPL). + /// + /// Discards any builtins previously configured via [`add_builtins`] and [`add_src_builtins`]. + /// If either of those methods is called on the evaluation builder after this one, they will + /// panic. 
+ pub fn with_globals(self, globals: Rc<GlobalsMap>) -> Self { + Self { + globals: BuilderGlobals::Globals(globals), + ..self + } + } + + pub fn with_source_map(self, source_map: SourceCode) -> Self { + debug_assert!( + self.source_map.is_none(), + "Cannot set the source_map on an EvaluationBuilder twice" + ); + Self { + source_map: Some(source_map), + ..self + } + } + + pub fn mode(self, mode: EvalMode) -> Self { + Self { mode, ..self } + } + + pub fn nix_path(self, nix_path: Option<String>) -> Self { + Self { nix_path, ..self } + } + + pub fn env(self, env: Option<&'env FxHashMap<SmolStr, Value>>) -> Self { + Self { env, ..self } + } + + pub fn compiler_observer( + self, + compiler_observer: Option<&'co mut dyn CompilerObserver>, + ) -> Self { + Self { + compiler_observer, + ..self + } + } + + pub fn set_compiler_observer( + &mut self, + compiler_observer: Option<&'co mut dyn CompilerObserver>, + ) { + self.compiler_observer = compiler_observer; + } + + pub fn runtime_observer(self, runtime_observer: Option<&'ro mut dyn RuntimeObserver>) -> Self { + Self { + runtime_observer, + ..self + } + } + + pub fn set_runtime_observer(&mut self, runtime_observer: Option<&'ro mut dyn RuntimeObserver>) { + self.runtime_observer = runtime_observer; + } +} + +impl<'co, 'ro, 'env, IO> EvaluationBuilder<'co, 'ro, 'env, IO> { + pub fn source_map(&mut self) -> &SourceCode { + self.source_map.get_or_insert_with(SourceCode::default) + } +} + +impl<'co, 'ro, 'env> EvaluationBuilder<'co, 'ro, 'env, Box<dyn EvalIO>> { + /// Initialize an `Evaluation`, without the import statement available, and + /// all IO operations stubbed out. + pub fn new_pure() -> Self { + Self::new(Box::new(DummyIO) as Box<dyn EvalIO>).with_enable_import(false) + } + + #[cfg(feature = "impure")] + /// Configure an `Evaluation` to have impure features available + /// with the given I/O implementation. + /// + /// If no I/O implementation is supplied, [`StdIO`] is used by + /// default. + pub fn enable_impure(mut self, io: Option<Box<dyn EvalIO>>) -> Self { + self.io_handle = io.unwrap_or_else(|| Box::new(StdIO) as Box<dyn EvalIO>); + self.enable_import = true; + self.builtins_mut() + .builtins + .extend(builtins::impure_builtins()); + + // Make `NIX_PATH` resolutions work by default, unless the + // user already overrode this with something else. + if self.nix_path.is_none() { + self.nix_path = std::env::var("NIX_PATH").ok(); + } + self + } + + #[cfg(feature = "impure")] + /// Initialise an `Evaluation`, with all impure features turned on by default. + pub fn new_impure() -> Self { + Self::new_pure().enable_impure(None) + } +} + /// An `Evaluation` represents how a piece of Nix code is evaluated. It can be /// instantiated and configured directly, or it can be accessed through the /// various simplified helper methods available below. /// /// Public fields are intended to be set by the caller. Setting all /// fields is optional. -pub struct Evaluation<'co, 'ro, IO> { +pub struct Evaluation<'co, 'ro, 'env, IO> { /// Source code map used for error reporting. source_map: SourceCode, - /// Set of all builtins that should be available during the - /// evaluation. - /// - /// This defaults to all pure builtins. Users might want to add - /// the set of impure builtins, or other custom builtins. - pub builtins: Vec<(&'static str, Value)>, + /// Set of all global values available at the top-level scope + globals: Rc<GlobalsMap>, - /// Set of builtins that are implemented in Nix itself and should - /// be compiled and inserted in the builtins set. 
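// A sketch of the new `env` hook configured via the builder above: callers can
// predefine top-level variables for the compiled expression. The variable name
// and value are illustrative; `Value::Integer` is assumed from the existing
// public `Value` enum, and `rustc_hash`/`smol_str` are the same crates
// tvix-eval itself depends on.
fn eval_with_env() -> tvix_eval::EvaluationResult {
    use rustc_hash::FxHashMap;
    use smol_str::SmolStr;

    let mut env: FxHashMap<SmolStr, tvix_eval::Value> = FxHashMap::default();
    env.insert(SmolStr::new("answer"), tvix_eval::Value::Integer(42));

    let eval = tvix_eval::Evaluation::builder_pure()
        .env(Some(&env))
        .build();
    eval.evaluate("answer * 2", None)
}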
- pub src_builtins: Vec<(&'static str, &'static str)>, + /// Top-level variables to define in the evaluation + env: Option<&'env FxHashMap<SmolStr, Value>>, /// Implementation of file-IO to use during evaluation, e.g. for /// impure builtins. /// /// Defaults to [`DummyIO`] if not set explicitly. - pub io_handle: IO, - - /// Determines whether the `import` builtin should be made - /// available. Note that this depends on the `io_handle` being - /// able to read the files specified as arguments to `import`. - pub enable_import: bool, + io_handle: IO, - /// Determines whether the returned value should be strictly - /// evaluated, that is whether its list and attribute set elements - /// should be forced recursively. - pub strict: bool, + /// Specification for how to handle top-level values returned by evaluation + /// + /// See the documentation for [`EvalMode`] for more information. + mode: EvalMode, /// (optional) Nix search path, e.g. the value of `NIX_PATH` used /// for resolving items on the search path (such as `<nixpkgs>`). - pub nix_path: Option<String>, + nix_path: Option<String>, /// (optional) compiler observer for reporting on compilation /// details, like the emitted bytecode. - pub compiler_observer: Option<&'co mut dyn CompilerObserver>, + compiler_observer: Option<&'co mut dyn CompilerObserver>, /// (optional) runtime observer, for reporting on execution steps /// of Nix code. - pub runtime_observer: Option<&'ro mut dyn RuntimeObserver>, + runtime_observer: Option<&'ro mut dyn RuntimeObserver>, } /// Result of evaluating a piece of Nix code. If evaluation succeeded, a value @@ -131,73 +393,44 @@ pub struct EvaluationResult { pub expr: Option<rnix::ast::Expr>, } -impl<'co, 'ro, IO> Evaluation<'co, 'ro, IO> -where - IO: AsRef<dyn EvalIO> + 'static, -{ - /// Initialize an `Evaluation`. - pub fn new(io_handle: IO, enable_import: bool) -> Self { - let mut builtins = builtins::pure_builtins(); - builtins.extend(builtins::placeholders()); // these are temporary +impl<'co, 'ro, 'env, IO> Evaluation<'co, 'ro, 'env, IO> { + /// Make a new [builder][] for configuring an evaluation + /// + /// [builder]: EvaluationBuilder + pub fn builder(io_handle: IO) -> EvaluationBuilder<'co, 'ro, 'env, IO> { + EvaluationBuilder::new(io_handle) + } - Self { - source_map: SourceCode::default(), - enable_import, - io_handle, - builtins, - src_builtins: vec![], - strict: false, - nix_path: None, - compiler_observer: None, - runtime_observer: None, - } + /// Clone the reference to the map of Nix globals for this evaluation. If [`Value`]s are shared + /// across subsequent [`Evaluation`]s, it is important that those evaluations all have the same + /// underlying globals map. + pub fn globals(&self) -> Rc<GlobalsMap> { + self.globals.clone() } -} -impl<'co, 'ro> Evaluation<'co, 'ro, Box<dyn EvalIO>> { - /// Initialize an `Evaluation`, without the import statement available, and - /// all IO operations stubbed out. - pub fn new_pure() -> Self { - Self::new(Box::new(DummyIO) as Box<dyn EvalIO>, false) + /// Clone the reference to the contained source code map. This is used after an evaluation for + /// pretty error printing. Also, if [`Value`]s are shared across subsequent [`Evaluation`]s, it + /// is important that those evaluations all have the same underlying source code map. 
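// A sketch of the sharing workflow described above, e.g. for a REPL: the
// globals map and source code map of one evaluation are reused when building
// the next one, so shared values and spans stay consistent across evaluations.
fn repl_style_reuse() {
    let first = tvix_eval::Evaluation::builder_pure().build();
    let globals = first.globals();
    let source_map = first.source_map();
    let _ = first.evaluate("1 + 1", None);

    let second = tvix_eval::Evaluation::builder_pure()
        .with_globals(globals)
        .with_source_map(source_map)
        .build();
    let _ = second.evaluate("builtins.isInt 3", None);
}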
+ pub fn source_map(&self) -> SourceCode { + self.source_map.clone() } +} +impl<'co, 'ro, 'env> Evaluation<'co, 'ro, 'env, Box<dyn EvalIO>> { #[cfg(feature = "impure")] - /// Configure an `Evaluation` to have impure features available - /// with the given I/O implementation. - /// - /// If no I/O implementation is supplied, [`StdIO`] is used by - /// default. - pub fn enable_impure(&mut self, io: Option<Box<dyn EvalIO>>) { - self.io_handle = io.unwrap_or_else(|| Box::new(StdIO) as Box<dyn EvalIO>); - self.enable_import = true; - self.builtins.extend(builtins::impure_builtins()); - - // Make `NIX_PATH` resolutions work by default, unless the - // user already overrode this with something else. - if self.nix_path.is_none() { - self.nix_path = std::env::var("NIX_PATH").ok(); - } + pub fn builder_impure() -> EvaluationBuilder<'co, 'ro, 'env, Box<dyn EvalIO>> { + EvaluationBuilder::new_impure() } - #[cfg(feature = "impure")] - /// Initialise an `Evaluation`, with all impure features turned on by default. - pub fn new_impure() -> Self { - let mut eval = Self::new_pure(); - eval.enable_impure(None); - eval + pub fn builder_pure() -> EvaluationBuilder<'co, 'ro, 'env, Box<dyn EvalIO>> { + EvaluationBuilder::new_pure() } } -impl<'co, 'ro, IO> Evaluation<'co, 'ro, IO> +impl<'co, 'ro, 'env, IO> Evaluation<'co, 'ro, 'env, IO> where IO: AsRef<dyn EvalIO> + 'static, { - /// Clone the reference to the contained source code map. This is used after - /// an evaluation for pretty error printing. - pub fn source_map(&self) -> SourceCode { - self.source_map.clone() - } - /// Only compile the provided source code, at an optional location of the /// source code (i.e. path to the file it was read from; used for error /// reporting, and for resolving relative paths in impure functions) @@ -227,9 +460,8 @@ where file, location, source, - self.builtins, - self.src_builtins, - self.enable_import, + self.globals, + self.env, compiler_observer, ); @@ -257,20 +489,14 @@ where let mut noop_observer = observer::NoOpObserver::default(); let compiler_observer = self.compiler_observer.take().unwrap_or(&mut noop_observer); - // Insert a storeDir builtin *iff* a store directory is present. - if let Some(store_dir) = self.io_handle.as_ref().store_dir() { - self.builtins.push(("storeDir", store_dir.into())); - } - - let (lambda, globals) = match parse_compile_internal( + let lambda = match parse_compile_internal( &mut result, code.as_ref(), file.clone(), location, source.clone(), - self.builtins, - self.src_builtins, - self.enable_import, + self.globals.clone(), + self.env, compiler_observer, ) { None => return result, @@ -302,9 +528,9 @@ where self.io_handle, runtime_observer, source.clone(), - globals, + self.globals, lambda, - self.strict, + self.mode, ); match vm_result { @@ -339,11 +565,10 @@ fn parse_compile_internal( file: Arc<codemap::File>, location: Option<PathBuf>, source: SourceCode, - builtins: Vec<(&'static str, Value)>, - src_builtins: Vec<(&'static str, &'static str)>, - enable_import: bool, + globals: Rc<GlobalsMap>, + env: Option<&FxHashMap<SmolStr, Value>>, compiler_observer: &mut dyn CompilerObserver, -) -> Option<(Rc<Lambda>, Rc<GlobalsMap>)> { +) -> Option<Rc<Lambda>> { let parsed = rnix::ast::Root::parse(code); let parse_errors = parsed.errors(); @@ -361,13 +586,11 @@ fn parse_compile_internal( // the result, in case the caller needs it for something. 
result.expr = parsed.tree().expr(); - let builtins = - crate::compiler::prepare_globals(builtins, src_builtins, source.clone(), enable_import); - let compiler_result = match compiler::compile( result.expr.as_ref().unwrap(), location, - builtins, + globals, + env, &source, &file, compiler_observer, @@ -390,5 +613,5 @@ fn parse_compile_internal( // Return the lambda (for execution) and the globals map (to // ensure the invariant that the globals outlive the runtime). - Some((compiler_result.lambda, compiler_result.globals)) + Some(compiler_result.lambda) } diff --git a/tvix/eval/src/observer.rs b/tvix/eval/src/observer.rs index f5de399315c7..5e6526418b3b 100644 --- a/tvix/eval/src/observer.rs +++ b/tvix/eval/src/observer.rs @@ -13,7 +13,7 @@ use tabwriter::TabWriter; use crate::chunk::Chunk; use crate::generators::VMRequest; -use crate::opcode::{CodeIdx, OpCode}; +use crate::opcode::{CodeIdx, Op}; use crate::value::Lambda; use crate::SourceCode; use crate::Value; @@ -73,7 +73,7 @@ pub trait RuntimeObserver { /// Called when the runtime *begins* executing an instruction. The /// provided stack is the state at the beginning of the operation. - fn observe_execute_op(&mut self, _ip: CodeIdx, _: &OpCode, _: &[Value]) {} + fn observe_execute_op(&mut self, _ip: CodeIdx, _: &Op, _: &[Value]) {} } #[derive(Default)] @@ -112,8 +112,12 @@ impl<W: Write> DisassemblingObserver<W> { // calculate width of the widest address in the chunk let width = format!("{:#x}", chunk.code.len() - 1).len(); - for (idx, _) in chunk.code.iter().enumerate() { - let _ = chunk.disassemble_op(&mut self.writer, &self.source, width, CodeIdx(idx)); + let mut idx = 0; + while idx < chunk.code.len() { + let size = chunk + .disassemble_op(&mut self.writer, &self.source, width, CodeIdx(idx)) + .expect("writing debug output should work"); + idx += size; } } } @@ -304,7 +308,7 @@ impl<W: Write> RuntimeObserver for TracingObserver<W> { ); } - fn observe_execute_op(&mut self, ip: CodeIdx, op: &OpCode, stack: &[Value]) { + fn observe_execute_op(&mut self, ip: CodeIdx, op: &Op, stack: &[Value]) { self.maybe_write_time(); let _ = write!(&mut self.writer, "{:04} {:?}\t", ip.0, op); self.write_stack(stack); diff --git a/tvix/eval/src/opcode.rs b/tvix/eval/src/opcode.rs index f89c1c12e7fd..ddf1304b3aea 100644 --- a/tvix/eval/src/opcode.rs +++ b/tvix/eval/src/opcode.rs @@ -52,8 +52,7 @@ pub struct JumpOffset(pub usize); #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct Count(pub usize); -/// All variants of this enum carry a bounded amount of data to -/// ensure that no heap allocations are needed for an Opcode. +/// Op represents all instructions in the Tvix abstract machine. /// /// In documentation comments, stack positions are referred to by /// indices written in `{}` as such, where required: @@ -70,187 +69,182 @@ pub struct Count(pub usize); /// /// Unless otherwise specified, operations leave their result at the /// top of the stack. -#[warn(variant_size_differences)] -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum OpCode { +#[repr(u8)] +#[derive(Debug, PartialEq, Eq)] +pub enum Op { /// Push a constant onto the stack. - OpConstant(ConstantIdx), + Constant, - // Unary operators - /// Discard a value from the stack. - OpPop, + /// Discard the value on top of the stack. + Pop, /// Invert the boolean at the top of the stack. - OpInvert, + Invert, - // Binary operators /// Invert the sign of the number at the top of the stack. - OpNegate, + Negate, /// Sum up the two numbers at the top of the stack. 
- OpAdd, + Add, /// Subtract the number at {1} from the number at {2}. - OpSub, + Sub, /// Multiply the two numbers at the top of the stack. - OpMul, + Mul, /// Divide the two numbers at the top of the stack. - OpDiv, + Div, - // Comparison operators /// Check the two values at the top of the stack for Nix-equality. - OpEqual, + Equal, /// Check whether the value at {2} is less than {1}. - OpLess, + Less, /// Check whether the value at {2} is less than or equal to {1}. - OpLessOrEq, + LessOrEq, /// Check whether the value at {2} is greater than {1}. - OpMore, + More, /// Check whether the value at {2} is greater than or equal to {1}. - OpMoreOrEq, + MoreOrEq, - // Logical operators & generic jumps /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand. - OpJump(JumpOffset), + Jump, /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand, *if* the value at the top /// of the stack is `true`. - OpJumpIfTrue(JumpOffset), + JumpIfTrue, /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand, *if* the value at the top /// of the stack is `false`. - OpJumpIfFalse(JumpOffset), + JumpIfFalse, /// Pop one stack item and jump forward in the bytecode /// specified by the number of instructions in its usize /// operand, *if* the value at the top of the stack is a /// Value::Catchable. - OpJumpIfCatchable(JumpOffset), + JumpIfCatchable, /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand, *if* the value at the top /// of the stack is the internal value representing a missing /// attribute set key. - OpJumpIfNotFound(JumpOffset), + JumpIfNotFound, /// Jump forward in the bytecode specified by the number of /// instructions in its usize operand, *if* the value at the top /// of the stack is *not* the internal value requesting a /// stack value finalisation. - OpJumpIfNoFinaliseRequest(JumpOffset), + JumpIfNoFinaliseRequest, + + /// Construct an attribute set from the given number of key-value pairs on + /// the top of the stack. The operand gives the count of *pairs*, not the + /// number of *stack values* - the actual number of values popped off the + /// stack will be twice the argument to this op. + Attrs, - // Attribute sets - /// Construct an attribute set from the given number of key-value pairs on the top of the stack - /// - /// Note that this takes the count of *pairs*, not the number of *stack values* - the actual - /// number of values popped off the stack will be twice the argument to this op - OpAttrs(Count), /// Merge the attribute set at {2} into the attribute set at {1}, /// and leave the new set at the top of the stack. - OpAttrsUpdate, + AttrsUpdate, /// Select the attribute with the name at {1} from the set at {2}. - OpAttrsSelect, + AttrsSelect, /// Select the attribute with the name at {1} from the set at {2}, but leave /// a `Value::AttrNotFound` in the stack instead of failing if it is /// missing. - OpAttrsTrySelect, + AttrsTrySelect, /// Check for the presence of the attribute with the name at {1} in the set /// at {2}. 
- OpHasAttr, + HasAttr, /// Throw an error if the attribute set at the top of the stack has any attributes /// other than those listed in the formals of the current lambda /// /// Panics if the current frame is not a lambda with formals - OpValidateClosedFormals, + ValidateClosedFormals, - // `with`-handling /// Push a value onto the runtime `with`-stack to enable dynamic identifier /// resolution. The absolute stack index of the value is supplied as a usize /// operand. - OpPushWith(StackIdx), + PushWith, /// Pop the last runtime `with`-stack element. - OpPopWith, + PopWith, /// Dynamically resolve an identifier with the name at {1} from the runtime /// `with`-stack. - OpResolveWith, + ResolveWith, // Lists /// Construct a list from the given number of values at the top of the /// stack. - OpList(Count), + List, /// Concatenate the lists at {2} and {1}. - OpConcat, + Concat, // Strings /// Interpolate the given number of string fragments into a single string. - OpInterpolate(Count), + Interpolate, /// Force the Value on the stack and coerce it to a string - OpCoerceToString(crate::CoercionKind), + CoerceToString, // Paths /// Attempt to resolve the Value on the stack using the configured [`NixSearchPath`][] /// /// [`NixSearchPath`]: crate::nix_search_path::NixSearchPath - OpFindFile, + FindFile, /// Attempt to resolve a path literal relative to the home dir - OpResolveHomePath, + ResolveHomePath, // Type assertion operators /// Assert that the value at {1} is a boolean, and fail with a runtime error /// otherwise. - OpAssertBool, - OpAssertAttrs, + AssertBool, + AssertAttrs, /// Access local identifiers with statically known positions. - OpGetLocal(StackIdx), + GetLocal, /// Close scopes while leaving their expression value around. - OpCloseScope(Count), // number of locals to pop + CloseScope, /// Return an error indicating that an `assert` failed - OpAssertFail, + AssertFail, // Lambdas & closures /// Call the value at {1} in a new VM callframe - OpCall, + Call, /// Retrieve the upvalue at the given index from the closure or thunk /// currently under evaluation. - OpGetUpvalue(UpvalueIdx), + GetUpvalue, /// Construct a closure which has upvalues but no self-references - OpClosure(ConstantIdx), + Closure, /// Construct a closure which has self-references (direct or via upvalues) - OpThunkClosure(ConstantIdx), + ThunkClosure, /// Construct a suspended thunk, used to delay a computation for laziness. - OpThunkSuspended(ConstantIdx), + ThunkSuspended, /// Force the value at {1} until it is a `Thunk::Evaluated`. - OpForce, + Force, /// Finalise initialisation of the upvalues of the value in the given stack /// index (which must be a Value::Thunk) after the scope is fully bound. - OpFinalise(StackIdx), + Finalise, /// Final instruction emitted in a chunk. Does not have an /// inherent effect, but can simplify VM logic as a marker in some @@ -258,27 +252,140 @@ pub enum OpCode { /// /// Can be thought of as "returning" the value to the parent /// frame, hence the name. - OpReturn, - - // [`OpClosure`], [`OpThunkSuspended`], and [`OpThunkClosure`] have a - // variable number of arguments to the instruction, which is - // represented here by making their data part of the opcodes. - // Each of these two opcodes has a `ConstantIdx`, which must - // reference a `Value::Blueprint(Lambda)`. The `upvalue_count` - // field in that `Lambda` indicates the number of arguments it - // takes, and the opcode must be followed by exactly this number - // of `Data*` opcodes. 
The VM skips over these by advancing the - // instruction pointer. - // - // It is illegal for a `Data*` opcode to appear anywhere else. - /// Populate a static upvalue by copying from the stack immediately. - DataStackIdx(StackIdx), - /// Populate a static upvalue of a thunk by copying it the stack, but do - /// when the thunk is finalised (by OpFinalise) rather than immediately. - DataDeferredLocal(StackIdx), - /// Populate a static upvalue by copying it from the upvalues of an - /// enclosing scope. - DataUpvalueIdx(UpvalueIdx), - /// Populate dynamic upvalues by saving a copy of the with-stack. - DataCaptureWith, + Return, + + /// Sentinel value to signal invalid bytecode. This MUST always be the last + /// value in the enum. Do not move it! + Invalid, +} + +const _ASSERT_SMALL_OP: () = assert!(std::mem::size_of::<Op>() == 1); + +impl From<u8> for Op { + fn from(num: u8) -> Self { + if num >= Self::Invalid as u8 { + return Self::Invalid; + } + + // SAFETY: As long as `Invalid` remains the last variant of the enum, + // and as long as variant values are not specified manually, this + // conversion is safe. + unsafe { std::mem::transmute(num) } + } +} + +pub enum OpArg { + None, + Uvarint, + Fixed, + Custom, +} + +impl Op { + pub fn arg_type(&self) -> OpArg { + match self { + Op::Constant + | Op::Attrs + | Op::PushWith + | Op::List + | Op::Interpolate + | Op::GetLocal + | Op::CloseScope + | Op::GetUpvalue + | Op::Finalise => OpArg::Uvarint, + + Op::Jump + | Op::JumpIfTrue + | Op::JumpIfFalse + | Op::JumpIfCatchable + | Op::JumpIfNotFound + | Op::JumpIfNoFinaliseRequest => OpArg::Fixed, + + Op::CoerceToString | Op::Closure | Op::ThunkClosure | Op::ThunkSuspended => { + OpArg::Custom + } + _ => OpArg::None, + } + } +} + +/// Position is used to represent where to capture an upvalue from. 
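// An illustrative check (it would live in a unit test inside this crate and is
// not part of the patch) of the new u8 <-> Op conversion and the OpArg table
// above: bytes below Op::Invalid map to their variant, anything else decodes
// to the Invalid sentinel, and arg_type() tells the VM how wide the operand
// following the opcode byte is.
#[test]
fn op_u8_roundtrip() {
    assert_eq!(Op::from(Op::Constant as u8), Op::Constant);
    assert_eq!(Op::from(Op::Return as u8), Op::Return);
    assert_eq!(Op::from(0xff), Op::Invalid);
    assert!(matches!(Op::Jump.arg_type(), OpArg::Fixed));
    assert!(matches!(Op::GetLocal.arg_type(), OpArg::Uvarint));
}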
+#[derive(Clone, Copy)] +pub struct Position(pub u64); + +impl Position { + pub fn stack_index(idx: StackIdx) -> Self { + Position((idx.0 as u64) << 2) + } + + pub fn deferred_local(idx: StackIdx) -> Self { + Position(((idx.0 as u64) << 2) | 1) + } + + pub fn upvalue_index(idx: UpvalueIdx) -> Self { + Position(((idx.0 as u64) << 2) | 2) + } + + pub fn runtime_stack_index(&self) -> Option<StackIdx> { + if (self.0 & 0b11) == 0 { + return Some(StackIdx((self.0 >> 2) as usize)); + } + + None + } + + pub fn runtime_deferred_local(&self) -> Option<StackIdx> { + if (self.0 & 0b11) == 1 { + return Some(StackIdx((self.0 >> 2) as usize)); + } + + None + } + + pub fn runtime_upvalue_index(&self) -> Option<UpvalueIdx> { + if (self.0 & 0b11) == 2 { + return Some(UpvalueIdx((self.0 >> 2) as usize)); + } + + None + } +} + +#[cfg(test)] +mod position_tests { + use super::Position; // he-he + use super::{StackIdx, UpvalueIdx}; + + #[test] + fn test_stack_index_position() { + let idx = StackIdx(42); + let pos = Position::stack_index(idx); + let result = pos.runtime_stack_index(); + + assert_eq!(result, Some(idx)); + assert_eq!(pos.runtime_deferred_local(), None); + assert_eq!(pos.runtime_upvalue_index(), None); + } + + #[test] + fn test_deferred_local_position() { + let idx = StackIdx(42); + let pos = Position::deferred_local(idx); + let result = pos.runtime_deferred_local(); + + assert_eq!(result, Some(idx)); + assert_eq!(pos.runtime_stack_index(), None); + assert_eq!(pos.runtime_upvalue_index(), None); + } + + #[test] + fn test_upvalue_index_position() { + let idx = UpvalueIdx(42); + let pos = Position::upvalue_index(idx); + let result = pos.runtime_upvalue_index(); + + assert_eq!(result, Some(idx)); + assert_eq!(pos.runtime_stack_index(), None); + assert_eq!(pos.runtime_deferred_local(), None); + } } diff --git a/tvix/eval/src/spans.rs b/tvix/eval/src/spans.rs index f422093b0d12..df2b9a725562 100644 --- a/tvix/eval/src/spans.rs +++ b/tvix/eval/src/spans.rs @@ -5,37 +5,6 @@ use codemap::{File, Span}; use rnix::ast; use rowan::ast::AstNode; -/// Helper struct to carry information required for making a span, but -/// without actually performing the (expensive) span lookup. -/// -/// This is used for tracking spans across thunk boundaries, as they -/// are frequently instantiated but spans are only used in error or -/// warning cases. -#[derive(Clone, Debug)] -pub enum LightSpan { - /// The span has already been computed and can just be used right - /// away. - Actual { span: Span }, -} - -impl LightSpan { - pub fn new_actual(span: Span) -> Self { - Self::Actual { span } - } - - pub fn span(&self) -> Span { - match self { - LightSpan::Actual { span } => *span, - } - } -} - -impl From<Span> for LightSpan { - fn from(span: Span) -> Self { - LightSpan::Actual { span } - } -} - /// Trait implemented by all types from which we can retrieve a span. pub trait ToSpan { fn span_for(&self, file: &File) -> Span; @@ -66,6 +35,33 @@ impl ToSpan for rnix::SyntaxNode { } } +/// A placeholder [`ToSpan`] implementation covering the entire source file. 
+#[derive(Debug, Clone, Copy)] +pub struct EntireFile; + +impl ToSpan for EntireFile { + fn span_for(&self, file: &File) -> Span { + file.span + } +} + +/// A placeholder [`ToSpan`] implementation which falls back to the entire file if its wrapped value +/// is [`None`] +#[derive(Debug, Clone, Copy)] +pub struct OrEntireFile<T>(pub Option<T>); + +impl<T> ToSpan for OrEntireFile<T> +where + T: ToSpan, +{ + fn span_for(&self, file: &File) -> Span { + match &self.0 { + Some(t) => t.span_for(file), + None => EntireFile.span_for(file), + } + } +} + /// Generates a `ToSpan` implementation for a type implementing /// `rowan::AstNode`. This is impossible to do as a blanket /// implementation because `rustc` forbids these implementations for diff --git a/tvix/eval/src/tests/nix_tests.rs b/tvix/eval/src/tests/nix_tests.rs index 17968e4bdb09..b17fe98bc9e4 100644 --- a/tvix/eval/src/tests/nix_tests.rs +++ b/tvix/eval/src/tests/nix_tests.rs @@ -37,6 +37,8 @@ mod mock_builtins { #[cfg(feature = "impure")] fn eval_test(code_path: PathBuf, expect_success: bool) { + use crate::vm::EvalMode; + std::env::set_var("TEST_VAR", "foo"); // for eval-okay-getenv.nix eprintln!("path: {}", code_path.display()); @@ -48,9 +50,10 @@ fn eval_test(code_path: PathBuf, expect_success: bool) { let code = std::fs::read_to_string(&code_path).expect("should be able to read test code"); - let mut eval = crate::Evaluation::new_impure(); - eval.strict = true; - eval.builtins.extend(mock_builtins::builtins()); + let eval = crate::Evaluation::builder_impure() + .mode(EvalMode::Strict) + .add_builtins(mock_builtins::builtins()) + .build(); let result = eval.evaluate(code, Some(code_path.clone())); let failed = match result.value { @@ -124,12 +127,14 @@ fn eval_test(code_path: PathBuf, expect_success: bool) { #[cfg(feature = "impure")] #[rstest] fn identity(#[files("src/tests/tvix_tests/identity-*.nix")] code_path: PathBuf) { - use crate::EvalIO; + use crate::{vm::EvalMode, EvalIO}; let code = std::fs::read_to_string(code_path).expect("should be able to read test code"); - let mut eval = crate::Evaluation::new(Box::new(crate::StdIO) as Box<dyn EvalIO>, false); - eval.strict = true; + let eval = crate::Evaluation::builder(Box::new(crate::StdIO) as Box<dyn EvalIO>) + .disable_import() + .mode(EvalMode::Strict) + .build(); let result = eval.evaluate(&code, None); assert!( diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-readDir.exp b/tvix/eval/src/tests/nix_tests/eval-okay-readDir.exp index 6413f6d4f9ec..6413f6d4f9ec 100644 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-readDir.exp +++ b/tvix/eval/src/tests/nix_tests/eval-okay-readDir.exp diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-readDir.nix.disabled b/tvix/eval/src/tests/nix_tests/eval-okay-readDir.nix index a7ec9292aae2..a7ec9292aae2 100644 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-readDir.nix.disabled +++ b/tvix/eval/src/tests/nix_tests/eval-okay-readDir.nix diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-readFileType.exp b/tvix/eval/src/tests/nix_tests/eval-okay-readFileType.exp index 6413f6d4f9ec..6413f6d4f9ec 100644 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-readFileType.exp +++ b/tvix/eval/src/tests/nix_tests/eval-okay-readFileType.exp diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-readFileType.nix b/tvix/eval/src/tests/nix_tests/eval-okay-readFileType.nix index 174fb6c3a028..174fb6c3a028 100644 --- 
a/tvix/eval/src/tests/nix_tests/notyetpassing/eval-okay-readFileType.nix +++ b/tvix/eval/src/tests/nix_tests/eval-okay-readFileType.nix diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/readDir/bar b/tvix/eval/src/tests/nix_tests/readDir/bar index e69de29bb2d1..e69de29bb2d1 100644 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/readDir/bar +++ b/tvix/eval/src/tests/nix_tests/readDir/bar diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/readDir/foo/git-hates-directories b/tvix/eval/src/tests/nix_tests/readDir/foo/git-hates-directories index e69de29bb2d1..e69de29bb2d1 100644 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/readDir/foo/git-hates-directories +++ b/tvix/eval/src/tests/nix_tests/readDir/foo/git-hates-directories diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/readDir/ldir b/tvix/eval/src/tests/nix_tests/readDir/ldir index 19102815663d..19102815663d 120000 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/readDir/ldir +++ b/tvix/eval/src/tests/nix_tests/readDir/ldir diff --git a/tvix/eval/src/tests/nix_tests/notyetpassing/readDir/linked b/tvix/eval/src/tests/nix_tests/readDir/linked index c503f86a0cf7..c503f86a0cf7 120000 --- a/tvix/eval/src/tests/nix_tests/notyetpassing/readDir/linked +++ b/tvix/eval/src/tests/nix_tests/readDir/linked diff --git a/tvix/eval/src/tests/one_offs.rs b/tvix/eval/src/tests/one_offs.rs index 21e9144baf64..49ed713fc96c 100644 --- a/tvix/eval/src/tests/one_offs.rs +++ b/tvix/eval/src/tests/one_offs.rs @@ -5,8 +5,9 @@ fn test_source_builtin() { // Test an evaluation with a source-only builtin. The test ensures // that the artificially constructed thunking is correct. - let mut eval = Evaluation::new_pure(); - eval.src_builtins.push(("testSourceBuiltin", "42")); + let eval = Evaluation::builder_pure() + .add_src_builtin("testSourceBuiltin", "42") + .build(); let result = eval.evaluate("builtins.testSourceBuiltin", None); assert!( @@ -25,7 +26,9 @@ fn test_source_builtin() { #[test] fn skip_broken_bytecode() { - let result = Evaluation::new_pure().evaluate(/* code = */ "x", None); + let result = Evaluation::builder_pure() + .build() + .evaluate(/* code = */ "x", None); assert_eq!(result.errors.len(), 1); diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-to-json-propagate-catchable.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-to-json-propagate-catchable.exp index ca00e3c049d6..5f97039f94d1 100644 --- a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-to-json-propagate-catchable.exp +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-to-json-propagate-catchable.exp @@ -1 +1 @@ -[ false false false false ] +[ false false false false false ] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-to-json-propagate-catchable.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-to-json-propagate-catchable.nix index 8ae5e48e9737..56fd1deb7b1b 100644 --- a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-to-json-propagate-catchable.nix +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-to-json-propagate-catchable.nix @@ -10,5 +10,8 @@ map (e: (builtins.tryEval (builtins.toJSON e)).success) [ x = 32; y = builtins.throw "second argument"; } + { + __toString = _: builtins.throw "__toString a"; + } # FIXME(raitobezarius): we would like to test coercions, i.e. `toFile` and `derivation` containing throwables. 
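As the updated tests above show, an `Evaluation` is no longer configured by mutating public fields (`eval.strict`, `eval.src_builtins`, ...); construction now goes through a builder. A condensed sketch of the pattern, assuming the builder methods that appear in this diff (`mode`, `add_builtins`, `add_src_builtin`, `build`):

    #[test]
    fn builder_example() {
        use crate::vm::EvalMode;

        let eval = crate::Evaluation::builder_pure()
            .mode(EvalMode::Strict)
            .add_src_builtin("testSourceBuiltin", "42")
            .build();

        let result = eval.evaluate("builtins.testSourceBuiltin", None);
        assert!(result.errors.is_empty());
    }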
] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-path-exists-child-of-file.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-path-exists-child-of-file.exp new file mode 100644 index 000000000000..c508d5366f70 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-path-exists-child-of-file.exp @@ -0,0 +1 @@ +false diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-path-exists-child-of-file.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-path-exists-child-of-file.nix new file mode 100644 index 000000000000..c756bff755ba --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-path-exists-child-of-file.nix @@ -0,0 +1 @@ +builtins.pathExists ("/dev/null/.") diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-readDir.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-readDir.exp deleted file mode 100644 index bf8d2c14ea4f..000000000000 --- a/tvix/eval/src/tests/tvix_tests/eval-okay-readDir.exp +++ /dev/null @@ -1 +0,0 @@ -{ bar = "regular"; foo = "directory"; } diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-readDir.nix.disabled b/tvix/eval/src/tests/tvix_tests/eval-okay-readDir.nix.disabled deleted file mode 100644 index a7ec9292aae2..000000000000 --- a/tvix/eval/src/tests/tvix_tests/eval-okay-readDir.nix.disabled +++ /dev/null @@ -1 +0,0 @@ -builtins.readDir ./readDir diff --git a/tvix/eval/src/tests/tvix_tests/readDir/bar b/tvix/eval/src/tests/tvix_tests/readDir/bar deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tvix/eval/src/tests/tvix_tests/readDir/bar +++ /dev/null diff --git a/tvix/eval/src/tests/tvix_tests/readDir/foo/.keep b/tvix/eval/src/tests/tvix_tests/readDir/foo/.keep deleted file mode 100644 index e69de29bb2d1..000000000000 --- a/tvix/eval/src/tests/tvix_tests/readDir/foo/.keep +++ /dev/null diff --git a/tvix/eval/src/value/arbitrary.rs b/tvix/eval/src/value/arbitrary.rs index bf53f4fcb28a..49b9f2eea3fb 100644 --- a/tvix/eval/src/value/arbitrary.rs +++ b/tvix/eval/src/value/arbitrary.rs @@ -1,6 +1,6 @@ //! Support for configurable generation of arbitrary nix values -use imbl::proptest::{ord_map, vector}; +use proptest::collection::{btree_map, vec}; use proptest::{prelude::*, strategy::BoxedStrategy}; use std::ffi::OsString; @@ -33,16 +33,16 @@ impl Arbitrary for NixAttrs { fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { prop_oneof![ // Empty attrs representation - Just(Self(AttrsRep::Empty)), + Just(AttrsRep::Empty.into()), // KV representation (name/value pairs) ( any_with::<Value>(args.clone()), any_with::<Value>(args.clone()) ) - .prop_map(|(name, value)| Self(AttrsRep::KV { name, value })), + .prop_map(|(name, value)| AttrsRep::KV { name, value }.into()), // Map representation - ord_map(NixString::arbitrary(), Value::arbitrary_with(args), 0..100) - .prop_map(|map| Self(AttrsRep::Im(map))) + btree_map(NixString::arbitrary(), Value::arbitrary_with(args), 0..100) + .prop_map(|map| AttrsRep::Map(map).into()) ] .boxed() } @@ -53,7 +53,7 @@ impl Arbitrary for NixList { type Strategy = BoxedStrategy<Self>; fn arbitrary_with(args: Self::Parameters) -> Self::Strategy { - vector(<Value as Arbitrary>::arbitrary_with(args), 0..100) + vec(<Value as Arbitrary>::arbitrary_with(args), 0..100) .prop_map(NixList::from) .boxed() } diff --git a/tvix/eval/src/value/attrs.rs b/tvix/eval/src/value/attrs.rs index 33259c8058eb..8d06240c65f5 100644 --- a/tvix/eval/src/value/attrs.rs +++ b/tvix/eval/src/value/attrs.rs @@ -6,11 +6,12 @@ //! Due to this, construction and management of attribute sets has //! 
some peculiarities that are encapsulated within this module. use std::borrow::Borrow; +use std::collections::{btree_map, BTreeMap}; use std::iter::FromIterator; +use std::rc::Rc; +use std::sync::LazyLock; use bstr::BStr; -use imbl::{ordmap, OrdMap}; -use lazy_static::lazy_static; use serde::de::{Deserializer, Error, Visitor}; use serde::Deserialize; @@ -21,12 +22,8 @@ use super::Value; use crate::errors::ErrorKind; use crate::CatchableErrorKind; -lazy_static! { - static ref NAME_S: NixString = "name".into(); - static ref NAME_REF: &'static NixString = &NAME_S; - static ref VALUE_S: NixString = "value".into(); - static ref VALUE_REF: &'static NixString = &VALUE_S; -} +static NAME: LazyLock<NixString> = LazyLock::new(|| "name".into()); +static VALUE: LazyLock<NixString> = LazyLock::new(|| "value".into()); #[cfg(test)] mod tests; @@ -36,7 +33,7 @@ pub(super) enum AttrsRep { #[default] Empty, - Im(OrdMap<NixString, Value>), + Map(BTreeMap<NixString, Value>), /// Warning: this represents a **two**-attribute attrset, with /// attribute names "name" and "value", like `{name="foo"; @@ -48,28 +45,6 @@ pub(super) enum AttrsRep { } impl AttrsRep { - /// Retrieve reference to a mutable map inside of an attrs, - /// optionally changing the representation if required. - fn map_mut(&mut self) -> &mut OrdMap<NixString, Value> { - match self { - AttrsRep::Im(m) => m, - - AttrsRep::Empty => { - *self = AttrsRep::Im(OrdMap::new()); - self.map_mut() - } - - AttrsRep::KV { name, value } => { - *self = AttrsRep::Im(ordmap! { - NAME_S.clone() => name.clone(), - VALUE_S.clone() => value.clone() - }); - - self.map_mut() - } - } - } - fn select(&self, key: &BStr) -> Option<&Value> { match self { AttrsRep::Empty => None, @@ -80,7 +55,7 @@ impl AttrsRep { _ => None, }, - AttrsRep::Im(map) => map.get(key), + AttrsRep::Map(map) => map.get(key), } } @@ -88,18 +63,18 @@ impl AttrsRep { match self { AttrsRep::Empty => false, AttrsRep::KV { .. 
} => key == "name" || key == "value", - AttrsRep::Im(map) => map.contains_key(key), + AttrsRep::Map(map) => map.contains_key(key), } } } #[repr(transparent)] #[derive(Clone, Debug, Default)] -pub struct NixAttrs(pub(super) AttrsRep); +pub struct NixAttrs(pub(super) Rc<AttrsRep>); -impl From<OrdMap<NixString, Value>> for NixAttrs { - fn from(map: OrdMap<NixString, Value>) -> Self { - NixAttrs(AttrsRep::Im(map)) +impl From<AttrsRep> for NixAttrs { + fn from(rep: AttrsRep) -> Self { + NixAttrs(Rc::new(rep)) } } @@ -112,34 +87,41 @@ where where T: IntoIterator<Item = (K, V)>, { - NixAttrs(AttrsRep::Im(iter.into_iter().collect())) + AttrsRep::Map( + iter.into_iter() + .map(|(k, v)| (k.into(), v.into())) + .collect(), + ) + .into() + } +} + +impl From<BTreeMap<NixString, Value>> for NixAttrs { + fn from(map: BTreeMap<NixString, Value>) -> Self { + AttrsRep::Map(map).into() } } impl TotalDisplay for NixAttrs { fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { - f.write_str("{ ")?; - - match &self.0 { - AttrsRep::KV { name, value } => { - f.write_str("name = ")?; - name.total_fmt(f, set)?; - f.write_str("; ")?; - - f.write_str("value = ")?; - value.total_fmt(f, set)?; - f.write_str("; ")?; - } - - AttrsRep::Im(map) => { - for (name, value) in map { - write!(f, "{} = ", name.ident_str())?; - value.total_fmt(f, set)?; - f.write_str("; ")?; + if let Some(Value::String(s)) = self.select("type") { + if *s == "derivation" { + write!(f, "«derivation ")?; + if let Some(p) = self.select("drvPath") { + p.total_fmt(f, set)?; + } else { + write!(f, "???")?; } + return write!(f, "»"); } + } - AttrsRep::Empty => { /* no values to print! */ } + f.write_str("{ ")?; + + for (name, value) in self.iter_sorted() { + write!(f, "{} = ", name.ident_str())?; + value.total_fmt(f, set)?; + f.write_str("; ")?; } f.write_str("}") @@ -183,25 +165,22 @@ impl<'de> Deserialize<'de> for NixAttrs { impl NixAttrs { pub fn empty() -> Self { - Self(AttrsRep::Empty) + AttrsRep::Empty.into() } /// Compare two attribute sets by pointer equality. Only makes - /// sense for some attribute set reprsentations, i.e. returning + /// sense for some attribute set representations, i.e. returning /// `false` does not mean that the two attribute sets do not have /// equal *content*. pub fn ptr_eq(&self, other: &Self) -> bool { - match (&self.0, &other.0) { - (AttrsRep::Im(lhs), AttrsRep::Im(rhs)) => lhs.ptr_eq(rhs), - _ => false, - } + Rc::ptr_eq(&self.0, &other.0) } /// Return an attribute set containing the merge of the two /// provided sets. Keys from the `other` set have precedence. pub fn update(self, other: Self) -> Self { // Short-circuit on some optimal cases: - match (&self.0, &other.0) { + match (self.0.as_ref(), other.0.as_ref()) { (AttrsRep::Empty, AttrsRep::Empty) => return self, (AttrsRep::Empty, _) => return other, (_, AttrsRep::Empty) => return self, @@ -210,41 +189,44 @@ impl NixAttrs { // Explicitly handle all branches instead of falling // through, to ensure that we get at least some compiler // errors if variants are modified. - (AttrsRep::Im(_), AttrsRep::Im(_)) - | (AttrsRep::Im(_), AttrsRep::KV { .. }) - | (AttrsRep::KV { .. }, AttrsRep::Im(_)) => {} + (AttrsRep::Map(_), AttrsRep::Map(_)) + | (AttrsRep::Map(_), AttrsRep::KV { .. }) + | (AttrsRep::KV { .. 
}, AttrsRep::Map(_)) => {} }; // Slightly more advanced, but still optimised updates - match (self.0, other.0) { - (AttrsRep::Im(mut m), AttrsRep::KV { name, value }) => { - m.insert(NAME_S.clone(), name); - m.insert(VALUE_S.clone(), value); - NixAttrs(AttrsRep::Im(m)) + match (Rc::unwrap_or_clone(self.0), Rc::unwrap_or_clone(other.0)) { + (AttrsRep::Map(mut m), AttrsRep::KV { name, value }) => { + m.insert(NAME.clone(), name); + m.insert(VALUE.clone(), value); + AttrsRep::Map(m).into() } - (AttrsRep::KV { name, value }, AttrsRep::Im(mut m)) => { - match m.entry(NAME_S.clone()) { - imbl::ordmap::Entry::Vacant(e) => { + (AttrsRep::KV { name, value }, AttrsRep::Map(mut m)) => { + match m.entry(NAME.clone()) { + btree_map::Entry::Vacant(e) => { e.insert(name); } - imbl::ordmap::Entry::Occupied(_) => { /* name from `m` has precedence */ } + btree_map::Entry::Occupied(_) => { /* name from `m` has precedence */ } }; - match m.entry(VALUE_S.clone()) { - imbl::ordmap::Entry::Vacant(e) => { + match m.entry(VALUE.clone()) { + btree_map::Entry::Vacant(e) => { e.insert(value); } - imbl::ordmap::Entry::Occupied(_) => { /* value from `m` has precedence */ } + btree_map::Entry::Occupied(_) => { /* value from `m` has precedence */ } }; - NixAttrs(AttrsRep::Im(m)) + AttrsRep::Map(m).into() } // Plain merge of maps. - (AttrsRep::Im(m1), AttrsRep::Im(m2)) => NixAttrs(AttrsRep::Im(m2.union(m1))), + (AttrsRep::Map(mut m1), AttrsRep::Map(m2)) => { + m1.extend(m2); + AttrsRep::Map(m1).into() + } // Cases handled above by the borrowing match: _ => unreachable!(), @@ -253,16 +235,16 @@ impl NixAttrs { /// Return the number of key-value entries in an attrset. pub fn len(&self) -> usize { - match &self.0 { - AttrsRep::Im(map) => map.len(), + match self.0.as_ref() { + AttrsRep::Map(map) => map.len(), AttrsRep::Empty => 0, AttrsRep::KV { .. } => 2, } } pub fn is_empty(&self) -> bool { - match &self.0 { - AttrsRep::Im(map) => map.is_empty(), + match self.0.as_ref() { + AttrsRep::Map(map) => map.is_empty(), AttrsRep::Empty => true, AttrsRep::KV { .. } => false, } @@ -298,8 +280,8 @@ impl NixAttrs { /// Construct an iterator over all the key-value pairs in the attribute set. #[allow(clippy::needless_lifetimes)] pub fn iter<'a>(&'a self) -> Iter<KeyValue<'a>> { - Iter(match &self.0 { - AttrsRep::Im(map) => KeyValue::Im(map.iter()), + Iter(match &self.0.as_ref() { + AttrsRep::Map(map) => KeyValue::Map(map.iter()), AttrsRep::Empty => KeyValue::Empty, AttrsRep::KV { @@ -327,10 +309,12 @@ impl NixAttrs { /// Construct an iterator over all the keys of the attribute set pub fn keys(&self) -> Keys { - Keys(match &self.0 { + Keys(match self.0.as_ref() { AttrsRep::Empty => KeysInner::Empty, - AttrsRep::Im(m) => KeysInner::Im(m.keys()), AttrsRep::KV { .. } => KeysInner::KV(IterKV::default()), + + // TODO(tazjin): only sort when required, not always. 
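One behavioural detail of the plain map/map case above is worth spelling out: `m1.extend(m2)` gives the update the right-biased semantics of Nix's `//` operator, because `BTreeMap::extend` overwrites existing keys with the incoming values. A standalone sketch using only the standard library:

    fn right_biased_merge() {
        use std::collections::BTreeMap;

        let mut lhs = BTreeMap::from([("a", 1), ("b", 2)]);
        let rhs = BTreeMap::from([("b", 20), ("c", 30)]);

        lhs.extend(rhs); // keys from the right-hand operand win, as in `lhs // rhs`
        assert_eq!(lhs, BTreeMap::from([("a", 1), ("b", 20), ("c", 30)]));
    }

`Rc::unwrap_or_clone` keeps this copy-on-write friendly: a uniquely owned representation is reused in place, and only shared ones are cloned before the merge.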
+ AttrsRep::Map(m) => KeysInner::Map(m.keys()), }) } @@ -349,7 +333,7 @@ impl NixAttrs { // Optimisation: Empty attribute set if count == 0 { - return Ok(Ok(NixAttrs(AttrsRep::Empty))); + return Ok(Ok(AttrsRep::Empty.into())); } // Optimisation: KV pattern @@ -359,14 +343,14 @@ impl NixAttrs { } } - let mut attrs = NixAttrs(AttrsRep::Im(OrdMap::new())); + let mut attrs_map = BTreeMap::new(); for _ in 0..count { let value = stack_slice.pop().unwrap(); let key = stack_slice.pop().unwrap(); match key { - Value::String(ks) => set_attr(&mut attrs, ks, value)?, + Value::String(ks) => set_attr(&mut attrs_map, ks, value)?, Value::Null => { // This is in fact valid, but leads to the value @@ -381,13 +365,13 @@ impl NixAttrs { } } - Ok(Ok(attrs)) + Ok(Ok(AttrsRep::Map(attrs_map).into())) } /// Construct an optimized "KV"-style attribute set given the value for the /// `"name"` key, and the value for the `"value"` key pub(crate) fn from_kv(name: Value, value: Value) -> Self { - NixAttrs(AttrsRep::KV { name, value }) + AttrsRep::KV { name, value }.into() } } @@ -396,12 +380,12 @@ impl IntoIterator for NixAttrs { type IntoIter = OwnedAttrsIterator; fn into_iter(self) -> Self::IntoIter { - match self.0 { + match Rc::unwrap_or_clone(self.0) { AttrsRep::Empty => OwnedAttrsIterator(IntoIterRepr::Empty), AttrsRep::KV { name, value } => OwnedAttrsIterator(IntoIterRepr::Finite( - vec![(NAME_REF.clone(), name), (VALUE_REF.clone(), value)].into_iter(), + vec![(NAME.clone(), name), (VALUE.clone(), value)].into_iter(), )), - AttrsRep::Im(map) => OwnedAttrsIterator(IntoIterRepr::Im(map.into_iter())), + AttrsRep::Map(map) => OwnedAttrsIterator(IntoIterRepr::Map(map.into_iter())), } } } @@ -422,8 +406,8 @@ impl IntoIterator for NixAttrs { fn attempt_optimise_kv(slice: &mut [Value]) -> Option<NixAttrs> { let (name_idx, value_idx) = { match (&slice[2], &slice[0]) { - (Value::String(s1), Value::String(s2)) if (*s1 == *NAME_S && *s2 == *VALUE_S) => (3, 1), - (Value::String(s1), Value::String(s2)) if (*s1 == *VALUE_S && *s2 == *NAME_S) => (1, 3), + (Value::String(s1), Value::String(s2)) if (*s1 == *NAME && *s2 == *VALUE) => (3, 1), + (Value::String(s1), Value::String(s2)) if (*s1 == *VALUE && *s2 == *NAME) => (1, 3), // Technically this branch lets type errors pass, // but they will be caught during normal attribute @@ -440,13 +424,17 @@ fn attempt_optimise_kv(slice: &mut [Value]) -> Option<NixAttrs> { /// Set an attribute on an in-construction attribute set, while /// checking against duplicate keys. -fn set_attr(attrs: &mut NixAttrs, key: NixString, value: Value) -> Result<(), ErrorKind> { - match attrs.0.map_mut().entry(key) { - imbl::ordmap::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey { +fn set_attr( + map: &mut BTreeMap<NixString, Value>, + key: NixString, + value: Value, +) -> Result<(), ErrorKind> { + match map.entry(key) { + btree_map::Entry::Occupied(entry) => Err(ErrorKind::DuplicateAttrsKey { key: entry.key().to_string(), }), - imbl::ordmap::Entry::Vacant(entry) => { + btree_map::Entry::Vacant(entry) => { entry.insert(value); Ok(()) } @@ -484,7 +472,7 @@ pub enum KeyValue<'a> { at: IterKV, }, - Im(imbl::ordmap::Iter<'a, NixString, Value>), + Map(btree_map::Iter<'a, NixString, Value>), } /// Iterator over a Nix attribute set. 
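The reworked `set_attr` above uses the `Entry` API so each key is looked up only once, serving both the duplicate-key check and the insertion. The same pattern in isolation, with standard-library types and a simplified error type:

    use std::collections::{btree_map, BTreeMap};

    fn insert_unique(map: &mut BTreeMap<String, i64>, key: String, value: i64) -> Result<(), String> {
        match map.entry(key) {
            btree_map::Entry::Occupied(e) => Err(format!("duplicate key: {}", e.key())),
            btree_map::Entry::Vacant(e) => {
                e.insert(value);
                Ok(())
            }
        }
    }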
@@ -498,18 +486,18 @@ impl<'a> Iterator for Iter<KeyValue<'a>> { fn next(&mut self) -> Option<Self::Item> { match &mut self.0 { - KeyValue::Im(inner) => inner.next(), + KeyValue::Map(inner) => inner.next(), KeyValue::Empty => None, KeyValue::KV { name, value, at } => match at { IterKV::Name => { at.next(); - Some((&NAME_REF, name)) + Some((&NAME, name)) } IterKV::Value => { at.next(); - Some((&VALUE_REF, value)) + Some((&VALUE, value)) } IterKV::Done => None, @@ -523,7 +511,7 @@ impl<'a> ExactSizeIterator for Iter<KeyValue<'a>> { match &self.0 { KeyValue::Empty => 0, KeyValue::KV { .. } => 2, - KeyValue::Im(inner) => inner.len(), + KeyValue::Map(inner) => inner.len(), } } } @@ -531,7 +519,7 @@ impl<'a> ExactSizeIterator for Iter<KeyValue<'a>> { enum KeysInner<'a> { Empty, KV(IterKV), - Im(imbl::ordmap::Keys<'a, NixString, Value>), + Map(btree_map::Keys<'a, NixString, Value>), } pub struct Keys<'a>(KeysInner<'a>); @@ -544,14 +532,14 @@ impl<'a> Iterator for Keys<'a> { KeysInner::Empty => None, KeysInner::KV(at @ IterKV::Name) => { at.next(); - Some(&NAME_REF) + Some(&NAME) } KeysInner::KV(at @ IterKV::Value) => { at.next(); - Some(&VALUE_REF) + Some(&VALUE) } KeysInner::KV(IterKV::Done) => None, - KeysInner::Im(m) => m.next(), + KeysInner::Map(m) => m.next(), } } } @@ -571,7 +559,7 @@ impl<'a> ExactSizeIterator for Keys<'a> { match &self.0 { KeysInner::Empty => 0, KeysInner::KV(_) => 2, - KeysInner::Im(m) => m.len(), + KeysInner::Map(m) => m.len(), } } } @@ -580,7 +568,7 @@ impl<'a> ExactSizeIterator for Keys<'a> { pub enum IntoIterRepr { Empty, Finite(std::vec::IntoIter<(NixString, Value)>), - Im(imbl::ordmap::ConsumingIter<(NixString, Value)>), + Map(btree_map::IntoIter<NixString, Value>), } /// Wrapper type which hides the internal implementation details from @@ -595,7 +583,7 @@ impl Iterator for OwnedAttrsIterator { match &mut self.0 { IntoIterRepr::Empty => None, IntoIterRepr::Finite(inner) => inner.next(), - IntoIterRepr::Im(inner) => inner.next(), + IntoIterRepr::Map(m) => m.next(), } } } @@ -605,7 +593,7 @@ impl ExactSizeIterator for OwnedAttrsIterator { match &self.0 { IntoIterRepr::Empty => 0, IntoIterRepr::Finite(inner) => inner.len(), - IntoIterRepr::Im(inner) => inner.len(), + IntoIterRepr::Map(inner) => inner.len(), } } } @@ -615,7 +603,7 @@ impl DoubleEndedIterator for OwnedAttrsIterator { match &mut self.0 { IntoIterRepr::Empty => None, IntoIterRepr::Finite(inner) => inner.next_back(), - IntoIterRepr::Im(inner) => inner.next_back(), + IntoIterRepr::Map(inner) => inner.next_back(), } } } diff --git a/tvix/eval/src/value/attrs/tests.rs b/tvix/eval/src/value/attrs/tests.rs index 534b78a00d10..e8798797fda7 100644 --- a/tvix/eval/src/value/attrs/tests.rs +++ b/tvix/eval/src/value/attrs/tests.rs @@ -9,7 +9,7 @@ fn test_empty_attrs() { .unwrap(); assert!( - matches!(attrs, NixAttrs(AttrsRep::Empty)), + matches!(attrs.0.as_ref(), AttrsRep::Empty), "empty attribute set should use optimised representation" ); } @@ -21,7 +21,7 @@ fn test_simple_attrs() { .unwrap(); assert!( - matches!(attrs, NixAttrs(AttrsRep::Im(_))), + matches!(attrs.0.as_ref(), AttrsRep::Map(_)), "simple attribute set should use map representation", ) } @@ -45,8 +45,8 @@ fn test_kv_attrs() { .expect("constructing K/V pair attrs should succeed") .unwrap(); - match kv_attrs { - NixAttrs(AttrsRep::KV { name, value }) + match kv_attrs.0.as_ref() { + AttrsRep::KV { name, value } if name.to_str().unwrap() == meaning_val.to_str().unwrap() || value.to_str().unwrap() == forty_two_val.to_str().unwrap() => {} @@ -84,10 +84,10 
@@ fn test_kv_attrs_iter() { let mut iter = kv_attrs.iter().collect::<Vec<_>>().into_iter(); let (k, v) = iter.next().unwrap(); - assert!(k == *NAME_REF); + assert!(*k == *NAME); assert!(v.to_str().unwrap() == meaning_val.to_str().unwrap()); let (k, v) = iter.next().unwrap(); - assert!(k == *VALUE_REF); + assert!(*k == *VALUE); assert!(v.as_int().unwrap() == forty_two_val.as_int().unwrap()); assert!(iter.next().is_none()); } diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs index 24a6bcaf6f21..807dc55f97cc 100644 --- a/tvix/eval/src/value/json.rs +++ b/tvix/eval/src/value/json.rs @@ -4,7 +4,7 @@ /// as there is internal Nix logic that must happen within the /// serialisation methods. use super::{CoercionKind, Value}; -use crate::errors::{CatchableErrorKind, ErrorKind}; +use crate::errors::ErrorKind; use crate::generators::{self, GenCo}; use crate::NixContext; @@ -17,10 +17,7 @@ impl Value { /// Transforms the structure into a JSON /// and accumulate all encountered context in the second's element /// of the return type. - pub async fn into_contextful_json( - self, - co: &GenCo, - ) -> Result<Result<(Json, NixContext), CatchableErrorKind>, ErrorKind> { + pub async fn into_contextful_json(self, co: &GenCo) -> Result<(Json, NixContext), ErrorKind> { let self_forced = generators::request_force(co, self).await; let mut context = NixContext::new(); @@ -43,16 +40,12 @@ impl Value { } Value::List(l) => { - let mut out = vec![]; + let mut out = Vec::with_capacity(l.len()); for val in l.into_iter() { - match generators::request_to_json(co, val).await { - Ok((v, ctx)) => { - context.extend(ctx.into_iter()); - out.push(v) - } - Err(cek) => return Ok(Err(cek)), - } + let (json_item, ctx) = Box::pin(val.into_contextful_json(co)).await?; + context.extend(ctx.into_iter()); + out.push(json_item); } Json::Array(out) @@ -75,14 +68,13 @@ impl Value { ) .await? { - Value::Catchable(cek) => return Ok(Err(*cek)), + Value::Catchable(cek) => return Err(ErrorKind::CatchableError(*cek)), Value::String(s) => { // We need a fresh context here because `__toString` will discard // everything. let mut fresh = NixContext::new(); fresh.mimic(&s); - - return Ok(Ok((Json::String(s.to_str()?.to_owned()), fresh))); + return Ok((Json::String(s.to_str()?.to_owned()), fresh)); } _ => panic!("Value::coerce_to_string_() returned a non-string!"), } @@ -92,27 +84,23 @@ impl Value { // serialise to a JSON serialisation of that inner // value (regardless of what it is!). 
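The `Box::pin(val.into_contextful_json(co))` calls above are needed because the serialiser now recurses into itself for list elements and attribute values instead of bouncing through a generator request; a directly self-recursive `async fn` would have an infinitely sized future type, and boxing the recursive call breaks that cycle. A minimal standalone illustration of the same pattern, unrelated to the tvix types:

    async fn depth(v: &serde_json::Value) -> usize {
        match v {
            serde_json::Value::Array(items) => {
                let mut max = 0;
                for item in items {
                    // The recursive call must be boxed, otherwise the future
                    // type of `depth` would contain itself.
                    max = max.max(Box::pin(depth(item)).await);
                }
                max + 1
            }
            _ => 1,
        }
    }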
if let Some(out_path) = attrs.select("outPath") { - return Ok(generators::request_to_json(co, out_path.clone()).await); + let (json_out_path, ctx) = + Box::pin(out_path.clone().into_contextful_json(co)).await?; + context.extend(ctx.into_iter()); + return Ok((json_out_path, context)); } let mut out = Map::with_capacity(attrs.len()); for (name, value) in attrs.into_iter_sorted() { - out.insert( - name.to_str()?.to_owned(), - match generators::request_to_json(co, value).await { - Ok((v, ctx)) => { - context.extend(ctx.into_iter()); - v - } - Err(cek) => return Ok(Err(cek)), - }, - ); + let (json_value, ctx) = Box::pin(value.into_contextful_json(co)).await?; + context.extend(ctx.into_iter()); + out.insert(name.to_str()?.to_owned(), json_value); } Json::Object(out) } - Value::Catchable(c) => return Ok(Err(*c)), + Value::Catchable(c) => return Err(ErrorKind::CatchableError(*c)), val @ Value::Closure(_) | val @ Value::Thunk(_) @@ -121,34 +109,10 @@ impl Value { | val @ Value::Blueprint(_) | val @ Value::DeferredUpvalue(_) | val @ Value::UnresolvedPath(_) - | val @ Value::Json(..) | val @ Value::FinaliseRequest(_) => { return Err(ErrorKind::NotSerialisableToJson(val.type_of())) } }; - - Ok(Ok((value, context))) - } - - /// Generator version of the above, which wraps responses in - /// [`Value::Json`]. - pub(crate) async fn into_contextful_json_generator( - self, - co: GenCo, - ) -> Result<Value, ErrorKind> { - match self.into_contextful_json(&co).await? { - Err(cek) => Ok(Value::from(cek)), - Ok((json, ctx)) => Ok(Value::Json(Box::new((json, ctx)))), - } - } - - /// Transforms the structure into a JSON - /// All the accumulated context is ignored, use [`into_contextful_json`] - /// to obtain the resulting context of the JSON object. - pub async fn into_json( - self, - co: &GenCo, - ) -> Result<Result<Json, CatchableErrorKind>, ErrorKind> { - Ok(self.into_contextful_json(co).await?.map(|(json, _)| json)) + Ok((value, context)) } } diff --git a/tvix/eval/src/value/list.rs b/tvix/eval/src/value/list.rs index 2b8b3de28d19..3e4b23a93f42 100644 --- a/tvix/eval/src/value/list.rs +++ b/tvix/eval/src/value/list.rs @@ -2,8 +2,6 @@ use std::ops::Index; use std::rc::Rc; -use imbl::{vector, Vector}; - use serde::Deserialize; use super::thunk::ThunkSet; @@ -12,7 +10,7 @@ use super::Value; #[repr(transparent)] #[derive(Clone, Debug, Deserialize)] -pub struct NixList(Rc<Vector<Value>>); +pub struct NixList(Rc<Vec<Value>>); impl TotalDisplay for NixList { fn total_fmt(&self, f: &mut std::fmt::Formatter<'_>, set: &mut ThunkSet) -> std::fmt::Result { @@ -27,8 +25,8 @@ impl TotalDisplay for NixList { } } -impl From<Vector<Value>> for NixList { - fn from(vs: Vector<Value>) -> Self { +impl From<Vec<Value>> for NixList { + fn from(vs: Vec<Value>) -> Self { Self(Rc::new(vs)) } } @@ -54,10 +52,10 @@ impl NixList { stack_slice.len(), ); - NixList(Rc::new(Vector::from_iter(stack_slice))) + NixList(Rc::new(stack_slice)) } - pub fn iter(&self) -> vector::Iter<Value> { + pub fn iter(&self) -> std::slice::Iter<Value> { self.0.iter() } @@ -65,19 +63,14 @@ impl NixList { Rc::ptr_eq(&self.0, &other.0) } - pub fn into_inner(self) -> Vector<Value> { + pub fn into_inner(self) -> Vec<Value> { Rc::try_unwrap(self.0).unwrap_or_else(|rc| (*rc).clone()) } - - #[deprecated(note = "callers should avoid constructing from Vec")] - pub fn from_vec(vs: Vec<Value>) -> Self { - Self(Rc::new(Vector::from_iter(vs))) - } } impl IntoIterator for NixList { type Item = Value; - type IntoIter = imbl::vector::ConsumingIter<Value>; + type IntoIter = 
std::vec::IntoIter<Value>; fn into_iter(self) -> Self::IntoIter { self.into_inner().into_iter() @@ -86,7 +79,7 @@ impl IntoIterator for NixList { impl<'a> IntoIterator for &'a NixList { type Item = &'a Value; - type IntoIter = imbl::vector::Iter<'a, Value>; + type IntoIter = std::slice::Iter<'a, Value>; fn into_iter(self) -> Self::IntoIter { self.0.iter() diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index dfad0cd8391b..1775c6f71adb 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -5,8 +5,10 @@ use std::fmt::Display; use std::num::{NonZeroI32, NonZeroUsize}; use std::path::PathBuf; use std::rc::Rc; +use std::sync::LazyLock; use bstr::{BString, ByteVec}; +use codemap::Span; use lexical_core::format::CXX_LITERAL; use serde::Deserialize; @@ -23,7 +25,6 @@ mod thunk; use crate::errors::{CatchableErrorKind, ErrorKind}; use crate::opcode::StackIdx; -use crate::spans::LightSpan; use crate::vm::generators::{self, GenCo}; use crate::AddContext; pub use attrs::NixAttrs; @@ -37,8 +38,6 @@ pub use thunk::Thunk; pub use self::thunk::ThunkSet; -use lazy_static::lazy_static; - #[warn(variant_size_differences)] #[derive(Clone, Debug, Deserialize)] #[serde(untagged)] @@ -77,14 +76,11 @@ pub enum Value { DeferredUpvalue(StackIdx), #[serde(skip)] UnresolvedPath(Box<PathBuf>), - #[serde(skip)] - Json(Box<(serde_json::Value, NixContext)>), #[serde(skip)] FinaliseRequest(bool), #[serde(skip)] - // TODO(tazjin): why is this in a Box? Catchable(Box<CatchableErrorKind>), } @@ -108,16 +104,15 @@ where } } -lazy_static! { - static ref WRITE_FLOAT_OPTIONS: lexical_core::WriteFloatOptions = - lexical_core::WriteFloatOptionsBuilder::new() - .trim_floats(true) - .round_mode(lexical_core::write_float_options::RoundMode::Round) - .positive_exponent_break(Some(NonZeroI32::new(5).unwrap())) - .max_significant_digits(Some(NonZeroUsize::new(6).unwrap())) - .build() - .unwrap(); -} +static WRITE_FLOAT_OPTIONS: LazyLock<lexical_core::WriteFloatOptions> = LazyLock::new(|| { + lexical_core::WriteFloatOptionsBuilder::new() + .trim_floats(true) + .round_mode(lexical_core::write_float_options::RoundMode::Round) + .positive_exponent_break(Some(NonZeroI32::new(5).unwrap())) + .max_significant_digits(Some(NonZeroUsize::new(6).unwrap())) + .build() + .unwrap() +}); // Helper macros to generate the to_*/as_* macros while accounting for // thunks. @@ -187,6 +182,21 @@ pub struct CoercionKind { pub import_paths: bool, } +impl From<CoercionKind> for u8 { + fn from(k: CoercionKind) -> u8 { + k.strong as u8 | (k.import_paths as u8) << 1 + } +} + +impl From<u8> for CoercionKind { + fn from(byte: u8) -> Self { + CoercionKind { + strong: byte & 0x01 != 0, + import_paths: byte & 0x02 != 0, + } + } +} + impl<T> From<T> for Value where T: Into<NixString>, @@ -196,14 +206,6 @@ where } } -/// Constructors -impl Value { - /// Construct a [`Value::Attrs`] from a [`NixAttrs`]. - pub fn attrs(attrs: NixAttrs) -> Self { - Self::Attrs(Box::new(attrs)) - } -} - /// Controls what kind of by-pointer equality comparison is allowed. /// /// See `//tvix/docs/value-pointer-equality.md` for details. @@ -220,11 +222,16 @@ pub enum PointerEquality { } impl Value { + /// Construct a [`Value::Attrs`] from a [`NixAttrs`]. + pub fn attrs(attrs: NixAttrs) -> Self { + Self::Attrs(Box::new(attrs)) + } + /// Deeply forces a value, traversing e.g. lists and attribute sets and forcing /// their contents, too. /// /// This is a generator function. 
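The new `CoercionKind` conversions pack the two flags into the low bits of a byte, presumably so a coercion kind can travel as a compact one-byte operand alongside the new opcode encoding. A quick round-trip check of the layout, using only the impls shown above:

    #[test]
    fn coercion_kind_byte_layout() {
        let byte: u8 = CoercionKind { strong: true, import_paths: false }.into();
        assert_eq!(byte, 0b01); // bit 0 = strong, bit 1 = import_paths

        let kind = CoercionKind::from(0b11u8);
        assert!(kind.strong && kind.import_paths);
    }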
- pub(super) async fn deep_force(self, co: GenCo, span: LightSpan) -> Result<Value, ErrorKind> { + pub(super) async fn deep_force(self, co: GenCo, span: Span) -> Result<Value, ErrorKind> { if let Some(v) = Self::deep_force_(self.clone(), co, span).await? { Ok(v) } else { @@ -233,11 +240,7 @@ impl Value { } /// Returns Some(v) or None to indicate the returned value is myself - async fn deep_force_( - myself: Value, - co: GenCo, - span: LightSpan, - ) -> Result<Option<Value>, ErrorKind> { + async fn deep_force_(myself: Value, co: GenCo, span: Span) -> Result<Option<Value>, ErrorKind> { // This is a stack of values which still remain to be forced. let mut vals = vec![myself]; @@ -256,7 +259,7 @@ impl Value { if !thunk_set.insert(t) { continue; } - Thunk::force_(t.clone(), &co, span.clone()).await? + Thunk::force_(t.clone(), &co, span).await? } else { v }; @@ -294,7 +297,6 @@ impl Value { | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(..) | Value::FinaliseRequest(_) => panic!( "Tvix bug: internal value left on stack: {}", value.type_of() @@ -307,7 +309,7 @@ impl Value { self, co: GenCo, kind: CoercionKind, - span: LightSpan, + span: Span, ) -> Result<Value, ErrorKind> { self.coerce_to_string_(&co, kind, span).await } @@ -318,7 +320,7 @@ impl Value { self, co: &GenCo, kind: CoercionKind, - span: LightSpan, + span: Span, ) -> Result<Value, ErrorKind> { let mut result = BString::default(); let mut vals = vec![self]; @@ -331,7 +333,7 @@ impl Value { loop { let value = if let Some(v) = vals.pop() { - v.force(co, span.clone()).await? + v.force(co, span).await? } else { return Ok(Value::String(NixString::new_context_from(context, result))); }; @@ -377,7 +379,7 @@ impl Value { // `__toString` is preferred. (Value::Attrs(attrs), kind) => { if let Some(to_string) = attrs.select("__toString") { - let callable = to_string.clone().force(co, span.clone()).await?; + let callable = to_string.clone().force(co, span).await?; // Leave the attribute set on the stack as an argument // to the function call. @@ -444,7 +446,6 @@ impl Value { | (Value::Blueprint(_), _) | (Value::DeferredUpvalue(_), _) | (Value::UnresolvedPath(_), _) - | (Value::Json(..), _) | (Value::FinaliseRequest(_), _) => { panic!("tvix bug: .coerce_to_string() called on internal value") } @@ -467,7 +468,7 @@ impl Value { other: Value, co: GenCo, ptr_eq: PointerEquality, - span: LightSpan, + span: Span, ) -> Result<Value, ErrorKind> { self.nix_eq(other, &co, ptr_eq, span).await } @@ -487,7 +488,7 @@ impl Value { other: Value, co: &GenCo, ptr_eq: PointerEquality, - span: LightSpan, + span: Span, ) -> Result<Value, ErrorKind> { // this is a stack of ((v1,v2),peq) triples to be compared; // after each triple is popped off of the stack, v1 is @@ -513,13 +514,13 @@ impl Value { } }; - Thunk::force_(thunk, co, span.clone()).await? + Thunk::force_(thunk, co, span).await? 
} _ => a, }; - let b = b.force(co, span.clone()).await?; + let b = b.force(co, span).await?; debug_assert!(!matches!(a, Value::Thunk(_))); debug_assert!(!matches!(b, Value::Thunk(_))); @@ -568,11 +569,11 @@ impl Value { #[allow(clippy::single_match)] // might need more match arms later match (a1.select("type"), a2.select("type")) { (Some(v1), Some(v2)) => { - let s1 = v1.clone().force(co, span.clone()).await?; + let s1 = v1.clone().force(co, span).await?; if s1.is_catchable() { return Ok(s1); } - let s2 = v2.clone().force(co, span.clone()).await?; + let s2 = v2.clone().force(co, span).await?; if s2.is_catchable() { return Ok(s2); } @@ -593,8 +594,8 @@ impl Value { .context("comparing derivations")? .clone(); - let out1 = out1.clone().force(co, span.clone()).await?; - let out2 = out2.clone().force(co, span.clone()).await?; + let out1 = out1.clone().force(co, span).await?; + let out2 = out2.clone().force(co, span).await?; if out1.is_catchable() { return Ok(out1); @@ -681,7 +682,6 @@ impl Value { Value::Blueprint(_) => "internal[blueprint]", Value::DeferredUpvalue(_) => "internal[deferred_upvalue]", Value::UnresolvedPath(_) => "internal[unresolved_path]", - Value::Json(..) => "internal[json]", Value::FinaliseRequest(_) => "internal[finaliser_sentinel]", Value::Catchable(_) => "internal[catchable]", } @@ -745,7 +745,7 @@ impl Value { self, other: Self, co: GenCo, - span: LightSpan, + span: Span, ) -> Result<Result<Ordering, CatchableErrorKind>, ErrorKind> { Self::nix_cmp_ordering_(self, other, co, span).await } @@ -754,7 +754,7 @@ impl Value { myself: Self, other: Self, co: GenCo, - span: LightSpan, + span: Span, ) -> Result<Result<Ordering, CatchableErrorKind>, ErrorKind> { // this is a stack of ((v1,v2),peq) triples to be compared; // after each triple is popped off of the stack, v1 is @@ -770,14 +770,14 @@ impl Value { }; if ptr_eq == PointerEquality::AllowAll { if a.clone() - .nix_eq(b.clone(), &co, PointerEquality::AllowAll, span.clone()) + .nix_eq(b.clone(), &co, PointerEquality::AllowAll, span) .await? .as_bool()? { continue; } - a = a.force(&co, span.clone()).await?; - b = b.force(&co, span.clone()).await?; + a = a.force(&co, span).await?; + b = b.force(&co, span).await?; } let result = match (a, b) { (Value::Catchable(c), _) => return Ok(Err(*c)), @@ -820,7 +820,7 @@ impl Value { } // TODO(amjoseph): de-asyncify this (when called directly by the VM) - pub async fn force(self, co: &GenCo, span: LightSpan) -> Result<Value, ErrorKind> { + pub async fn force(self, co: &GenCo, span: Span) -> Result<Value, ErrorKind> { if let Value::Thunk(thunk) = self { // TODO(amjoseph): use #[tailcall::mutual] return Thunk::force_(thunk, co, span).await; @@ -829,7 +829,7 @@ impl Value { } // need two flavors, because async - pub async fn force_owned_genco(self, co: GenCo, span: LightSpan) -> Result<Value, ErrorKind> { + pub async fn force_owned_genco(self, co: GenCo, span: Span) -> Result<Value, ErrorKind> { if let Value::Thunk(thunk) = self { // TODO(amjoseph): use #[tailcall::mutual] return Thunk::force_(thunk, &co, span).await; @@ -877,10 +877,15 @@ impl Value { | Value::Blueprint(_) | Value::DeferredUpvalue(_) | Value::UnresolvedPath(_) - | Value::Json(..) | Value::FinaliseRequest(_) => "an internal Tvix evaluator value".into(), } } + + /// Constructs a thunk that will be evaluated lazily at runtime. This lets + /// users of Tvix implement their own lazy builtins and so on. 
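A sketch of how an embedder might use the new `suspended_native_thunk` constructor documented above, assuming only the signature shown in this diff (the integer variant of `Value` is assumed from the wider codebase):

    let lazy = Value::suspended_native_thunk(Box::new(|| {
        // Arbitrarily expensive work goes here; it only runs the first time
        // the value is forced.
        Ok(Value::Integer(42))
    }));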
+ pub fn suspended_native_thunk(native: Box<dyn Fn() -> Result<Value, ErrorKind>>) -> Self { + Value::Thunk(Thunk::new_suspended_native(native)) + } } trait TotalDisplay { @@ -991,7 +996,6 @@ impl TotalDisplay for Value { Value::Blueprint(_) => f.write_str("internal[blueprint]"), Value::DeferredUpvalue(_) => f.write_str("internal[deferred_upvalue]"), Value::UnresolvedPath(_) => f.write_str("internal[unresolved_path]"), - Value::Json(..) => f.write_str("internal[json]"), Value::FinaliseRequest(_) => f.write_str("internal[finaliser_sentinel]"), // Delegate thunk display to the type, as it must handle diff --git a/tvix/eval/src/value/string/context.rs b/tvix/eval/src/value/string/context.rs new file mode 100644 index 000000000000..e1c04735ddde --- /dev/null +++ b/tvix/eval/src/value/string/context.rs @@ -0,0 +1,161 @@ +use rustc_hash::FxHashSet; +use serde::Serialize; + +use super::NixString; + +#[derive(Clone, Debug, Serialize, Hash, PartialEq, Eq)] +pub enum NixContextElement { + /// A plain store path (e.g. source files copied to the store) + Plain(String), + + /// Single output of a derivation, represented by its name and its derivation path. + Single { name: String, derivation: String }, + + /// A reference to a complete derivation + /// including its source and its binary closure. + /// It is used for the `drvPath` attribute context. + /// The referred string is the store path to + /// the derivation path. + Derivation(String), +} + +/// Nix context strings representation in Tvix. This tracks a set of different kinds of string +/// dependencies that we can come across during manipulation of our language primitives, mostly +/// strings. There's some simple algebra of context strings and how they propagate w.r.t. primitive +/// operations, e.g. concatenation, interpolation and other string operations. +#[repr(transparent)] +#[derive(Clone, Debug, Serialize, Default)] +pub struct NixContext(FxHashSet<NixContextElement>); + +impl From<NixContextElement> for NixContext { + fn from(value: NixContextElement) -> Self { + let mut set = FxHashSet::default(); + set.insert(value); + Self(set) + } +} + +impl From<FxHashSet<NixContextElement>> for NixContext { + fn from(value: FxHashSet<NixContextElement>) -> Self { + Self(value) + } +} + +impl<const N: usize> From<[NixContextElement; N]> for NixContext { + fn from(value: [NixContextElement; N]) -> Self { + let mut set = FxHashSet::default(); + for elt in value { + set.insert(elt); + } + Self(set) + } +} + +impl NixContext { + /// Creates an empty context that can be populated + /// and passed to form a contextful [NixString], albeit + /// if the context is concretly empty, the resulting [NixString] + /// will be contextless. + pub fn new() -> Self { + Self::default() + } + + /// For internal consumers, we let people observe + /// if the [NixContext] is actually empty or not + /// to decide whether they want to skip the allocation + /// of a full blown [HashSet]. + pub(crate) fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Consumes a new [NixContextElement] and add it if not already + /// present in this context. + pub fn append(mut self, other: NixContextElement) -> Self { + self.0.insert(other); + self + } + + /// Extends the existing context with more context elements. + pub fn extend<T>(&mut self, iter: T) + where + T: IntoIterator<Item = NixContextElement>, + { + self.0.extend(iter) + } + + /// Copies from another [NixString] its context strings + /// in this context. 
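A short sketch of the context algebra this new module exposes, using the constructors and combinators defined above (the store paths are placeholders):

    let mut ctx = NixContext::from(NixContextElement::Plain("/nix/store/aaaa-source".into()))
        .append(NixContextElement::Derivation("/nix/store/bbbb-foo.drv".into()));

    ctx.extend([NixContextElement::Single {
        name: "out".into(),
        derivation: "/nix/store/bbbb-foo.drv".into(),
    }]);

    assert_eq!(ctx.iter_plain().count(), 1);
    assert_eq!(ctx.iter_derivation().count(), 1);
    assert_eq!(ctx.iter_single_outputs().count(), 1);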
+ pub fn mimic(&mut self, other: &NixString) { + if let Some(context) = other.context() { + self.extend(context.iter().cloned()); + } + } + + /// Iterates over "plain" context elements, e.g. sources imported + /// in the store without more information, i.e. `toFile` or coerced imported paths. + /// It yields paths to the store. + pub fn iter_plain(&self) -> impl Iterator<Item = &str> { + self.iter().filter_map(|elt| { + if let NixContextElement::Plain(s) = elt { + Some(s.as_str()) + } else { + None + } + }) + } + + /// Iterates over "full derivations" context elements, e.g. something + /// referring to their `drvPath`, i.e. their full sources and binary closure. + /// It yields derivation paths. + pub fn iter_derivation(&self) -> impl Iterator<Item = &str> { + self.iter().filter_map(|elt| { + if let NixContextElement::Derivation(s) = elt { + Some(s.as_str()) + } else { + None + } + }) + } + + /// Iterates over "single" context elements, e.g. single derived paths, + /// or also known as the single output of a given derivation. + /// The first element of the tuple is the output name + /// and the second element is the derivation path. + pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { + self.iter().filter_map(|elt| { + if let NixContextElement::Single { name, derivation } = elt { + Some((name.as_str(), derivation.as_str())) + } else { + None + } + }) + } + + /// Iterates over any element of the context. + pub fn iter(&self) -> impl Iterator<Item = &NixContextElement> { + self.0.iter() + } + + /// Produces a list of owned references to this current context, + /// no matter its type. + pub fn to_owned_references(self) -> Vec<String> { + self.0 + .into_iter() + .map(|ctx| match ctx { + NixContextElement::Derivation(drv_path) => drv_path, + NixContextElement::Plain(store_path) => store_path, + NixContextElement::Single { derivation, .. } => derivation, + }) + .collect() + } +} + +impl IntoIterator for NixContext { + type Item = NixContextElement; + + type IntoIter = std::collections::hash_set::IntoIter<NixContextElement>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string/mod.rs index 163e140a19c4..0f41ce9dc73c 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string/mod.rs @@ -1,173 +1,30 @@ //! This module implements Nix language strings. //! -//! Nix language strings never need to be modified on the language -//! level, allowing us to shave off some memory overhead and only -//! paying the cost when creating new strings. +//! See [`NixString`] for more information about the internals of string values + use bstr::{BStr, BString, ByteSlice, Chars}; +use nohash_hasher::BuildNoHashHasher; use rnix::ast; -use std::alloc::{alloc, dealloc, handle_alloc_error, Layout}; +#[cfg(feature = "no_leak")] +use rustc_hash::FxHashSet; +use rustc_hash::FxHasher; +use std::alloc::dealloc; +use std::alloc::{alloc, handle_alloc_error, Layout}; use std::borrow::{Borrow, Cow}; -use std::collections::HashSet; +use std::cell::RefCell; use std::ffi::c_void; use std::fmt::{self, Debug, Display}; -use std::hash::Hash; +use std::hash::{Hash, Hasher}; use std::ops::Deref; use std::ptr::{self, NonNull}; use std::slice; use serde::de::{Deserializer, Visitor}; -use serde::{Deserialize, Serialize}; - -#[derive(Clone, Debug, Serialize, Hash, PartialEq, Eq)] -pub enum NixContextElement { - /// A plain store path (e.g. 
source files copied to the store) - Plain(String), - - /// Single output of a derivation, represented by its name and its derivation path. - Single { name: String, derivation: String }, - - /// A reference to a complete derivation - /// including its source and its binary closure. - /// It is used for the `drvPath` attribute context. - /// The referred string is the store path to - /// the derivation path. - Derivation(String), -} - -/// Nix context strings representation in Tvix. This tracks a set of different kinds of string -/// dependencies that we can come across during manipulation of our language primitives, mostly -/// strings. There's some simple algebra of context strings and how they propagate w.r.t. primitive -/// operations, e.g. concatenation, interpolation and other string operations. -#[repr(transparent)] -#[derive(Clone, Debug, Serialize, Default)] -pub struct NixContext(HashSet<NixContextElement>); - -impl From<NixContextElement> for NixContext { - fn from(value: NixContextElement) -> Self { - Self([value].into()) - } -} - -impl From<HashSet<NixContextElement>> for NixContext { - fn from(value: HashSet<NixContextElement>) -> Self { - Self(value) - } -} - -impl<const N: usize> From<[NixContextElement; N]> for NixContext { - fn from(value: [NixContextElement; N]) -> Self { - Self(HashSet::from(value)) - } -} - -impl NixContext { - /// Creates an empty context that can be populated - /// and passed to form a contextful [NixString], albeit - /// if the context is concretly empty, the resulting [NixString] - /// will be contextless. - pub fn new() -> Self { - Self::default() - } - - /// For internal consumers, we let people observe - /// if the [NixContext] is actually empty or not - /// to decide whether they want to skip the allocation - /// of a full blown [HashSet]. - pub(crate) fn is_empty(&self) -> bool { - self.0.is_empty() - } - - /// Consumes a new [NixContextElement] and add it if not already - /// present in this context. - pub fn append(mut self, other: NixContextElement) -> Self { - self.0.insert(other); - self - } - - /// Extends the existing context with more context elements. - pub fn extend<T>(&mut self, iter: T) - where - T: IntoIterator<Item = NixContextElement>, - { - self.0.extend(iter) - } - - /// Copies from another [NixString] its context strings - /// in this context. - pub fn mimic(&mut self, other: &NixString) { - if let Some(context) = other.context() { - self.extend(context.iter().cloned()); - } - } +use serde::Deserialize; - /// Iterates over "plain" context elements, e.g. sources imported - /// in the store without more information, i.e. `toFile` or coerced imported paths. - /// It yields paths to the store. - pub fn iter_plain(&self) -> impl Iterator<Item = &str> { - self.iter().filter_map(|elt| { - if let NixContextElement::Plain(s) = elt { - Some(s.as_str()) - } else { - None - } - }) - } - - /// Iterates over "full derivations" context elements, e.g. something - /// referring to their `drvPath`, i.e. their full sources and binary closure. - /// It yields derivation paths. - pub fn iter_derivation(&self) -> impl Iterator<Item = &str> { - self.iter().filter_map(|elt| { - if let NixContextElement::Derivation(s) = elt { - Some(s.as_str()) - } else { - None - } - }) - } +mod context; - /// Iterates over "single" context elements, e.g. single derived paths, - /// or also known as the single output of a given derivation. - /// The first element of the tuple is the output name - /// and the second element is the derivation path. 
- pub fn iter_single_outputs(&self) -> impl Iterator<Item = (&str, &str)> { - self.iter().filter_map(|elt| { - if let NixContextElement::Single { name, derivation } = elt { - Some((name.as_str(), derivation.as_str())) - } else { - None - } - }) - } - - /// Iterates over any element of the context. - pub fn iter(&self) -> impl Iterator<Item = &NixContextElement> { - self.0.iter() - } - - /// Produces a list of owned references to this current context, - /// no matter its type. - pub fn to_owned_references(self) -> Vec<String> { - self.0 - .into_iter() - .map(|ctx| match ctx { - NixContextElement::Derivation(drv_path) => drv_path, - NixContextElement::Plain(store_path) => store_path, - NixContextElement::Single { derivation, .. } => derivation, - }) - .collect() - } -} - -impl IntoIterator for NixContext { - type Item = NixContextElement; - - type IntoIter = std::collections::hash_set::IntoIter<NixContextElement>; - - fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() - } -} +pub use context::{NixContext, NixContextElement}; /// This type is never instantiated, but serves to document the memory layout of the actual heap /// allocation for Nix strings. @@ -389,6 +246,59 @@ impl NixStringInner { } } +#[derive(Default)] +struct InternerInner { + #[allow(clippy::disallowed_types)] // Not using the default hasher + map: std::collections::HashMap<u64, NonNull<c_void>, BuildNoHashHasher<u64>>, + #[cfg(feature = "no_leak")] + #[allow(clippy::disallowed_types)] // Not using the default hasher + interned_strings: FxHashSet<NonNull<c_void>>, +} + +unsafe impl Send for InternerInner {} + +fn hash<T>(s: T) -> u64 +where + T: Hash, +{ + let mut hasher = FxHasher::default(); + s.hash(&mut hasher); + hasher.finish() +} + +impl InternerInner { + pub fn intern(&mut self, s: &[u8]) -> NixString { + let hash = hash(s); + if let Some(s) = self.map.get(&hash) { + return NixString(*s); + } + + let string = NixString::new_inner(s, None); + self.map.insert(hash, string.0); + #[cfg(feature = "no_leak")] + self.interned_strings.insert(string.0); + string + } +} + +#[derive(Default)] +struct Interner(RefCell<InternerInner>); + +impl Interner { + pub fn intern(&self, s: &[u8]) -> NixString { + self.0.borrow_mut().intern(s) + } + + #[cfg(feature = "no_leak")] + pub fn is_interned_string(&self, string: &NixString) -> bool { + self.0.borrow().interned_strings.contains(&string.0) + } +} + +thread_local! 
{ + static INTERNER: Interner = Interner::default(); +} + /// Nix string values /// /// # Internals @@ -408,7 +318,23 @@ unsafe impl Send for NixString {} unsafe impl Sync for NixString {} impl Drop for NixString { + #[cfg(not(feature = "no_leak"))] fn drop(&mut self) { + if self.context().is_some() { + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of dealloc + unsafe { + NixStringInner::dealloc(self.0); + } + } + } + + #[cfg(feature = "no_leak")] + fn drop(&mut self) { + if INTERNER.with(|i| i.is_interned_string(self)) { + return; + } + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct // according to the rules of dealloc unsafe { @@ -419,9 +345,17 @@ impl Drop for NixString { impl Clone for NixString { fn clone(&self) -> Self { - // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct - // according to the rules of clone - unsafe { Self(NixStringInner::clone(self.0)) } + if cfg!(feature = "no_leak") || self.context().is_some() { + // SAFETY: There's no way to construct a NixString that doesn't leave the allocation correct + // according to the rules of clone + unsafe { Self(NixStringInner::clone(self.0)) } + } else { + // SAFETY: + // + // - NixStrings are never mutated + // - NixStrings are never freed + Self(self.0) + } } } @@ -440,7 +374,7 @@ impl Debug for NixString { impl PartialEq for NixString { fn eq(&self, other: &Self) -> bool { - self.as_bstr() == other.as_bstr() + self.0 == other.0 || self.as_bstr() == other.as_bstr() } } @@ -466,6 +400,9 @@ impl PartialOrd for NixString { impl Ord for NixString { fn cmp(&self, other: &Self) -> std::cmp::Ordering { + if self.0 == other.0 { + return std::cmp::Ordering::Equal; + } self.as_bstr().cmp(other.as_bstr()) } } @@ -518,15 +455,6 @@ impl From<String> for NixString { } } -impl<T> From<(T, Option<Box<NixContext>>)> for NixString -where - NixString: From<T>, -{ - fn from((s, ctx): (T, Option<Box<NixContext>>)) -> Self { - Self::new(NixString::from(s).as_ref(), ctx) - } -} - impl From<Box<str>> for NixString { fn from(s: Box<str>) -> Self { s.into_boxed_bytes().into() @@ -629,6 +557,9 @@ mod arbitrary { } } +/// Set non-scientifically. TODO(aspen): think more about what this should be +const INTERN_THRESHOLD: usize = 32; + impl NixString { fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self { debug_assert!( @@ -636,6 +567,20 @@ impl NixString { "BUG: initialized with empty context" ); + if !cfg!(feature = "no_leak") /* It's only safe to intern if we leak strings, since there's + * nothing yet preventing interned strings from getting freed + * (and then used by other copies) otherwise + */ + && contents.len() <= INTERN_THRESHOLD + && context.is_none() + { + return INTERNER.with(|i| i.intern(contents)); + } + + Self::new_inner(contents, context) + } + + fn new_inner(contents: &[u8], context: Option<Box<NixContext>>) -> Self { // SAFETY: We're always fully initializing a NixString here: // // 1. NixStringInner::alloc sets up the len for us diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs index a67537f945a9..4b915019d47c 100644 --- a/tvix/eval/src/value/thunk.rs +++ b/tvix/eval/src/value/thunk.rs @@ -18,17 +18,16 @@ //! object, but when forcing a thunk, the runtime *must* mutate the //! memoisable slot. 
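Taken together, the string changes above mean that small, context-free strings are deduplicated through the thread-local interner (anything at or below `INTERN_THRESHOLD` bytes with no context), and that cloning a context-free string is a pointer copy rather than a fresh allocation, since such strings are deliberately leaked unless the `no_leak` feature is enabled. A sketch of the observable effect, assuming the `From<&str>` conversion used elsewhere in this diff:

    // Both literals are short and carry no context, so they are served from
    // the interner and share one allocation; the pointer fast path added to
    // PartialEq/Ord then short-circuits the comparison.
    let a = NixString::from("out");
    let b = NixString::from("out");
    assert_eq!(a, b);

    // Cloning a leaked, context-free string copies the pointer only.
    let c = a.clone();
    assert_eq!(c, b);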
+use rustc_hash::FxHashSet; use std::{ cell::{Ref, RefCell, RefMut}, - collections::HashSet, fmt::Debug, rc::Rc, }; use crate::{ errors::ErrorKind, - opcode::OpCode, - spans::LightSpan, + opcode::Op, upvalues::Upvalues, value::Closure, vm::generators::{self, GenCo}, @@ -59,7 +58,7 @@ enum ThunkRepr { Suspended { lambda: Rc<Lambda>, upvalues: Rc<Upvalues>, - light_span: LightSpan, + span: Span, }, /// Thunk is a suspended native computation. @@ -69,10 +68,10 @@ enum ThunkRepr { /// value means that infinite recursion has occured. Blackhole { /// Span at which the thunk was first forced. - forced_at: LightSpan, + forced_at: Span, /// Span at which the thunk was originally suspended. - suspended_at: Option<LightSpan>, + suspended_at: Option<Span>, /// Span of the first instruction of the actual code inside /// the thunk. @@ -143,11 +142,11 @@ impl Thunk { ))))) } - pub fn new_suspended(lambda: Rc<Lambda>, light_span: LightSpan) -> Self { + pub fn new_suspended(lambda: Rc<Lambda>, span: Span) -> Self { Thunk(Rc::new(RefCell::new(ThunkRepr::Suspended { upvalues: Rc::new(Upvalues::with_capacity(lambda.upvalue_count)), lambda: lambda.clone(), - light_span, + span, }))) } @@ -162,9 +161,8 @@ impl Thunk { /// particularly useful in builtin implementations if the result of calling /// a function does not need to be forced immediately, because e.g. it is /// stored in an attribute set. - pub fn new_suspended_call(callee: Value, arg: Value, light_span: LightSpan) -> Self { + pub fn new_suspended_call(callee: Value, arg: Value, span: Span) -> Self { let mut lambda = Lambda::default(); - let span = light_span.span(); let arg_idx = lambda.chunk().push_constant(arg); let f_idx = lambda.chunk().push_constant(callee); @@ -172,28 +170,28 @@ impl Thunk { // This is basically a recreation of compile_apply(): // We need to push the argument onto the stack and then the function. // The function (not the argument) needs to be forced before calling. - lambda.chunk.push_op(OpCode::OpConstant(arg_idx), span); - lambda.chunk().push_op(OpCode::OpConstant(f_idx), span); - lambda.chunk.push_op(OpCode::OpForce, span); - lambda.chunk.push_op(OpCode::OpCall, span); + lambda.chunk.push_op(Op::Constant, span); + lambda.chunk.push_uvarint(arg_idx.0 as u64); + lambda.chunk.push_op(Op::Constant, span); + lambda.chunk.push_uvarint(f_idx.0 as u64); + lambda.chunk.push_op(Op::Force, span); + lambda.chunk.push_op(Op::Call, span); // Inform the VM that the chunk has ended - lambda.chunk.push_op(OpCode::OpReturn, span); + lambda.chunk.push_op(Op::Return, span); Thunk(Rc::new(RefCell::new(ThunkRepr::Suspended { upvalues: Rc::new(Upvalues::with_capacity(0)), lambda: Rc::new(lambda), - light_span, + span, }))) } - fn prepare_blackhole(&self, forced_at: LightSpan) -> ThunkRepr { + fn prepare_blackhole(&self, forced_at: Span) -> ThunkRepr { match &*self.0.borrow() { - ThunkRepr::Suspended { - light_span, lambda, .. - } => ThunkRepr::Blackhole { + ThunkRepr::Suspended { span, lambda, .. 
} => ThunkRepr::Blackhole { forced_at, - suspended_at: Some(light_span.clone()), + suspended_at: Some(*span), content_span: Some(lambda.chunk.first_span()), }, @@ -205,14 +203,10 @@ impl Thunk { } } - pub async fn force(myself: Thunk, co: GenCo, span: LightSpan) -> Result<Value, ErrorKind> { + pub async fn force(myself: Thunk, co: GenCo, span: Span) -> Result<Value, ErrorKind> { Self::force_(myself, &co, span).await } - pub async fn force_( - mut myself: Thunk, - co: &GenCo, - span: LightSpan, - ) -> Result<Value, ErrorKind> { + pub async fn force_(mut myself: Thunk, co: &GenCo, span: Span) -> Result<Value, ErrorKind> { // This vector of "thunks which point to the thunk-being-forced", to // be updated along with it, is necessary in order to write this // function in iterative (and later, mutual-tail-call) form. @@ -232,7 +226,7 @@ impl Thunk { // Begin evaluation of this thunk by marking it as a blackhole, meaning // that any other forcing frame encountering this thunk before its // evaluation is completed detected an evaluation cycle. - let inner = myself.0.replace(myself.prepare_blackhole(span.clone())); + let inner = myself.0.replace(myself.prepare_blackhole(span)); match inner { // If there was already a blackhole in the thunk, this is an @@ -243,8 +237,8 @@ impl Thunk { content_span, } => { return Err(ErrorKind::InfiniteRecursion { - first_force: forced_at.span(), - suspended_at: suspended_at.map(|s| s.span()), + first_force: forced_at, + suspended_at, content_span, }) } @@ -262,13 +256,12 @@ impl Thunk { ThunkRepr::Suspended { lambda, upvalues, - light_span, + span, } => { // TODO(amjoseph): use #[tailcall::mutual] here. This can // be turned into a tailcall to vm::execute_bytecode() by // passing `also_update` to it. - let value = - generators::request_enter_lambda(co, lambda, upvalues, light_span).await; + let value = generators::request_enter_lambda(co, lambda, upvalues, span).await; myself.0.replace(ThunkRepr::Evaluated(value)); continue; } @@ -422,7 +415,7 @@ impl TotalDisplay for Thunk { /// The inner `HashSet` is not available on the outside, as it would be /// potentially unsafe to interact with the pointers in the set. #[derive(Default)] -pub struct ThunkSet(HashSet<*const ThunkRepr>); +pub struct ThunkSet(FxHashSet<*const ThunkRepr>); impl ThunkSet { /// Check whether the given thunk has already been seen. Will mark the thunk diff --git a/tvix/eval/src/vm/generators.rs b/tvix/eval/src/vm/generators.rs index dbf7703bf002..b47f20bae289 100644 --- a/tvix/eval/src/vm/generators.rs +++ b/tvix/eval/src/vm/generators.rs @@ -81,7 +81,7 @@ pub enum VMRequest { EnterLambda { lambda: Rc<Lambda>, upvalues: Rc<Upvalues>, - light_span: LightSpan, + span: Span, }, /// Emit a runtime warning (already containing a span) through the VM. @@ -118,9 +118,8 @@ pub enum VMRequest { /// [`VM::catch_result`] for an explanation of how this works. TryForce(Value), - /// Request serialisation of a value to JSON, according to the - /// slightly odd Nix evaluation rules. - ToJson(Value), + /// Request the VM for the file type of the given path. + ReadFileType(PathBuf), } /// Human-readable representation of a generator message, used by observers. 
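The generators module above drives suspended computations through a typed message protocol: a generator yields a VMRequest, the VM loop services it with state only the VM owns (the EvalIO handle, the span stack, the frame stack) and replies with exactly one VMResponse, which the requesting helper unpacks or treats as a Tvix bug. A minimal, self-contained model of that round trip, using illustrative Request/Response/Vm stand-ins rather than the real genawaiter-based coroutines and EvalIO trait:

// Simplified model of the VMRequest/VMResponse round trip shown above.
// All type names here are stand-ins, not the tvix_eval API.

use std::path::PathBuf;

#[derive(Debug)]
enum FileType {
    Regular,
    Directory,
    Symlink,
    Unknown,
}

#[derive(Debug)]
enum Request {
    // Ask the VM, which owns the IO handle, for the type of a path.
    ReadFileType(PathBuf),
    // Ask the VM for a span describing the current evaluation position.
    Span,
}

#[derive(Debug)]
enum Response {
    FileType(FileType),
    Span(u32),
}

// Stand-in for the VM side: every request produces exactly one response.
struct Vm;

impl Vm {
    fn handle(&self, req: Request) -> Response {
        match req {
            Request::ReadFileType(_path) => Response::FileType(FileType::Regular),
            Request::Span => Response::Span(0),
        }
    }
}

// Each helper knows which response variant it expects; any other variant is a
// bug in the VM/generator plumbing, hence the panic (mirroring the style above).
fn request_read_file_type(vm: &Vm, path: PathBuf) -> FileType {
    match vm.handle(Request::ReadFileType(path)) {
        Response::FileType(t) => t,
        msg => panic!("bug: VM responded with incorrect message: {:?}", msg),
    }
}

fn main() {
    let vm = Vm;
    let ft = request_read_file_type(&vm, PathBuf::from("/tmp"));
    println!("{:?}", ft);
}

Keeping one helper per request variant, as request_read_file_type does for the new ReadFileType arm in the diff, localises the "wrong variant" panic next to the only code path that could trigger it.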
@@ -177,7 +176,7 @@ impl Display for VMRequest { VMRequest::ReadDir(p) => write!(f, "read_dir({})", p.to_string_lossy()), VMRequest::Span => write!(f, "span"), VMRequest::TryForce(v) => write!(f, "try_force({})", v.type_of()), - VMRequest::ToJson(v) => write!(f, "to_json({})", v.type_of()), + VMRequest::ReadFileType(p) => write!(f, "read_file_type({})", p.to_string_lossy()), } } } @@ -198,10 +197,12 @@ pub enum VMResponse { Directory(Vec<(bytes::Bytes, FileType)>), /// VM response with a span to use at the current point. - Span(LightSpan), + Span(Span), /// [std::io::Reader] produced by the VM in response to some IO operation. Reader(Box<dyn std::io::Read>), + + FileType(FileType), } impl Display for VMResponse { @@ -213,6 +214,7 @@ impl Display for VMResponse { VMResponse::Directory(d) => write!(f, "dir(len = {})", d.len()), VMResponse::Span(_) => write!(f, "span"), VMResponse::Reader(_) => write!(f, "reader"), + VMResponse::FileType(t) => write!(f, "file_type({})", t), } } } @@ -234,7 +236,7 @@ where { /// Helper function to re-enqueue the current generator while it /// is awaiting a value. - fn reenqueue_generator(&mut self, name: &'static str, span: LightSpan, generator: Generator) { + fn reenqueue_generator(&mut self, name: &'static str, span: Span, generator: Generator) { self.frames.push(Frame::Generator { name, generator, @@ -244,7 +246,7 @@ where } /// Helper function to enqueue a new generator. - pub(super) fn enqueue_generator<F, G>(&mut self, name: &'static str, span: LightSpan, gen: G) + pub(super) fn enqueue_generator<F, G>(&mut self, name: &'static str, span: Span, gen: G) where F: Future<Output = Result<Value, ErrorKind>> + 'static, G: FnOnce(GenCo) -> F, @@ -265,7 +267,7 @@ where pub(crate) fn run_generator( &mut self, name: &'static str, - span: LightSpan, + span: Span, frame_id: usize, state: GeneratorState, mut generator: Generator, @@ -302,8 +304,8 @@ where // this function prepares the frame stack and yields // back to the outer VM loop. VMRequest::ForceValue(value) => { - self.reenqueue_generator(name, span.clone(), generator); - self.enqueue_generator("force", span.clone(), |co| { + self.reenqueue_generator(name, span, generator); + self.enqueue_generator("force", span, |co| { value.force_owned_genco(co, span) }); return Ok(false); @@ -311,8 +313,8 @@ where // Generator has requested a deep-force. VMRequest::DeepForceValue(value) => { - self.reenqueue_generator(name, span.clone(), generator); - self.enqueue_generator("deep_force", span.clone(), |co| { + self.reenqueue_generator(name, span, generator); + self.enqueue_generator("deep_force", span, |co| { value.deep_force(co, span) }); return Ok(false); @@ -322,10 +324,10 @@ where // Logic is similar to `ForceValue`, except with the // value being taken from that stack. VMRequest::WithValue(idx) => { - self.reenqueue_generator(name, span.clone(), generator); + self.reenqueue_generator(name, span, generator); let value = self.stack[self.with_stack[idx]].clone(); - self.enqueue_generator("force", span.clone(), |co| { + self.enqueue_generator("force", span, |co| { value.force_owned_genco(co, span) }); @@ -336,13 +338,13 @@ where // with-stack. Logic is same as above, except for the // value being from that stack. 
VMRequest::CapturedWithValue(idx) => { - self.reenqueue_generator(name, span.clone(), generator); + self.reenqueue_generator(name, span, generator); let call_frame = self.last_call_frame() .expect("Tvix bug: generator requested captured with-value, but there is no call frame"); let value = call_frame.upvalues.with_stack().unwrap()[idx].clone(); - self.enqueue_generator("force", span.clone(), |co| { + self.enqueue_generator("force", span, |co| { value.force_owned_genco(co, span) }); @@ -351,23 +353,23 @@ where VMRequest::NixEquality(values, ptr_eq) => { let values = *values; - self.reenqueue_generator(name, span.clone(), generator); - self.enqueue_generator("nix_eq", span.clone(), |co| { + self.reenqueue_generator(name, span, generator); + self.enqueue_generator("nix_eq", span, |co| { values.0.nix_eq_owned_genco(values.1, co, ptr_eq, span) }); return Ok(false); } VMRequest::StringCoerce(val, kind) => { - self.reenqueue_generator(name, span.clone(), generator); - self.enqueue_generator("coerce_to_string", span.clone(), |co| { + self.reenqueue_generator(name, span, generator); + self.enqueue_generator("coerce_to_string", span, |co| { val.coerce_to_string(co, kind, span) }); return Ok(false); } VMRequest::Call(callable) => { - self.reenqueue_generator(name, span.clone(), generator); + self.reenqueue_generator(name, span, generator); self.call_value(span, None, callable)?; return Ok(false); } @@ -375,12 +377,12 @@ where VMRequest::EnterLambda { lambda, upvalues, - light_span, + span, } => { self.reenqueue_generator(name, span, generator); self.frames.push(Frame::CallFrame { - span: light_span, + span, call_frame: CallFrame { lambda, upvalues, @@ -424,7 +426,7 @@ where path: Some(path), error: e.into(), }) - .with_span(&span, self)?; + .with_span(span, self)?; message = VMResponse::Path(imported); } @@ -438,7 +440,7 @@ where path: Some(path), error: e.into(), }) - .with_span(&span, self)?; + .with_span(span, self)?; message = VMResponse::Reader(reader) } @@ -453,7 +455,7 @@ where error: e.into(), }) .map(Value::Bool) - .with_span(&span, self)?; + .with_span(span, self)?; message = VMResponse::Value(exists); } @@ -467,35 +469,41 @@ where path: Some(path), error: e.into(), }) - .with_span(&span, self)?; + .with_span(span, self)?; message = VMResponse::Directory(dir); } VMRequest::Span => { - message = VMResponse::Span(self.reasonable_light_span()); + message = VMResponse::Span(self.reasonable_span); } VMRequest::TryForce(value) => { self.try_eval_frames.push(frame_id); - self.reenqueue_generator(name, span.clone(), generator); + self.reenqueue_generator(name, span, generator); debug_assert!( self.frames.len() == frame_id + 1, "generator should be reenqueued with the same frame ID" ); - self.enqueue_generator("force", span.clone(), |co| { + self.enqueue_generator("force", span, |co| { value.force_owned_genco(co, span) }); return Ok(false); } - VMRequest::ToJson(value) => { - self.reenqueue_generator(name, span.clone(), generator); - self.enqueue_generator("to_json", span, |co| { - value.into_contextful_json_generator(co) - }); - return Ok(false); + VMRequest::ReadFileType(path) => { + let file_type = self + .io_handle + .as_ref() + .file_type(&path) + .map_err(|e| ErrorKind::IO { + path: Some(path), + error: e.into(), + }) + .with_span(span, self)?; + + message = VMResponse::FileType(file_type); } } } @@ -503,7 +511,7 @@ where // Generator has completed, and its result value should // be left on the stack. 
genawaiter::GeneratorState::Complete(result) => { - let value = result.with_span(&span, self)?; + let value = result.with_span(span, self)?; self.stack.push(value); return Ok(true); } @@ -683,12 +691,12 @@ pub(crate) async fn request_enter_lambda( co: &GenCo, lambda: Rc<Lambda>, upvalues: Rc<Upvalues>, - light_span: LightSpan, + span: Span, ) -> Value { let msg = VMRequest::EnterLambda { lambda, upvalues, - light_span, + span, }; match co.yield_(msg).await { @@ -767,7 +775,7 @@ pub(crate) async fn request_read_dir(co: &GenCo, path: PathBuf) -> Vec<(bytes::B } } -pub(crate) async fn request_span(co: &GenCo) -> LightSpan { +pub(crate) async fn request_span(co: &GenCo) -> Span { match co.yield_(VMRequest::Span).await { VMResponse::Span(span) => span, msg => panic!( @@ -777,13 +785,10 @@ pub(crate) async fn request_span(co: &GenCo) -> LightSpan { } } -pub(crate) async fn request_to_json( - co: &GenCo, - value: Value, -) -> Result<(serde_json::Value, NixContext), CatchableErrorKind> { - match co.yield_(VMRequest::ToJson(value)).await { - VMResponse::Value(Value::Json(json_with_ctx)) => Ok(*json_with_ctx), - VMResponse::Value(Value::Catchable(cek)) => Err(*cek), +#[cfg_attr(not(feature = "impure"), allow(unused))] +pub(crate) async fn request_read_file_type(co: &GenCo, path: PathBuf) -> FileType { + match co.yield_(VMRequest::ReadFileType(path)).await { + VMResponse::FileType(file_type) => file_type, msg => panic!( "Tvix bug: VM responded with incorrect generator message: {}", msg diff --git a/tvix/eval/src/vm/macros.rs b/tvix/eval/src/vm/macros.rs index d8a09706ab9c..f9c084d41f91 100644 --- a/tvix/eval/src/vm/macros.rs +++ b/tvix/eval/src/vm/macros.rs @@ -49,7 +49,7 @@ macro_rules! cmp_op { } } - let gen_span = $frame.current_light_span(); + let gen_span = $frame.current_span(); $vm.push_call_frame($span, $frame); $vm.enqueue_generator("compare", gen_span, |co| compare(a, b, co)); return Ok(false); diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 48dcdfc8df47..0630ed4174e8 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -14,8 +14,9 @@ mod macros; use bstr::{BString, ByteSlice, ByteVec}; use codemap::Span; +use rustc_hash::FxHashMap; use serde_json::json; -use std::{cmp::Ordering, collections::HashMap, ops::DerefMut, path::PathBuf, rc::Rc}; +use std::{cmp::Ordering, ops::DerefMut, path::PathBuf, rc::Rc}; use crate::{ arithmetic_op, @@ -27,8 +28,7 @@ use crate::{ lifted_pop, nix_search_path::NixSearchPath, observer::RuntimeObserver, - opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx}, - spans::LightSpan, + opcode::{CodeIdx, Op, Position, UpvalueIdx}, upvalues::Upvalues, value::{ Builtin, BuiltinResult, Closure, CoercionKind, Lambda, NixAttrs, NixContext, NixList, @@ -51,7 +51,7 @@ trait GetSpan { impl<'o, IO> GetSpan for &VM<'o, IO> { fn get_span(self) -> Span { - self.reasonable_span.span() + self.reasonable_span } } @@ -61,9 +61,9 @@ impl GetSpan for &CallFrame { } } -impl GetSpan for &LightSpan { +impl GetSpan for &Span { fn get_span(self) -> Span { - self.span() + *self } } @@ -94,7 +94,7 @@ impl<T, S: GetSpan, IO> WithSpan<T, S, IO> for Result<T, ErrorKind> { Frame::CallFrame { span, .. 
} => { error = Error::new( ErrorKind::BytecodeError(Box::new(error)), - span.span(), + *span, vm.source.clone(), ); } @@ -104,7 +104,7 @@ impl<T, S: GetSpan, IO> WithSpan<T, S, IO> for Result<T, ErrorKind> { err: Box::new(error), gen_type: name, }, - span.span(), + *span, vm.source.clone(), ); } @@ -146,10 +146,32 @@ impl CallFrame { /// Increment this frame's instruction pointer and return the operation that /// the pointer moved past. - fn inc_ip(&mut self) -> OpCode { - let op = self.chunk()[self.ip]; + fn inc_ip(&mut self) -> Op { + debug_assert!( + self.ip.0 < self.chunk().code.len(), + "out of bounds code at IP {} in {:p}", + self.ip.0, + self.lambda + ); + + let op = self.chunk().code[self.ip.0]; self.ip += 1; - op + op.into() + } + + /// Read a varint-encoded operand and return it. The frame pointer is + /// incremented internally. + fn read_uvarint(&mut self) -> u64 { + let (arg, size) = self.chunk().read_uvarint(self.ip.0); + self.ip += size; + arg + } + + /// Read a fixed-size u16 and increment the frame pointer. + fn read_u16(&mut self) -> u16 { + let arg = self.chunk().read_u16(self.ip.0); + self.ip += 2; + arg } /// Construct an error result from the given ErrorKind and the source span @@ -163,13 +185,6 @@ impl CallFrame { pub fn current_span(&self) -> Span { self.chunk().get_span(self.ip - 1) } - - /// Returns the information needed to calculate the current span, - /// but without performing that calculation. - // TODO: why pub? - pub(crate) fn current_light_span(&self) -> LightSpan { - LightSpan::new_actual(self.current_span()) - } } /// A frame represents an execution state of the VM. The VM has a stack of @@ -187,7 +202,7 @@ enum Frame { call_frame: CallFrame, /// Span from which the call frame was launched. - span: LightSpan, + span: Span, }, /// Generator represents a frame that can yield further @@ -201,7 +216,7 @@ enum Frame { name: &'static str, /// Span from which the generator was launched. - span: LightSpan, + span: Span, state: GeneratorState, @@ -211,15 +226,15 @@ enum Frame { } impl Frame { - pub fn span(&self) -> LightSpan { + pub fn span(&self) -> Span { match self { - Frame::CallFrame { span, .. } | Frame::Generator { span, .. } => span.clone(), + Frame::CallFrame { span, .. } | Frame::Generator { span, .. } => *span, } } } #[derive(Default)] -struct ImportCache(HashMap<PathBuf, Value>); +struct ImportCache(FxHashMap<PathBuf, Value>); /// The `ImportCache` holds the `Value` resulting from `import`ing a certain /// file, so that the same file doesn't need to be re-evaluated multiple times. @@ -309,7 +324,7 @@ struct VM<'o, IO> { /// /// The VM should update this whenever control flow changes take place (i.e. /// entering or exiting a frame to yield control somewhere). - reasonable_span: LightSpan, + reasonable_span: Span, /// This field is responsible for handling `builtins.tryEval`. When that /// builtin is encountered, it sends a special message to the VM which @@ -343,7 +358,7 @@ where observer: &'o mut dyn RuntimeObserver, source: SourceCode, globals: Rc<GlobalsMap>, - reasonable_span: LightSpan, + reasonable_span: Span, ) -> Self { Self { nix_search_path, @@ -362,7 +377,7 @@ where } /// Push a call frame onto the frame stack. - fn push_call_frame(&mut self, span: LightSpan, call_frame: CallFrame) { + fn push_call_frame(&mut self, span: Span, call_frame: CallFrame) { self.frames.push(Frame::CallFrame { span, call_frame }) } @@ -434,8 +449,8 @@ where /// stack. In this case, the frame is not returned to the frame stack. /// /// 2. 
The code encounters a generator, in which case the frame in its - /// current state is pushed back on the stack, and the generator is left on - /// top of it for the outer loop to execute. + /// current state is pushed back on the stack, and the generator is left + /// on top of it for the outer loop to execute. /// /// 3. An error is encountered. /// @@ -444,27 +459,35 @@ where /// /// The return value indicates whether the bytecode has been executed to /// completion, or whether it has been suspended in favour of a generator. - fn execute_bytecode(&mut self, span: LightSpan, mut frame: CallFrame) -> EvalResult<bool> { + fn execute_bytecode(&mut self, span: Span, mut frame: CallFrame) -> EvalResult<bool> { loop { let op = frame.inc_ip(); self.observer.observe_execute_op(frame.ip, &op, &self.stack); match op { - OpCode::OpThunkSuspended(idx) | OpCode::OpThunkClosure(idx) => { - let blueprint = match &frame.chunk()[idx] { + Op::ThunkSuspended | Op::ThunkClosure => { + let idx = frame.read_uvarint() as usize; + + let blueprint = match &frame.chunk().constants[idx] { Value::Blueprint(lambda) => lambda.clone(), _ => panic!("compiler bug: non-blueprint in blueprint slot"), }; - let upvalue_count = blueprint.upvalue_count; - let thunk = if matches!(op, OpCode::OpThunkClosure(_)) { + let upvalue_count = frame.read_uvarint(); + + debug_assert!( + (upvalue_count >> 1) == blueprint.upvalue_count as u64, + "TODO: new upvalue count not correct", + ); + + let thunk = if op == Op::ThunkClosure { debug_assert!( - upvalue_count > 0, - "OpThunkClosure should not be called for plain lambdas" + (((upvalue_count >> 1) > 0) || (upvalue_count & 0b1 == 1)), + "OpThunkClosure should not be called for plain lambdas", ); Thunk::new_closure(blueprint) } else { - Thunk::new_suspended(blueprint, frame.current_light_span()) + Thunk::new_suspended(blueprint, frame.current_span()) }; let upvalues = thunk.upvalues_mut(); self.stack.push(Value::Thunk(thunk.clone())); @@ -477,17 +500,17 @@ where self.populate_upvalues(&mut frame, upvalue_count, upvalues)?; } - OpCode::OpForce => { + Op::Force => { if let Some(Value::Thunk(_)) = self.stack.last() { let thunk = match self.stack_pop() { Value::Thunk(t) => t, _ => unreachable!(), }; - let gen_span = frame.current_light_span(); + let gen_span = frame.current_span(); self.push_call_frame(span, frame); - self.enqueue_generator("force", gen_span.clone(), |co| { + self.enqueue_generator("force", gen_span, |co| { Thunk::force(thunk, co, gen_span) }); @@ -495,27 +518,37 @@ where } } - OpCode::OpGetUpvalue(upv_idx) => { - let value = frame.upvalue(upv_idx).clone(); + Op::GetUpvalue => { + let idx = UpvalueIdx(frame.read_uvarint() as usize); + let value = frame.upvalue(idx).clone(); self.stack.push(value); } // Discard the current frame. - OpCode::OpReturn => { + Op::Return => { // TODO(amjoseph): I think this should assert `==` rather // than `<=` but it fails with the stricter condition. 
debug_assert!(self.stack.len() - 1 <= frame.stack_offset); return Ok(true); } - OpCode::OpConstant(idx) => { - let c = frame.chunk()[idx].clone(); + Op::Constant => { + let idx = frame.read_uvarint() as usize; + + debug_assert!( + idx < frame.chunk().constants.len(), + "out of bounds constant at IP {} in {:p}", + frame.ip.0, + frame.lambda + ); + + let c = frame.chunk().constants[idx].clone(); self.stack.push(c); } - OpCode::OpCall => { + Op::Call => { let callable = self.stack_pop(); - self.call_value(frame.current_light_span(), Some((span, frame)), callable)?; + self.call_value(frame.current_span(), Some((span, frame)), callable)?; // exit this loop and let the outer loop enter the new call return Ok(true); @@ -523,7 +556,8 @@ where // Remove the given number of elements from the stack, // but retain the top value. - OpCode::OpCloseScope(Count(count)) => { + Op::CloseScope => { + let count = frame.read_uvarint() as usize; // Immediately move the top value into the right // position. let target_idx = self.stack.len() - 1 - count; @@ -535,15 +569,22 @@ where } } - OpCode::OpClosure(idx) => { - let blueprint = match &frame.chunk()[idx] { + Op::Closure => { + let idx = frame.read_uvarint() as usize; + let blueprint = match &frame.chunk().constants[idx] { Value::Blueprint(lambda) => lambda.clone(), _ => panic!("compiler bug: non-blueprint in blueprint slot"), }; - let upvalue_count = blueprint.upvalue_count; + let upvalue_count = frame.read_uvarint(); + debug_assert!( - upvalue_count > 0, + (upvalue_count >> 1) == blueprint.upvalue_count as u64, + "TODO: new upvalue count not correct in closure", + ); + + debug_assert!( + ((upvalue_count >> 1) > 0 || (upvalue_count & 0b1 == 1)), "OpClosure should not be called for plain lambdas" ); @@ -556,7 +597,7 @@ where )))); } - OpCode::OpAttrsSelect => lifted_pop! { + Op::AttrsSelect => lifted_pop! { self(key, attrs) => { let key = key.to_str().with_span(&frame, self)?; let attrs = attrs.to_attrs().with_span(&frame, self)?; @@ -576,21 +617,24 @@ where } }, - OpCode::OpJumpIfFalse(JumpOffset(offset)) => { + Op::JumpIfFalse => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); if !self.stack_peek(0).as_bool().with_span(&frame, self)? 
{ frame.ip += offset; } } - OpCode::OpJumpIfCatchable(JumpOffset(offset)) => { + Op::JumpIfCatchable => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); if self.stack_peek(0).is_catchable() { frame.ip += offset; } } - OpCode::OpJumpIfNoFinaliseRequest(JumpOffset(offset)) => { + Op::JumpIfNoFinaliseRequest => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); match self.stack_peek(0) { Value::FinaliseRequest(finalise) => { @@ -602,11 +646,11 @@ where } } - OpCode::OpPop => { + Op::Pop => { self.stack.pop(); } - OpCode::OpAttrsTrySelect => { + Op::AttrsTrySelect => { let key = self.stack_pop().to_str().with_span(&frame, self)?; let value = match self.stack_pop() { Value::Attrs(attrs) => match attrs.select(&key) { @@ -620,12 +664,14 @@ where self.stack.push(value); } - OpCode::OpGetLocal(StackIdx(local_idx)) => { + Op::GetLocal => { + let local_idx = frame.read_uvarint() as usize; let idx = frame.stack_offset + local_idx; self.stack.push(self.stack[idx].clone()); } - OpCode::OpJumpIfNotFound(JumpOffset(offset)) => { + Op::JumpIfNotFound => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); if matches!(self.stack_peek(0), Value::AttrNotFound) { self.stack_pop(); @@ -633,16 +679,17 @@ where } } - OpCode::OpJump(JumpOffset(offset)) => { + Op::Jump => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); frame.ip += offset; } - OpCode::OpEqual => lifted_pop! { + Op::Equal => lifted_pop! { self(b, a) => { - let gen_span = frame.current_light_span(); + let gen_span = frame.current_span(); self.push_call_frame(span, frame); - self.enqueue_generator("nix_eq", gen_span.clone(), |co| { + self.enqueue_generator("nix_eq", gen_span, |co| { a.nix_eq_owned_genco(b, co, PointerEquality::ForbidAll, gen_span) }); return Ok(false); @@ -653,7 +700,7 @@ where // top is not of the expected type. This is necessary // to implement some specific behaviours of Nix // exactly. - OpCode::OpAssertBool => { + Op::AssertBool => { let val = self.stack_peek(0); // TODO(edef): propagate this into is_bool, since bottom values *are* values of any type if !val.is_catchable() && !val.is_bool() { @@ -667,7 +714,7 @@ where } } - OpCode::OpAssertAttrs => { + Op::AssertAttrs => { let val = self.stack_peek(0); // TODO(edef): propagate this into is_attrs, since bottom values *are* values of any type if !val.is_catchable() && !val.is_attrs() { @@ -681,9 +728,9 @@ where } } - OpCode::OpAttrs(Count(count)) => self.run_attrset(&frame, count)?, + Op::Attrs => self.run_attrset(frame.read_uvarint() as usize, &frame)?, - OpCode::OpAttrsUpdate => lifted_pop! { + Op::AttrsUpdate => lifted_pop! { self(rhs, lhs) => { let rhs = rhs.to_attrs().with_span(&frame, self)?; let lhs = lhs.to_attrs().with_span(&frame, self)?; @@ -691,28 +738,30 @@ where } }, - OpCode::OpInvert => lifted_pop! { + Op::Invert => lifted_pop! { self(v) => { let v = v.as_bool().with_span(&frame, self)?; self.stack.push(Value::Bool(!v)); } }, - OpCode::OpList(Count(count)) => { + Op::List => { + let count = frame.read_uvarint() as usize; let list = NixList::construct(count, self.stack.split_off(self.stack.len() - count)); self.stack.push(Value::List(list)); } - OpCode::OpJumpIfTrue(JumpOffset(offset)) => { + Op::JumpIfTrue => { + let offset = frame.read_u16() as usize; debug_assert!(offset != 0); if self.stack_peek(0).as_bool().with_span(&frame, self)? { frame.ip += offset; } } - OpCode::OpHasAttr => lifted_pop! { + Op::HasAttr => lifted_pop! 
{ self(key, attrs) => { let key = key.to_str().with_span(&frame, self)?; let result = match attrs { @@ -727,19 +776,20 @@ where } }, - OpCode::OpConcat => lifted_pop! { + Op::Concat => lifted_pop! { self(rhs, lhs) => { let rhs = rhs.to_list().with_span(&frame, self)?.into_inner(); - let lhs = lhs.to_list().with_span(&frame, self)?.into_inner(); - self.stack.push(Value::List(NixList::from(lhs + rhs))) + let mut lhs = lhs.to_list().with_span(&frame, self)?.into_inner(); + lhs.extend(rhs.into_iter()); + self.stack.push(Value::List(lhs.into())) } }, - OpCode::OpResolveWith => { + Op::ResolveWith => { let ident = self.stack_pop().to_str().with_span(&frame, self)?; // Re-enqueue this frame. - let op_span = frame.current_light_span(); + let op_span = frame.current_span(); self.push_call_frame(span, frame); // Construct a generator frame doing the lookup in constant @@ -762,27 +812,33 @@ where return Ok(false); } - OpCode::OpFinalise(StackIdx(idx)) => match &self.stack[frame.stack_offset + idx] { - Value::Closure(_) => panic!("attempted to finalise a closure"), - Value::Thunk(thunk) => thunk.finalise(&self.stack[frame.stack_offset..]), - _ => panic!("attempted to finalise a non-thunk"), - }, + Op::Finalise => { + let idx = frame.read_uvarint() as usize; + match &self.stack[frame.stack_offset + idx] { + Value::Closure(_) => panic!("attempted to finalise a closure"), + Value::Thunk(thunk) => thunk.finalise(&self.stack[frame.stack_offset..]), + _ => panic!("attempted to finalise a non-thunk"), + } + } + + Op::CoerceToString => { + let kind: CoercionKind = frame.chunk().code[frame.ip.0].into(); + frame.ip.0 += 1; - OpCode::OpCoerceToString(kind) => { let value = self.stack_pop(); - let gen_span = frame.current_light_span(); + let gen_span = frame.current_span(); self.push_call_frame(span, frame); - self.enqueue_generator("coerce_to_string", gen_span.clone(), |co| { + self.enqueue_generator("coerce_to_string", gen_span, |co| { value.coerce_to_string(co, kind, gen_span) }); return Ok(false); } - OpCode::OpInterpolate(Count(count)) => self.run_interpolate(&frame, count)?, + Op::Interpolate => self.run_interpolate(frame.read_uvarint(), &frame)?, - OpCode::OpValidateClosedFormals => { + Op::ValidateClosedFormals => { let formals = frame.lambda.formals.as_ref().expect( "OpValidateClosedFormals called within the frame of a lambda without formals", ); @@ -806,9 +862,9 @@ where } } - OpCode::OpAdd => lifted_pop! { + Op::Add => lifted_pop! { self(b, a) => { - let gen_span = frame.current_light_span(); + let gen_span = frame.current_span(); self.push_call_frame(span, frame); // OpAdd can add not just numbers, but also string-like @@ -819,21 +875,21 @@ where } }, - OpCode::OpSub => lifted_pop! { + Op::Sub => lifted_pop! { self(b, a) => { let result = arithmetic_op!(&a, &b, -).with_span(&frame, self)?; self.stack.push(result); } }, - OpCode::OpMul => lifted_pop! { + Op::Mul => lifted_pop! { self(b, a) => { let result = arithmetic_op!(&a, &b, *).with_span(&frame, self)?; self.stack.push(result); } }, - OpCode::OpDiv => lifted_pop! { + Op::Div => lifted_pop! 
{ self(b, a) => { match b { Value::Integer(0) => return frame.error(self, ErrorKind::DivisionByZero), @@ -848,7 +904,7 @@ where } }, - OpCode::OpNegate => match self.stack_pop() { + Op::Negate => match self.stack_pop() { Value::Integer(i) => self.stack.push(Value::Integer(-i)), Value::Float(f) => self.stack.push(Value::Float(-f)), Value::Catchable(cex) => self.stack.push(Value::Catchable(cex)), @@ -863,12 +919,12 @@ where } }, - OpCode::OpLess => cmp_op!(self, frame, span, <), - OpCode::OpLessOrEq => cmp_op!(self, frame, span, <=), - OpCode::OpMore => cmp_op!(self, frame, span, >), - OpCode::OpMoreOrEq => cmp_op!(self, frame, span, >=), + Op::Less => cmp_op!(self, frame, span, <), + Op::LessOrEq => cmp_op!(self, frame, span, <=), + Op::More => cmp_op!(self, frame, span, >), + Op::MoreOrEq => cmp_op!(self, frame, span, >=), - OpCode::OpFindFile => match self.stack_pop() { + Op::FindFile => match self.stack_pop() { Value::UnresolvedPath(path) => { let resolved = self .nix_search_path @@ -880,7 +936,7 @@ where _ => panic!("tvix compiler bug: OpFindFile called on non-UnresolvedPath"), }, - OpCode::OpResolveHomePath => match self.stack_pop() { + Op::ResolveHomePath => match self.stack_pop() { Value::UnresolvedPath(path) => { match dirs::home_dir() { None => { @@ -903,24 +959,23 @@ where } }, - OpCode::OpPushWith(StackIdx(idx)) => self.with_stack.push(frame.stack_offset + idx), + Op::PushWith => self + .with_stack + .push(frame.stack_offset + frame.read_uvarint() as usize), - OpCode::OpPopWith => { + Op::PopWith => { self.with_stack.pop(); } - OpCode::OpAssertFail => { + Op::AssertFail => { self.stack .push(Value::from(CatchableErrorKind::AssertionFailed)); } - // Data-carrying operands should never be executed, - // that is a critical error in the VM/compiler. - OpCode::DataStackIdx(_) - | OpCode::DataDeferredLocal(_) - | OpCode::DataUpvalueIdx(_) - | OpCode::DataCaptureWith => { - panic!("Tvix bug: attempted to execute data-carrying operand") + // Encountering an invalid opcode is a critical error in the + // VM/compiler. + Op::Invalid => { + panic!("Tvix bug: attempted to execute invalid opcode") } } } @@ -940,7 +995,7 @@ where &self.stack[self.stack.len() - 1 - offset] } - fn run_attrset(&mut self, frame: &CallFrame, count: usize) -> EvalResult<()> { + fn run_attrset(&mut self, count: usize, frame: &CallFrame) -> EvalResult<()> { let attrs = NixAttrs::construct(count, self.stack.split_off(self.stack.len() - count * 2)) .with_span(frame, self)? .map(Value::attrs) @@ -978,7 +1033,7 @@ where /// Interpolate string fragments by popping the specified number of /// fragments of the stack, evaluating them to strings, and pushing /// the concatenated result string back on the stack. - fn run_interpolate(&mut self, frame: &CallFrame, count: usize) -> EvalResult<()> { + fn run_interpolate(&mut self, count: u64, frame: &CallFrame) -> EvalResult<()> { let mut out = BString::default(); // Interpolation propagates the context and union them. let mut context: NixContext = NixContext::new(); @@ -1004,12 +1059,6 @@ where Ok(()) } - /// Returns a reasonable light span for the current situation that the VM is - /// in. - pub fn reasonable_light_span(&self) -> LightSpan { - self.reasonable_span.clone() - } - /// Apply an argument from the stack to a builtin, and attempt to call it. /// /// All calls are tail-calls in Tvix, as every function application is a @@ -1017,7 +1066,7 @@ where /// /// Due to this, once control flow exits this function, the generator will /// automatically be run by the VM. 
- fn call_builtin(&mut self, span: LightSpan, mut builtin: Builtin) -> EvalResult<()> { + fn call_builtin(&mut self, span: Span, mut builtin: Builtin) -> EvalResult<()> { let builtin_name = builtin.name(); self.observer.observe_enter_builtin(builtin_name); @@ -1041,8 +1090,8 @@ where fn call_value( &mut self, - span: LightSpan, - parent: Option<(LightSpan, CallFrame)>, + span: Span, + parent: Option<(Span, CallFrame)>, callable: Value, ) -> EvalResult<()> { match callable { @@ -1098,69 +1147,79 @@ where Ok(()) } - v => Err(ErrorKind::NotCallable(v.type_of())).with_span(&span, self), + v => Err(ErrorKind::NotCallable(v.type_of())).with_span(span, self), } } /// Populate the upvalue fields of a thunk or closure under construction. + /// + /// See the closely tied function `emit_upvalue_data` in the compiler + /// implementation for details on the argument processing. fn populate_upvalues( &mut self, frame: &mut CallFrame, - count: usize, + count: u64, mut upvalues: impl DerefMut<Target = Upvalues>, ) -> EvalResult<()> { - for _ in 0..count { - match frame.inc_ip() { - OpCode::DataStackIdx(StackIdx(stack_idx)) => { - let idx = frame.stack_offset + stack_idx; - - let val = match self.stack.get(idx) { - Some(val) => val.clone(), - None => { - return frame.error( - self, - ErrorKind::TvixBug { - msg: "upvalue to be captured was missing on stack", - metadata: Some(Rc::new(json!({ - "ip": format!("{:#x}", frame.ip.0 - 1), - "stack_idx(relative)": stack_idx, - "stack_idx(absolute)": idx, - }))), - }, - ); - } - }; + // Determine whether to capture the with stack, and then shift the + // actual count of upvalues back. + let capture_with = count & 0b1 == 1; + let count = count >> 1; + if capture_with { + // Start the captured with_stack off of the + // current call frame's captured with_stack, ... + let mut captured_with_stack = frame + .upvalues + .with_stack() + .cloned() + // ... or make an empty one if there isn't one already. + .unwrap_or_else(|| Vec::with_capacity(self.with_stack.len())); + + for idx in &self.with_stack { + captured_with_stack.push(self.stack[*idx].clone()); + } - upvalues.deref_mut().push(val); - } + upvalues.deref_mut().set_with_stack(captured_with_stack); + } - OpCode::DataUpvalueIdx(upv_idx) => { - upvalues.deref_mut().push(frame.upvalue(upv_idx).clone()); - } + for _ in 0..count { + let pos = Position(frame.read_uvarint()); - OpCode::DataDeferredLocal(idx) => { - upvalues.deref_mut().push(Value::DeferredUpvalue(idx)); - } + if let Some(stack_idx) = pos.runtime_stack_index() { + let idx = frame.stack_offset + stack_idx.0; - OpCode::DataCaptureWith => { - // Start the captured with_stack off of the - // current call frame's captured with_stack, ... - let mut captured_with_stack = frame - .upvalues - .with_stack() - .cloned() - // ... or make an empty one if there isn't one already. 
- .unwrap_or_else(|| Vec::with_capacity(self.with_stack.len())); - - for idx in &self.with_stack { - captured_with_stack.push(self.stack[*idx].clone()); + let val = match self.stack.get(idx) { + Some(val) => val.clone(), + None => { + return frame.error( + self, + ErrorKind::TvixBug { + msg: "upvalue to be captured was missing on stack", + metadata: Some(Rc::new(json!({ + "ip": format!("{:#x}", frame.ip.0 - 1), + "stack_idx(relative)": stack_idx.0, + "stack_idx(absolute)": idx, + }))), + }, + ); } + }; - upvalues.deref_mut().set_with_stack(captured_with_stack); - } + upvalues.deref_mut().push(val); + continue; + } - _ => panic!("compiler error: missing closure operand"), + if let Some(idx) = pos.runtime_deferred_local() { + upvalues.deref_mut().push(Value::DeferredUpvalue(idx)); + continue; } + + if let Some(idx) = pos.runtime_upvalue_index() { + upvalues.deref_mut().push(frame.upvalue(idx).clone()); + continue; + } + + panic!("Tvix bug: invalid capture position emitted") } Ok(()) @@ -1319,6 +1378,18 @@ async fn final_deep_force(co: GenCo) -> Result<Value, ErrorKind> { Ok(generators::request_deep_force(&co, value).await) } +/// Specification for how to handle top-level values returned by evaluation +#[derive(Debug, Clone, Copy, Default)] +pub enum EvalMode { + /// The default. Values are returned from evaluations as-is, without any extra forcing or + /// special handling. + #[default] + Lazy, + + /// Strictly and deeply evaluate top-level values returned by evaluation. + Strict, +} + pub fn run_lambda<IO>( nix_search_path: NixSearchPath, io_handle: IO, @@ -1326,7 +1397,7 @@ pub fn run_lambda<IO>( source: SourceCode, globals: Rc<GlobalsMap>, lambda: Rc<Lambda>, - strict: bool, + mode: EvalMode, ) -> EvalResult<RuntimeResult> where IO: AsRef<dyn EvalIO> + 'static, @@ -1345,17 +1416,18 @@ where observer, source, globals, - root_span.into(), + root_span, ); // When evaluating strictly, synthesise a frame that will instruct // the VM to deep-force the final value before returning it. - if strict { - vm.enqueue_generator("final_deep_force", root_span.into(), final_deep_force); + match mode { + EvalMode::Lazy => {} + EvalMode::Strict => vm.enqueue_generator("final_deep_force", root_span, final_deep_force), } vm.frames.push(Frame::CallFrame { - span: root_span.into(), + span: root_span, call_frame: CallFrame { lambda, upvalues: Rc::new(Upvalues::with_capacity(0)), diff --git a/tvix/eval/tests/nix_oracle.rs b/tvix/eval/tests/nix_oracle.rs index 3d3abc73635f..137dff6b00f6 100644 --- a/tvix/eval/tests/nix_oracle.rs +++ b/tvix/eval/tests/nix_oracle.rs @@ -58,10 +58,16 @@ fn nix_eval(expr: &str, strictness: Strictness) -> String { #[track_caller] #[cfg(feature = "impure")] fn compare_eval(expr: &str, strictness: Strictness) { + use tvix_eval::{EvalIO, EvalMode}; + let nix_result = nix_eval(expr, strictness); - let mut eval = tvix_eval::Evaluation::new_pure(); - eval.strict = matches!(strictness, Strictness::Strict); - eval.io_handle = Box::new(tvix_eval::StdIO); + let mut eval_builder = tvix_eval::Evaluation::builder_pure(); + if matches!(strictness, Strictness::Strict) { + eval_builder = eval_builder.mode(EvalMode::Strict); + } + let eval = eval_builder + .io_handle(Box::new(tvix_eval::StdIO) as Box<dyn EvalIO>) + .build(); let tvix_result = eval .evaluate(expr, None) |