diff options
Diffstat (limited to 'tvix/eval')
34 files changed, 600 insertions, 1808 deletions
diff --git a/tvix/eval/Cargo.toml b/tvix/eval/Cargo.toml index 677ce6ab85..4cf8ea146c 100644 --- a/tvix/eval/Cargo.toml +++ b/tvix/eval/Cargo.toml @@ -30,7 +30,6 @@ smol_str = "0.2.0" tabwriter = "1.2" test-strategy = { version = "0.2.1", optional = true } toml = "0.6.0" -xml-rs = "0.8.4" sha2 = "0.10.8" sha1 = "0.10.6" md-5 = "0.10.6" @@ -43,6 +42,9 @@ pretty_assertions = "1.2.1" rstest = "0.19.0" tempfile = "3.3.0" +[target.'cfg(not(target_env = "msvc"))'.dev-dependencies] +tikv-jemallocator = "0.5" + [features] default = ["impure", "arbitrary", "nix_tests"] diff --git a/tvix/eval/benches/eval.rs b/tvix/eval/benches/eval.rs index 57d4eb71b5..1333f5018c 100644 --- a/tvix/eval/benches/eval.rs +++ b/tvix/eval/benches/eval.rs @@ -1,5 +1,11 @@ use criterion::{black_box, criterion_group, criterion_main, Criterion}; use itertools::Itertools; +#[cfg(not(target_env = "msvc"))] +use tikv_jemallocator::Jemalloc; + +#[cfg(not(target_env = "msvc"))] +#[global_allocator] +static GLOBAL: Jemalloc = Jemalloc; fn interpret(code: &str) { tvix_eval::Evaluation::new_pure().evaluate(code, None); diff --git a/tvix/eval/build.rs b/tvix/eval/build.rs index a9c9a78b06..b37a6e8a0c 100644 --- a/tvix/eval/build.rs +++ b/tvix/eval/build.rs @@ -5,5 +5,10 @@ fn main() { "cargo:rustc-env=TVIX_CURRENT_SYSTEM={}", &env::var("TARGET").unwrap() ); - println!("cargo:rerun-if-changed-env=TARGET") + println!("cargo:rerun-if-changed-env=TARGET"); + + // Pick up new test case files + // https://github.com/la10736/rstest/issues/256 + println!("cargo:rerun-if-changed=src/tests/nix_tests"); + println!("cargo:rerun-if-changed=src/tests/tvix_tests") } diff --git a/tvix/eval/default.nix b/tvix/eval/default.nix index 91661291f7..9dd5875f85 100644 --- a/tvix/eval/default.nix +++ b/tvix/eval/default.nix @@ -1,9 +1,16 @@ # TODO: find a way to build the benchmarks via crate2nix -{ depot, pkgs, ... }: +{ depot, pkgs, lib, ... }: -depot.tvix.crates.workspaceMembers.tvix-eval.build.override { +(depot.tvix.crates.workspaceMembers.tvix-eval.build.override { runTests = true; # Make C++ Nix available, to compare eval results against. testInputs = [ pkgs.nix ]; -} +}).overrideAttrs (old: rec { + meta.ci.targets = lib.filter (x: lib.hasPrefix "with-features" x || x == "no-features") (lib.attrNames passthru); + passthru = depot.tvix.utils.mkFeaturePowerset { + inherit (old) crateName; + features = [ "nix_tests" ]; + override.testInputs = [ pkgs.nix ]; + }; +}) diff --git a/tvix/eval/docs/abandoned/thread-local-vm.md b/tvix/eval/docs/abandoned/thread-local-vm.md deleted file mode 100644 index c6a2d5e07e..0000000000 --- a/tvix/eval/docs/abandoned/thread-local-vm.md +++ /dev/null @@ -1,233 +0,0 @@ -# We can't have nice things because IFD - -The thread-local VM work below was ultimately not merged because it -was decided that it would be harmful for `tvix::eval::Value` to -implement `Eq`, `Hash`, or any of the other `std` traits. - -Implementing `std` traits on `Value` was deemed harmful because IFD -can cause arbitrary amounts of compilation to occur, including -network transactions with builders. Obviously it would be -unexpected and error-prone to have a `PartialEq::eq()` which does -something like this. This problem does not manifest within the -"nixpkgs compatibility only" scope, or in any undeprecated language -feature other than IFD. Although IFD is outside the "nixpkgs -compatibility scope", it [has been added to the TVL compatibility -scope](https://cl.tvl.fyi/c/depot/+/7193/comment/3418997b_0dbd0b65/). - -This was the sole reason for not merging. - -The explanation below may be useful in case future circumstances -affect the relevance of the reasoning above. - -The implementation can be found in these CLs: - -- [refactor(tvix/eval): remove lifetime parameter from VM<'o>](https://cl.tvl.fyi/c/depot/+/7194) -- [feat(tvix/eval): [FOUNDLING] thread-local VM](https://cl.tvl.fyi/c/depot/+/7195) -- [feat(tvix/eval): [FOUNDLING] VM::vm_xxx convenience methods](https://cl.tvl.fyi/c/depot/+/7196) -- [refactor(tvix/eval): [FOUNDLING]: drop explicit `&mut vm` parameter](https://cl.tvl.fyi/c/depot/+/7197) - -# Thread-local storage for tvix::eval::vm::VM - -## The problem - -`Value::force()` takes a `&mut VM` argument, since forcing a value -requires executing opcodes. This means that `Value::nix_eq()` too -must take a `&mut VM`, since any sensible definition of equality -will have to force thunks. - -Unfortunately Rust's `PartialEq::eq()` function does not accept any -additional arguments like this, so `Value` cannot implement -`PartialEq`. Worse, structs which *contain* `Value`s can't -implement `PartialEq` either. This means `Value`, and anything -containing it, cannot be the key for a `BTreeMap` or `HashMap`. We -can't even insert `Value`s into a `HashSet`! - -There are other situations like this that don't involve `PartialEq`, -but it's the most glaring one. The main problem is that you need a -`VM` in order to force thunks, and thunks can be anywhere in a -`Value`. - -## Solving the problem with thread-locals - -We could avoid threading the `&mut VM` through the entire codebase -by making it a thread-local. - -To do this without a performance hit, we need to use LLVM -thread-locals, which are the same cost as references to `static`s -but load relative to -[`llvm.threadlocal.address`][threadlocal-intrinsic] instead of -relative to the data segment. Unfortunately `#[thread_local]` [is -unstable][thread-local-unstable] and [unsafe in -general][thread-local-unsafe] for most of the cases where we would -want to use it. There is one [exception][tls-const-init], however: -if a `!thread_local()` has a `const` initializer, the compiler will -insert a `#[thread_local]`; this special case is both safe and -stable. - -The difficult decision is what the type of the thread-local should -be. Since you can't get a mutable reference to a `thread_local!()` -it will have to be some interior-mutability-bestowing wrapper around -our current `struct VM`. Here are the choices: - -### `RefCell<VM>` - -This is the obvious first choice, since it lets you borrow a -`RefMut<Target=VM>`. The problem here is that we want to keep the -codebase written such that all the functions in `impl VM` still take -a `&mut self`. This means that there will be an active mutable -borrow for the duration of `VM::call_builtin()`. So if we implement -`PartialEq` by having `eq()` attempt a second mutable borrow from -the thread-local storage, it will fail since there is already an -active borrow. - -The problem here is that you can't "unborrow" a `RefMut` except by -dropping it. There's no way around this. - -#### Problem: Uglification - -The only solution here is to rewrite all the functions in `impl VM` -so they don't take any kind of `self` argument, and then have them -do a short-lived `.borrow_mut()` from the thread-local `RefCell` -*separately, each time* they want to modify one of the fields of -`VM` (currently `frames`, `stack`, `with_stack`, `warnings`). This -means that if you had a code sequence like this: - -``` -impl VM { - fn foo(&mut self, ...) { - ... - self.frame().ip += 1; - self.some_other_method(); - self.frame().ip += 1; -``` - -You would need to add *two separate `borrow_mut()`s*, one for each -of the `self.frame().ip+=1` statements. You can't just do one big -`borrow_mut()` because `some_other_method()` will call -`borrow_mut()` and panic. - -#### Problem: Performance - -The `RefCell<VM>` approach also has a fairly huge performance hit, -because every single modification to any part of `VM` will require a -reference count increment/decrement, and a conditional branch based -on the check (which will never fail) that the `RefCell` isn't -already mutably borrowed. It will also impede a lot of rustc's -optimizations. - -### `Cell<VM>` - -This is a non-starter because it means that in order to mutate any -field of `VM`, you have to move the entire `struct VM` out of the -`Cell`, mutate it, and move it back in. - -### `Cell<Box<VM>>` - -Now we're getting warmer. Here, we can move the `Box<VM>` out of -the cell with a single pointer-sized memory access. - -We don't want to do the "uglification" described in the previous -section. We are very fortunate that, sometime in mid-2019, the Rust -dieties [decreed by fiat][fiat-decree] that `&Cell<T>` and `&mut T` -are bit-for-bit identical, and even gave us mortals safe wrappers -[`from_mut()`][from_mut] and [`get_mut()`][get_mut] around -`mem::transmute()`. - -So now, when a `VM` method (which takes `&mut self`) calls out to -some external code (like a builtin), instead of passing the `&mut -self` to the external code it can call `Cell::from_mut(&mut self)`, -and then `Cell::swap()` that into the thread-local storage cell for -the duration of the external code. After the external code returns, -it can `Cell::swap()` it back. This whole dance gets wrapped in a -lexical block, and the borrow checker sees that the `&Cell<Box<VM>>` -returned by `Cell::from_mut()` lives only until the end of the -lexical block, *so we get the `&mut self` back after the close-brace -for that block*. NLL FTW. This sounds like a lot of work, but it -should compile down to two pointer-sized loads and two pointer-sized -stores, and it is incurred basically only for `OpBuiltin`. - -This all works, with only two issues: - -1. `vm.rs` needs to be very careful to do the thread-local cell swap - dance before calling anything that might call `PartialEq::eq()` - (or any other method that expects to be able to pull the `VM` out - of thread-local storage). There is no compile-time check that we - did the dance in all the right places. If we forget to do the - dance somewhere we'll get a runtime panic from `Option::expect()` - (see next section). - -2. Since we need to call `Cell::from_mut()` on a `Box<VM>` rather - than a bare `VM`, we still need to rewrite all of `vm.rs` so that - every function takes a `&mut Box<VM>` instead of a `&mut self`. - This creates a huge amount of "noise" in the code. - -Fortunately, it turns out that nearly all the "noise" that arises -from the second point can be eliminated by taking advantage of -[deref coercions][deref-coercions]! This was the last "shoe to -drop". - -There is still the issue of having to be careful about calls from -`vm.rs` to things outside that file, but it's manageable. - -### `Cell<Option<Box<VM>>>` - -In order to get the "safe and stable `#[thread_local]`" -[exception][tls-const-init] we need a `const` initializer, which -means we need to be able to put something into the `Cell` that isn't -a `VM`. So the type needs to be `Cell<Option<Box<VM>>>`. - -Recall that you can't turn an `Option<&T>` into an `&Option<T>`. -The latter type has the "is this a `Some` or `None`" bit immediately -adjacent to the bits representing `T`. So if I hand you a `t:&T` -and you wrap it as `Some(t)`, those bits aren't adjacent in memory. -This means that all the VM methods need to operate on an -`Option<Box<VM>>` -- we can't just wrap a `Some()` around `&mut -self` "at the last minute" before inserting it into the thread-local -storage cell. Fortunately deref coercions save the day here too -- -the coercion is inferred through both layers (`Box` and `Option`) of -wrapper, so there is no additional noise in the code. - -Note that Rust is clever and can find some sequence of bits that -aren't a valid `T`, so `sizeof(Option<T>)==sizeof(T)`. And in fact, -`Box<T>` is one of these cases (and this is guaranteed). So the -`Option` has no overhead. - -# Closing thoughts, language-level support - -This would have been easier with language-level support. - -## What wouldn't help - -Although it [it was decreed][fiat-decree] that `Cell<T>` and `&mut -T` are interchangeable, a `LocalKey<Cell<T>>` isn't quite the same -thing as a `Cell<T>`, so it wouldn't be safe for the standard -library to contain something like this: - -``` -impl<T> LocalKey<Cell<T>> { - fn get_mut(&self) -> &mut T { - unsafe { - // ... mem::transmute() voodoo goes here ... -``` - -The problem here is that you can call `LocalKey<Cell<T>>::get_mut()` twice and -end up with two `&mut T`s that point to the same thing (mutable aliasing) which -results in undefined behavior. - -## What would help - -The ideal solution is for Rust to let you call arbitrary methods -`T::foo(&mut self...)` on a `LocalKey<Cell<T>>`. This way you can -have one (and only one) `&mut T` at any syntactical point in your -program -- the `&mut self`. - - -[tls-const-init]: https://github.com/rust-lang/rust/pull/90774 -[thread-local-unstable]: https://github.com/rust-lang/rust/issues/29594 -[thread-local-unsafe-generally]: https://github.com/rust-lang/rust/issues/54366 -[fiat-decree]: https://github.com/rust-lang/rust/issues/43038 -[from_mut]: https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.from_mut -[get_mut]: https://doc.rust-lang.org/stable/std/cell/struct.Cell.html#method.get_mut -[thread-local-unsafe]: [https://github.com/rust-lang/rust/issues/54366] -[deref-coercions]: https://doc.rust-lang.org/book/ch15-02-deref.html#implicit-deref-coercions-with-functions-and-methods -[threadlocal-intrinsic]: https://llvm.org/docs/LangRef.html#llvm-threadlocal-address-intrinsic diff --git a/tvix/eval/docs/build-references.md b/tvix/eval/docs/build-references.md deleted file mode 100644 index badcea1155..0000000000 --- a/tvix/eval/docs/build-references.md +++ /dev/null @@ -1,254 +0,0 @@ -Build references in derivations -=============================== - -This document describes how build references are calculated in Tvix. Build -references are used to determine which store paths should be available to a -builder during the execution of a build (i.e. the full build closure of a -derivation). - -## String contexts in C++ Nix - -In C++ Nix, each string value in the evaluator carries an optional so-called -"string context". - -These contexts are themselves a list of strings that take one of the following -formats: - -1. `!<output_name>!<drv_path>` - - This format describes a build reference to a specific output of a derivation. - -2. `=<drv_path>` - - This format is used for a special case where a derivation attribute directly - refers to a derivation path (e.g. by accessing `.drvPath` on a derivation). - - Note: In C++ Nix this case is quite special and actually requires a - store-database query during evaluation. - -3. `<path>` - a non-descript store path input, usually a plain source file (e.g. - from something like `src = ./.` or `src = ./foo.txt`). - - In the case of `unsafeDiscardOutputDependency` this is used to pass a raw - derivation file, but *not* pull in its outputs. - -Lets introduce names for these (in the same order) to make them easier to -reference below: - -```rust -enum BuildReference { - /// !<output_name>!<drv_path> - SingleOutput(OutputName, DrvPath), - - /// =<drv_path> - DrvClosure(DrvPath), - - /// <path> - Path(StorePath), -} -``` - -String contexts are, broadly speaking, created whenever a string is the result -of a computation (e.g. string interpolation) that used a *computed* path or -derivation in any way. - -Note: This explicitly does *not* include simply writing a literal string -containing a store path (whether valid or not). That is only permitted through -the `storePath` builtin. - -## Derivation inputs - -Based on the data above, the fields `inputDrvs` and `inputSrcs` of derivations -are populated in `builtins.derivationStrict` (the function which -`builtins.derivation`, which isn't actually a builtin, wraps). - -`inputDrvs` is represented by a map of derivation paths to the set of their -outputs that were referenced by the context. - -TODO: What happens if the set is empty? Somebody claimed this means all outputs. - -`inputSrcs` is represented by a set of paths. - -These are populated by the above references as follows: - -* `SingleOutput` entries are merged into `inputDrvs` -* `Path` entries are inserted into `inputSrcs` -* `DrvClosure` leads to a special store computation (`computeFSClosure`), which - finds all paths referenced by the derivation and then inserts all of them into - the fields as above (derivations with _all_ their outputs) - -This is then serialised in the derivation and passed down the pipe. - -## Builtins interfacing with contexts - -C++ Nix has several builtins that interface directly with string contexts: - -* `unsafeDiscardStringContext`: throws away a string's string context (if - present) -* `hasContext`: returns `true`/`false` depending on whether the string has - context -* `unsafeDiscardOutputDependency`: drops dependencies on the *outputs* of a - `.drv` in the context, passing only the literal `.drv` itself - - Note: This is only used for special test-cases in nixpkgs, and deprecated Nix - commands like `nix-push`. -* `getContext`: returns the string context in serialised form as a Nix attribute - set -* `appendContext`: adds a given string context to the string in the same format - as returned by `getContext` - -Most of the string manipulation operations will propagate the context to the -result based on their parameters' contexts. - -## Placeholders - -C++ Nix has `builtins.placeholder`, which given the name of an output (e.g. -`out`) creates a hashed string representation of that output name. If that -string is used anywhere in input attributes, the builder will replace it with -the actual name of the corresponding output of the current derivation. - -C++ Nix does not use contexts for this, it blindly creates a rewrite map of -these placeholder strings to the names of all outputs, and runs the output -replacement logic on all environment variables it creates, attribute files it -passes etc. - -## Tvix & string contexts - -In the past, Tvix did not track string contexts in its evaluator at all, see -the historical section for more information about that. - -Tvix tracks string contexts in every `NixString` structure via a -`HashSet<BuildReference>` and offers an API to combine the references while -keeping the exact internal structure of that data private. - -## Historical attempt: Persistent reference tracking - -We were investigating implementing a system which allows us to drop string -contexts in favour of reference scanning derivation attributes. - -This means that instead of maintaining and passing around a string context data -structure in eval, we maintain a data structure of *known paths* from the same -evaluation elsewhere in Tvix, and scan each derivation attribute against this -set of known paths when instantiating derivations. - -We believed we could take the stance that the system of string contexts as -implemented in C++ Nix is likely an implementation detail that should not be -leaking to the language surface as it does now. - -### Tracking "known paths" - -Every time a Tvix evaluation does something that causes a store interaction, a -"known path" is created. On the language surface, this is the result of one of: - -1. Path literals (e.g. `src = ./.`). -2. Calls to `builtins.derivationStrict` yielding a derivation and its output - paths. -3. Calls to `builtins.path`. - -Whenever one of these occurs, some metadata that persists for the duration of -one evaluation should be created in Nix. This metadata needs to be available in -`builtins.derivationStrict`, and should be able to respond to these queries: - -1. What is the set of all known paths? (used for e.g. instantiating an - Aho-Corasick type string searcher) -2. What is the _type_ of a path? (derivation path, derivation output, source - file) -3. What are the outputs of a derivation? -4. What is the derivation of an output? - -These queries will need to be asked of the metadata when populating the -derivation fields. - -Note: Depending on how we implement `builtins.placeholder`, it might be useful -to track created placeholders in this metadata, too. - -### Context builtins - -Context-reading builtins can be implemented in Tvix by adding `hasContext` and -`getContext` with the appropriate reference-scanning logic. However, we should -evaluate how these are used in nixpkgs and whether their uses can be removed. - -Context-mutating builtins can be implemented by tracking their effects in the -value representation of Tvix, however we should consider not doing this at all. - -`unsafeDiscardOutputDependency` should probably never be used and we should warn -or error on it. - -`unsafeDiscardStringContext` is often used as a workaround for avoiding IFD in -inconvenient places (e.g. in the TVL depot pipeline generation). This is -unnecessary in Tvix. We should evaluate which other uses exist, and act on them -appropriately. - -The initial danger with diverging here is that we might cause derivation hash -discrepancies between Tvix and C++ Nix, which can make initial comparisons of -derivations generated by the two systems difficult. If this occurs we need to -discuss how to approach it, but initially we will implement the mutating -builtins as no-ops. - -### Why this did not work for us? - -Nix has a feature to perform environmental checks of your derivation, e.g. -"these derivation outputs should not be referenced in this derivation", this was -introduced in Nix 2.2 by -https://github.com/NixOS/nix/commit/3cd15c5b1f5a8e6de87d5b7e8cc2f1326b420c88. - -Unfortunately, this feature introduced a very unfortunate and critical bug: all -usage of this feature with contextful strings will actually force the -derivation to depend at least at build time on those specific paths, see -https://github.com/NixOS/nix/issues/4629. - -For example, if you wanted to `disallowedReferences` to a package and you used a -derivation as a path, you would actually register that derivation as a input -derivation of that derivation. - -This bug is still unfixed in Nix and it seems that fixing it would require -introducing different ways to evaluate Nix derivations to preserve the -output path calculation for Nix expressions so far. - -All of this would be fine if the bug behavior was uniform in the sense that no -one tried to force-workaround it. Since Nixpkgs 23.05, due to -https://github.com/NixOS/nixpkgs/pull/211783 this is not true anymore. - -If you let nixpkgs be the disjoint union of bootstrapping derivations $A$ and -`stdenv.mkDerivation`-built derivations $B$. - -$A$ suffers from the bug and $B$ doesn't by the forced usage of -`unsafeDiscardStringContext` on those special checking fields. - -This means that to build hash-compatible $A$ **and** $B$, we need to -distinguish $A$ and $B$. A lot of hacks could be imagined to support this -problem. - -Let's assume we have a solution to that problem, it means that we are able to -detect implicitly when a set of specific fields are -`unsafeDiscardStringContext`-ed. - -Thus, we could use that same trick to implement `unsafeDiscardStringContext` -entirely for all fields actually. - -Now, to implement `unsafeDiscardStringContext` in the persistent reference -tracking model, you will need to store a disallowed list of strings that should -not trigger a reference when we are scanning a derivation parameters. - -But assume you have something like: - -```nix -derivation { - buildInputs = [ - stdenv.cc - ]; - - disallowedReferences = [ stdenv.cc ]; -} -``` - -If you unregister naively the `stdenv.cc` reference, it will silence the fact -that it is part of the `buildInputs`, so you will observe that Nix will fail -the derivation during environmental check, but Tvix would silently force remove -that reference. - -Until proven otherwise, it seems highly difficult to have the fine-grained -information to prevent reference tracking of those specific fields. It is not a -failure of the persistent reference tracking, it is an unresolved critical bug -of Nix that only nixpkgs really workarounded for `stdenv.mkDerivation`-based -derivations. diff --git a/tvix/eval/docs/builtins.md b/tvix/eval/docs/builtins.md deleted file mode 100644 index dba4c48c65..0000000000 --- a/tvix/eval/docs/builtins.md +++ /dev/null @@ -1,138 +0,0 @@ -Nix builtins -============ - -Nix has a lot of built-in functions, some of which are accessible in -the global scope, and some of which are only accessible through the -global `builtins` attribute set. - -This document is an attempt to track all of these builtins, but -without documenting their functionality. - -See also https://nixos.org/manual/nix/stable/expressions/builtins.html - -The `impl` column indicates implementation status in tvix: -- implemented: "" (empty cell) -- not yet implemented, but not blocked: `todo` -- not yet implemented, but blocked by other prerequisites: - - `store`: awaiting eval<->store api(s) - - `context`: awaiting support for string contexts - -| name | global | arity | pure | impl | -|-------------------------------|--------|-------|-------|---------| -| abort | true | 1 | | | -| add | false | 2 | true | | -| addErrorContext | false | ? | | context | -| all | false | 2 | true | | -| any | false | 2 | true | | -| appendContext | false | ? | | | -| attrNames | false | 1 | true | | -| attrValues | false | | true | | -| baseNameOf | true | | | | -| bitAnd | false | | | | -| bitOr | false | | | | -| bitXor | false | | | | -| builtins | true | | | | -| catAttrs | false | | | | -| compareVersions | false | | | | -| concatLists | false | | | | -| concatMap | false | | | | -| concatStringsSep | false | | | | -| currentSystem | false | | | | -| currentTime | false | | false | | -| deepSeq | false | | | | -| derivation | true | | | store | -| derivationStrict | true | | | store | -| dirOf | true | | | | -| div | false | | | | -| elem | false | | | | -| elemAt | false | | | | -| false | true | | | | -| fetchGit | true | | | store | -| fetchMercurial | true | | | store | -| fetchTarball | true | | | store | -| fetchurl | false | | | store | -| filter | false | | | | -| filterSource | false | | | store | -| findFile | false | | false | todo | -| foldl' | false | | | | -| fromJSON | false | | | | -| fromTOML | true | | | | -| functionArgs | false | | | | -| genList | false | | | | -| genericClosure | false | | | todo | -| getAttr | false | | | | -| getContext | false | | | | -| getEnv | false | | false | | -| hasAttr | false | | | | -| hasContext | false | | | | -| hashFile | false | | false | | -| hashString | false | | | | -| head | false | | | | -| import | true | | | | -| intersectAttrs | false | | | | -| isAttrs | false | | | | -| isBool | false | | | | -| isFloat | false | | | | -| isFunction | false | | | | -| isInt | false | | | | -| isList | false | | | | -| isNull | true | | | | -| isPath | false | | | | -| isString | false | | | | -| langVersion | false | | | | -| length | false | | | | -| lessThan | false | | | | -| listToAttrs | false | | | | -| map | true | | | | -| mapAttrs | false | | | | -| match | false | | | | -| mul | false | | | | -| nixPath | false | | | todo | -| nixVersion | false | | | todo | -| null | true | | | | -| parseDrvName | false | | | | -| partition | false | | | | -| path | false | | sometimes | store | -| pathExists | false | | false | | -| placeholder | true | | | context | -| readDir | false | | false | | -| readFile | false | | false | | -| removeAttrs | true | | | | -| replaceStrings | false | | | | -| scopedImport | true | | | | -| seq | false | | | | -| sort | false | | | | -| split | false | | | | -| splitVersion | false | | | | -| storeDir | false | | | store | -| storePath | false | | | store | -| stringLength | false | | | | -| sub | false | | | | -| substring | false | | | | -| tail | false | | | | -| throw | true | | | | -| toFile | false | | | store | -| toJSON | false | | | | -| toPath | false | | | | -| toString | true | | | | -| toXML | true | | | | -| trace | false | | | | -| true | true | | | | -| tryEval | false | | | | -| typeOf | false | | | | -| unsafeDiscardOutputDependency | false | | | | -| unsafeDiscardStringContext | false | | | | -| unsafeGetAttrPos | false | | | todo | -| valueSize | false | | | todo | - -## Added after C++ Nix 2.3 (without Flakes enabled) - -| name | global | arity | pure | impl | -|---------------|--------|-------|-------|-------| -| break | false | 1 | | todo | -| ceil | false | 1 | true | | -| fetchTree | true | 1 | | todo | -| floor | false | 1 | true | | -| groupBy | false | 2 | true | | -| traceVerbose | false | 2 | | todo | -| zipAttrsWith | false | 2 | true | todo | diff --git a/tvix/eval/docs/catchable-errors.md b/tvix/eval/docs/catchable-errors.md deleted file mode 100644 index ce320a9217..0000000000 --- a/tvix/eval/docs/catchable-errors.md +++ /dev/null @@ -1,131 +0,0 @@ -# (Possible) Implementation(s) of Catchable Errors for `builtins.tryEval` - -## Terminology - -Talking about “catchable errors” in Nix in general is a bit precarious since -there is no properly established terminology. Also, the existing terms are less -than apt. The reason for this lies in the fact that catchable errors (or -whatever you want to call them) don't properly _exist_ in the language: While -Nix's `builtins.tryEval` is (originally) based on the C++ exception system, -it specifically lacks the ability of such systems to have an exception _value_ -whilst handling it. Consequently, these errors don't have an obvious name -as they never appear _in_ the Nix language. They just have to be named in the -respective Nix implementation: - -- In C++ Nix the only term for such errors is `AssertionError` which is the - name of the (C++) exception used in the implementation internally. This - term isn't great, though, as `AssertionError`s can not only be generated - using `assert`, but also using `throw` and failed `NIX_PATH` resolutions. - Were this terminology to be used in documentation addressing Nix language - users, it would probably only serve confusion. - -- Tvix currently (as of r/7573) uses the term catchable errors. This term - relates to nothing in the language as such: Errors are not caught, we rather - try to evaluate an expression. Catching also sort of implies that a value - representation of the error is attainable (like in an exception system) which - is untrue. - -In light of this I (sterni) would like to suggest “tryable errors” as an -alternative term going forward which isn't inaccurate and relates to terms -already established by language internal naming. - -However, this document will continue using the term catchable error until the -naming is adjusted in Tvix itself. - -## Implementation - -Below we discuss different implementation approaches in Tvix in order to arrive -at a proposal for the new one. The historical discussion is intended as a basis -for discussing the proposal: Are we committing to an old or current mistake? Are -we solving all problems that cropped up or were solved at any given point in -time? - -### Original - -The original implementation of `tryEval` in cl/6924 was quite straightforward: -It would simply interrupt the propagation of a potential catchable error to the -top level (which usually happened using the `?` operator) in the builtin and -construct the appropriate representation of an unsuccessful evaluation if the -error was deemed catchable. It had, however, multiple problems: - -- The VM was originally written without `tryEval` in mind, i.e. it largely - assumed that an error would always cause execution to be terminated. This - problem was later solved (cl/6940). -- Thunks could not be `tryEval`-ed multiple times (b/281). This was another - consequence of VM architecture at the time: Thunks would be blackholed - before evaluation was started and the error could occur. Due to the - interaction of the generator-based VM code and `Value::force` the part - of the code altering the thunk state would never be informed about the - evaluation result in case of a failure, so the thunk would remain - blackholed leading to a crash if the same thunk was `tryEval`-ed or - forced again. To solve this issue, amjoseph completely overhauled - the implementation. - -One key point about this implementation is that it is based on the assumption -that catchable errors can only be generated in thunks, i.e. expressions causing -them are never evaluated strictly. This can be illustrated using C++ Nix: - -```console -> nix-instantiate --eval -E '[ (assert false; true) (builtins.throw "") <nixpkgs> ]' -[ <CODE> <CODE> <CODE> ] -``` - -If this wasn't the case, the VM could encounter the error in a situation where -the error would not have needed to pass through the `tryEval` builtin, causing -evaluation to abort. - -### Present - -The current system (mostly implemented in cl/9289) uses a very different -approach: Instead of relying on the thunk boundary, catchable errors are no -longer errors, but special values. They are created at the relevant points (e.g. -`builtins.throw`) and propagated whenever they are encountered by VM ops or -builtins. Finally, they either encounter `builtins.tryEval` (and are converted to -an ordinary value again) or the top level where they become a normal error again. - -The problems with this mostly stem from the confusion between values and errors -that it necessitates: - -- In most circumstances, catchable errors end up being errors again, as `tryEval` - is not used a lot. So `throw`s usually end up causing evaluation to abort. - Consequently, not only `Value::Catchable` is necessary, but also a corresponding - error variant that is _only_ created if a catchable value remains at the end of - evaluation. A requirement that was missed until cl/10991 (!) which illustrate - how strange that architecture is. A consequence of this is that catchable - errors have no location information at all. -- `Value::Catchable` is similar to other internal values in Tvix, but is much - more problematic. Aside from thunks, internal values only exist for a brief - amount of time on the stack and it is very clear what parts of the VM or - builtins need to handle them. This means that the rest of the implementation - need to consider them, keeping the complexity caused by the internal value - low. `Value::Catchable`, on the other hand, may exist anywhere and be passed - to any VM op or builtin, so it needs to be correctly propagated _everywhere_. - This causes a lot of noise in the code as well as a big potential for bugs. - Essentially, catchable errors require as much attention by the Tvix developer - as laziness. This doesn't really correlate to the importance of the two - features to the Nix language. - -### Future? - -The core assumption of the original solution does offer a path forward: After -cl/9289 we should be in a better position to introspect an error occurring from -within the VM code, but we need a better way of storing such an error to prevent -another b/281. If catchable errors can only be generated in thunks, we can just -use the thunk representation for this. This would mean that `Thunk::force_` -would need to check if evaluation was successful and (in case of failure) -change the thunk representation - -- either to the original `ThunkRepr::Suspended` which would be simple, but of - course mean duplicated evaluation work in some expressions. In fact, this - would probably leave a lot of easy performance on the table for use cases we - would like to support, e.g. tree walkers for nixpkgs. -- or to a new `ThunkRepr` variant that stores the kind of the error and all - necessary location info so stack traces can work properly. This of course - reintroduces some of the difficulty of having two kinds of errors, but it is - hopefully less problematic, as the thunk boundary (i.e. `Thunk::force`) is - where errors would usually occur. - -Besides the question whether this proposal can actually be implemented, another -consideration is whether the underlying assumption will hold in the future, i.e. -can we implement optimizations for thunk elimination in a way that thunks that -generate catchable errors are never eliminated? diff --git a/tvix/eval/docs/known-optimisation-potential.md b/tvix/eval/docs/known-optimisation-potential.md deleted file mode 100644 index 0ab185fe1b..0000000000 --- a/tvix/eval/docs/known-optimisation-potential.md +++ /dev/null @@ -1,162 +0,0 @@ -Known Optimisation Potential -============================ - -There are several areas of the Tvix evaluator code base where -potentially large performance gains can be achieved through -optimisations that we are already aware of. - -The shape of most optimisations is that of moving more work into the -compiler to simplify the runtime execution of Nix code. This leads, in -some cases, to drastically higher complexity in both the compiler -itself and in invariants that need to be guaranteed between the -runtime and the compiler. - -For this reason, and because we lack the infrastructure to adequately -track their impact (WIP), we have not yet implemented these -optimisations, but note the most important ones here. - -* Use "open upvalues" [hard] - - Right now, Tvix will immediately close over all upvalues that are - created and clone them into the `Closure::upvalues` array. - - Instead of doing this, we can statically determine most locals that - are closed over *and escape their scope* (similar to how the - `compiler::scope::Scope` struct currently tracks whether locals are - used at all). - - If we implement the machinery to track this, we can implement some - upvalues at runtime by simply sticking stack indices in the upvalue - array and only copy the values where we know that they escape. - -* Avoid `with` value duplication [easy] - - If a `with` makes use of a local identifier in a scope that can not - close before the with (e.g. not across `LambdaCtx` boundaries), we - can avoid the allocation of the phantom value and duplication of the - `NixAttrs` value on the stack. In this case we simply push the stack - index of the known local. - -* Multiple attribute selection [medium] - - An instruction could be introduced that avoids repeatedly pushing an - attribute set to/from the stack if multiple keys are being selected - from it. This occurs, for example, when inheriting from an attribute - set or when binding function formals. - -* Split closure/function representation [easy] - - Functions have fewer fields that need to be populated at runtime and - can directly use the `value::function::Lambda` representation where - possible. - -* Apply `compiler::optimise_select` to other set operations [medium] - - In addition to selects, statically known attribute resolution could - also be used for things like `?` or `with`. The latter might be a - little more complicated but is worth investigating. - -* Inline fully applied builtins with equivalent operators [medium] - - Some `builtins` have equivalent operators, e.g. `builtins.sub` - corresponds to the `-` operator, `builtins.hasAttr` to the `?` - operator etc. These operators additionally compile to a primitive - VM opcode, so they should be just as cheap (if not cheaper) as - a builtin application. - - In case the compiler encounters a fully applied builtin (i.e. - no currying is occurring) and the `builtins` global is unshadowed, - it could compile the equivalent operator bytecode instead: For - example, `builtins.sub 20 22` would be compiled as `20 - 22`. - This would ensure that equivalent `builtins` can also benefit - from special optimisations we may implement for certain operators - (in the absence of currying). E.g. we could optimise access - to the `builtins` attribute set which a call to - `builtins.getAttr "foo" builtins` should also profit from. - -* Avoid nested `VM::run` calls [hard] - - Currently when encountering Nix-native callables (thunks, closures) - the VM's run loop will nest and return the value of the nested call - frame one level up. This makes the Rust call stack almost mirror the - Nix call stack, which is usually undesirable. - - It is possible to detect situations where this is avoidable and - instead set up the VM in such a way that it continues and produces - the desired result in the same run loop, but this is kind of tricky - to get right - especially while other parts are still in flux. - - For details consult the commit with Gerrit change ID - `I96828ab6a628136e0bac1bf03555faa4e6b74ece`, in which the initial - attempt at doing this was reverted. - -* Avoid thunks if only identifier closing is required [medium] - - Some constructs, like `with`, mostly do not change runtime behaviour - if thunked. However, they are wrapped in thunks to ensure that - deferred identifiers are resolved correctly. - - This can be avoided, as we statically analyse the scope and should - be able to tell whether any such logic was required. - -* Intern literals [easy] - - Currently, the compiler emits a separate entry in the constant - table for each literal. So the program `1 + 1 + 1` will have - three entries in its `Chunk::constants` instead of only one. - -* Do some list and attribute set operations in place [hard] - - Algorithms that can not do a lot of work inside `builtins` like `map`, - `filter` or `foldl'` usually perform terribly if they use data structures like - lists and attribute sets. - - `builtins` can do work in place on a copy of a `Value`, but naïvely expressed - recursive algorithms will usually use `//` and `++` to do a single change to a - `Value` at a time, requiring a full copy of the data structure each time. - It would be a big improvement if we could do some of these operations in place - without requiring a new copy. - - There are probably two approaches: We could determine statically if a value is - reachable from elsewhere and emit a special in place instruction if not. An - easier alternative is probably to rely on reference counting at runtime: If no - other reference to a value exists, we can extend the list or update the - attribute set in place. - - An **alternative** to this is using [persistent data - structures](https://en.wikipedia.org/wiki/Persistent_data_structure) or at the - very least [immutable data structures](https://docs.rs/im/latest/im/) that can - be copied more efficiently than the stock structures we are using at the - moment. - -* Skip finalising unfinalised thunks or non-thunks instead of crashing [easy] - - Currently `OpFinalise` crashes the VM if it is called on values that don't - need to be finalised. This helps catching miscompilations where `OpFinalise` - operates on the wrong `StackIdx`. In the case of function argument patterns, - however, this means extra VM stack and instruction overhead for dynamically - determining if finalisation is necessary or not. This wouldn't be necessary - if `OpFinalise` would just noop on any values that don't need to be finalised - (anymore). - -* Phantom binding for from expression of inherits [easy] - - The from expression of an inherit is reevaluated for each inherit. This can - be demonstrated using the following Nix expression which, counter-intuitively, - will print “plonk” twice. - - ```nix - let - inherit (builtins.trace "plonk" { a = null; b = null; }) a b; - in - builtins.seq a (builtins.seq b null) - ``` - - In most Nix code, the from expression is just an identifier, so it is not - terribly inefficient, but in some cases a more expensive expression may - be used. We should create a phantom binding for the from expression that - is reused in the inherits, so only a single thunk is created for the from - expression. - - Since we discovered this, C++ Nix has implemented a similar optimization: - <https://github.com/NixOS/nix/pull/9847>. diff --git a/tvix/eval/docs/language-issues.md b/tvix/eval/docs/language-issues.md deleted file mode 100644 index 152e6594a1..0000000000 --- a/tvix/eval/docs/language-issues.md +++ /dev/null @@ -1,46 +0,0 @@ -# Nix language issues - -In the absence of a language standard, what Nix (the language) is, is prescribed -by the behavior of the C++ Nix implementation. Still, there are reasons not to -accept some behavior: - -* Tvix aims for nixpkgs compatibility only. This means we can ignore behavior in - edge cases nixpkgs doesn't trigger as well as obscure features it doesn't use - (e.g. `__overrides`). -* Some behavior of the Nix evaluator seems to be unintentional or an - implementation detail leaking out into language behavior. - -Especially in the latter case, it makes sense to raise the respective issue and -maybe to get rid of the behavior in all implementations for good. Below is an -(incomplete) list of such issues: - -* [Behaviour of nested attribute sets depends on definition order][i7111] -* [Partially constructed attribute sets are observable during dynamic attr names construction][i7012] -* [Nix parsers merges multiple attribute set literals for the same key incorrectly depending on definition order][i7115] - -On the other hand, there is behavior that seems to violate one's expectation -about the language at first, but has good enough reasons from an implementor's -perspective to keep them: - -* Dynamic keys are forbidden in `let` and `inherit`. This makes sure that we - only need to do runtime identifier lookups for `with`. More dynamic (i.e. - runtime) lookups would make the scoping system even more complicated as well - as hurt performance. -* Dynamic attributes of `rec` sets are not added to its scope. This makes sense - for the same reason. -* Dynamic and nested attributes in attribute sets don't get merged. This is a - tricky one, but avoids doing runtime (recursive) merges of attribute sets. - Instead all necessary merging can be inferred statically, i.e. the C++ Nix - implementation already merges at parse time, making nested attribute keys - syntactic sugar effectively. - -Other behavior is just odd, surprising or underdocumented: - -* `builtins.foldl'` doesn't force the initial accumulator (but all other - intermediate accumulator values), differing from e.g. Haskell, see - the [relevant PR discussion][p7158]. - -[i7111]: https://github.com/NixOS/nix/issues/7111 -[i7012]: https://github.com/NixOS/nix/issues/7012 -[i7115]: https://github.com/NixOS/nix/issues/7115 -[p7158]: https://github.com/NixOS/nix/pull/7158 diff --git a/tvix/eval/docs/opcodes-attrsets.md b/tvix/eval/docs/opcodes-attrsets.md deleted file mode 100644 index 7026f3319d..0000000000 --- a/tvix/eval/docs/opcodes-attrsets.md +++ /dev/null @@ -1,122 +0,0 @@ -# attrset-opcodes - -The problem with attrset literals is twofold: - -1. The keys of attribute sets may be dynamically evaluated. - - Access: - - ```nix - let - k = "foo"; - attrs = { /* etc. */ }; - in attrs."${k}" - ``` - - Literal: - ```nix - let - k = "foo"; - in { - "${k}" = 42; - } - ``` - - The problem with this is that the attribute set key is not known at - compile time, and needs to be dynamically evaluated by the VM as an - expression. - - For the most part this should be pretty simple, assuming a - theoretical instruction set: - - ``` - 0000 OP_CONSTANT(0) # key "foo" - 0001 OP_CONSTANT(1) # value 42 - 0002 OP_ATTR_SET(1) # construct attrset from 2 stack values - ``` - - The operation pushing the key needs to be replaced with one that - leaves a single value (the key) on the stack, i.e. the code for the - expression, e.g.: - - ``` - 0000..000n <operations leaving a string value on the stack> - 000n+1 OP_CONSTANT(1) # value 42 - 000n+2 OP_ATTR_SET(1) # construct attrset from 2 stack values - ``` - - This is fairly easy to do by simply recursing in the compiler when - the key expression is encountered. - -2. The keys of attribute sets may be nested. - - This is the non-trivial part of dealing with attribute set - literals. Specifically, the nesting can be arbitrarily deep and the - AST does not guarantee that related set keys are located - adjacently. - - Furthermore, this frequently occurs in practice in Nix. We need a - bytecode representation that makes it possible to construct nested - attribute sets at runtime. - - Proposal: AttrPath values - - If we can leave a value representing an attribute path on the - stack, we can offload the construction of nested attribute sets to - the `OpAttrSet` operation. - - Under the hood, OpAttrSet in practice constructs a `Map<NixString, - Value>` attribute set in most cases. This means it expects to pop - the value of the key of the stack, but is otherwise free to do - whatever it wants with the underlying map. - - In a simple example, we could have code like this: - - ```nix - { - a.b = 15; - } - ``` - - This would be compiled to a new `OpAttrPath` instruction that - constructs and pushes an attribute path from a given number of - fragments (which are popped off the stack). - - For example, - - ``` - 0000 OP_CONSTANT(0) # key "a" - 0001 OP_CONSTANT(1) # key "b" - 0002 OP_ATTR_PATH(2) # construct attrpath from 2 fragments - 0003 OP_CONSTANT(2) # value 42 - 0004 OP_ATTRS(1) # construct attrset from one pair - ``` - - Right before `0004` the stack would be left like this: - - [ AttrPath[a,b], 42 ] - - Inside of the `OP_ATTRS` instruction we could then begin - construction of the map and insert the nested attribute sets as - required, as well as validate that there are no duplicate keys. - -3. Both of these cases can occur simultaneously, but this is not a - problem as the opcodes combine perfectly fine, e.g.: - - ```nix - let - k = "a"; - in { - "${k}".b = 42; - } - ``` - - results in - - ``` - 0000..000n <operations leaving a string value on the stack> - 000n+1 OP_CONSTANT(1) # key "b" - 000n+2 OP_ATTR_PATH(2) # construct attrpath from 2 fragments - 000n+3 OP_CONSTANT(2) # value 42 - 000n+4 OP_ATTR_SET(1) # construct attrset from 2 stack values - ``` diff --git a/tvix/eval/docs/recursive-attrs.md b/tvix/eval/docs/recursive-attrs.md deleted file mode 100644 index c30cfd33e6..0000000000 --- a/tvix/eval/docs/recursive-attrs.md +++ /dev/null @@ -1,68 +0,0 @@ -Recursive attribute sets -======================== - -The construction behaviour of recursive attribute sets is very -specific, and a bit peculiar. - -In essence, there are multiple "phases" of scoping that take place -during attribute set construction: - -1. Every inherited value without an explicit source is inherited only - from the **outer** scope in which the attribute set is enclosed. - -2. A new scope is opened in which all recursive keys are evaluated. - This only considers **statically known keys**, attributes can - **not** recurse into dynamic keys in `self`! - - For example, this code is invalid in C++ Nix: - - ``` - nix-repl> rec { ${"a"+""} = 2; b = a * 10; } - error: undefined variable 'a' at (string):1:26 - ``` - -3. Finally, a third scope is opened in which dynamic keys are - evaluated. - -This behaviour, while possibly a bit strange and unexpected, actually -simplifies the implementation of recursive attribute sets in Tvix as -well. - -Essentially, a recursive attribute set like this: - -```nix -rec { - inherit a; - b = a * 10; - ${"c" + ""} = b * 2; -} -``` - -Can be compiled like the following expression: - -```nix -let - inherit a; -in let - b = a * 10; - in { - inherit a b; - ${"c" + ""} = b * 2; - } -``` - -Completely deferring the resolution of recursive identifiers to the -existing handling of recursive scopes (i.e. deferred access) in let -bindings. - -In practice, we can further specialise this and compile each scope -directly into the form expected by `OpAttrs` (that is, leaving -attribute names on the stack) before each value's position. - -C++ Nix's Implementation ------------------------- - -* [`ExprAttrs`](https://github.com/NixOS/nix/blob/2097c30b08af19a9b42705fbc07463bea60dfb5b/src/libexpr/nixexpr.hh#L241-L268) - (AST representation of attribute sets) -* [`ExprAttrs::eval`](https://github.com/NixOS/nix/blob/075bf6e5565aff9fba0ea02f3333c82adf4dccee/src/libexpr/eval.cc#L1333-L1414) -* [`addAttr`](https://github.com/NixOS/nix/blob/master/src/libexpr/parser.y#L98-L156) (`ExprAttrs` construction in the parser) diff --git a/tvix/eval/docs/vm-loop.md b/tvix/eval/docs/vm-loop.md deleted file mode 100644 index 6266d34709..0000000000 --- a/tvix/eval/docs/vm-loop.md +++ /dev/null @@ -1,315 +0,0 @@ -tvix-eval VM loop -================= - -This document describes the new tvix-eval VM execution loop implemented in the -chain focusing around cl/8104. - -## Background - -The VM loop implemented in Tvix prior to cl/8104 had several functions: - -1. Advancing the instruction pointer for a chunk of Tvix bytecode and - executing instructions in a loop until a result was yielded. - -2. Tracking Nix call frames as functions/thunks were entered/exited. - -3. Catching trampoline requests returned from instructions to force suspended - thunks without increasing stack size *where possible*. - -4. Handling trampolines through an inner trampoline loop, switching between a - code execution mode and execution of subsequent trampolines. - -This implementation of the trampoline logic was added on to the existing VM, -which previously always recursed for thunk forcing. There are some cases (for -example values that need to be forced *inside* of the execution of a builtin) -where trampolines could not previously be used, and the VM recursed anyways. - -As a result of this trampoline logic being added "on top" of the existing VM -loop the code became quite difficult to understand. This led to several bugs, -for example: b/251, b/246, b/245, and b/238. - -These bugs were tricky to deal with, as we had to try and make the VM do -things that are somewhat difficult to fit into its model. We could of course -keep extending the trampoline logic to accommodate all sorts of concepts (such -as finalisers), but that seems like it does not solve the root problem. - -## New VM loop - -In cl/8104, a unified new solution is implemented with which the VM is capable -of evaluating everything without increasing the call stack size. - -This is done by introducing a new frame stack in the VM, on which execution -frames are enqueued that are either: - -1. A bytecode frame, consisting of Tvix bytecode that evaluates compiled Nix - code. -2. A generator frame, consisting of some VM logic implemented in pure Rust - code that can be *suspended* when it hits a point where the VM would - previously need to recurse. - -We do this by making use of the `async` *keyword* in Rust, but notably -*without* introducing asynchronous I/O or concurrency in tvix-eval (the -complexity of which is currently undesirable for us). - -Specifically, when writing a Rust function that uses the `async` keyword, such -as: - -```rust -async fn some_builtin(input: Value) -> Result<Value, ErrorKind> { - let mut out = NixList::new(); - - for element in input.to_list()? { - let result = do_something_that_requires_the_vm(element).await; - out.push(result); - } - - Ok(out) -} -``` - -The compiler actually generates a state-machine under-the-hood which allows -the execution of that function to be *suspended* whenever it hits an `await`. - -We use the [`genawaiter`][] crate that gives us a data structure and simple -interface for getting instances of these state machines that can be stored in -a struct (in our case, a *generator frame*). - -The execution of the VM then becomes the execution of an *outer loop*, which -is responsible for selecting the next generator frame to execute, and two -*inner loops*, which drive the execution of a bytecode frame or generator -frame forward until it either yields a value or asks to be suspended in favour -of another frame. - -All "communication" between frames happens solely through values left on the -stack: Whenever a frame of either type runs to completion, it is expected to -leave a *single* value on the stack. It follows that the whole VM, upon -completion of the last (or initial, depending on your perspective) frame -yields its result as the return value. - -The core of the VM restructuring is cl/8104, unfortunately one of the largest -single commit changes we've had to make yet, as it touches pretty much all -areas of tvix-eval. The introduction of the generators and the -message/response system we built to request something from the VM, suspend a -generator, and wait for the return is in cl/8148. - -The next sections describe in detail how the three different loops work. - -### Outer VM loop - -The outer VM loop is responsible for selecting the next frame to run, and -dispatching it correctly to inner loops, as well as determining when to shut -down the VM and return the final result. - -``` - ╭──────────────────╮ - ╭────────┤ match frame kind ├──────╮ - │ ╰──────────────────╯ │ - │ │ - ┏━━━━━━━━━━━━┷━━━━━┓ ╭───────────┴───────────╮ -───►┃ frame_stack.pop()┃ ▼ ▼ - ┗━━━━━━━━━━━━━━━━━━┛ ┏━━━━━━━━━━━━━━━━┓ ┏━━━━━━━━━━━━━━━━━┓ - ▲ ┃ bytecode frame ┃ ┃ generator frame ┃ - │ ┗━━━━━━━━┯━━━━━━━┛ ┗━━━━━━━━┯━━━━━━━━┛ - │[yes, cont.] │ │ - │ ▼ ▼ - ┏━━━━━━━━┓ │ ╔════════════════╗ ╔═════════════════╗ -◄───┨ return ┃ │ ║ inner bytecode ║ ║ inner generator ║ - ┗━━━━━━━━┛ │ ║ loop ║ ║ loop ║ - ▲ │ ╚════════╤═══════╝ ╚════════╤════════╝ - │ ╭────┴─────╮ │ │ - │ │ has next │ ╰───────────┬───────────╯ - [no] ╰───┤ frame? │ │ - ╰────┬─────╯ ▼ - │ ┏━━━━━━━━━━━━━━━━━┓ - │ ┃ frame completed ┃ - ╰─────────────────────────┨ or suspended ┃ - ┗━━━━━━━━━━━━━━━━━┛ -``` - -Initially, the VM always pops a frame from the frame stack and then inspects -the type of frame it found. As a consequence the next frame to execute is -always the frame at the top of the stack, and setting up a VM initially for -code execution is done by leaving a bytecode frame with the code to execute on -the stack and passing control to the outer loop. - -Control is dispatched to either of the inner loops (depending on the type of -frame) and the cycle continues once they return. - -When an inner loop returns, it has either finished its execution (and left its -result value on the *value stack*), or its frame has requested to be -suspended. - -Frames request suspension by re-enqueueing *themselves* through VM helper -methods, and then leaving the frame they want to run *on top* of themselves in -the frame stack before yielding control back to the outer loop. - -The inner control loops inform the outer loops about whether the frame has -been *completed* or *suspended* by returning a boolean. - -### Inner bytecode loop - -The inner bytecode loop drives the execution of some Tvix bytecode by -continously looking at the next instruction to execute, and dispatching to the -instruction handler. - -``` - ┏━━━━━━━━━━━━━┓ -◄──┨ return true ┃ - ┗━━━━━━━━━━━━━┛ - ▲ - ╔════╧═════╗ - ║ OpReturn ║ - ╚══════════╝ - ▲ - ╰──┬────────────────────────────╮ - │ ▼ - │ ╔═════════════════════╗ - ┏━━━━━━━━┷━━━━━┓ ║ execute instruction ║ -───►┃ inspect next ┃ ╚══════════╤══════════╝ - ┃ instruction ┃ │ - ┗━━━━━━━━━━━━━━┛ │ - ▲ ╭─────┴─────╮ - ╰──────────────────────┤ suspends? │ - [no] ╰─────┬─────╯ - │ - │ - ┏━━━━━━━━━━━━━━┓ │ -◄──┨ return false ┃───────────────────────╯ - ┗━━━━━━━━━━━━━━┛ [yes] -``` - -With this refactoring, the compiler now emits a special `OpReturn` instruction -at the end of bytecode chunks. This is a signal to the runtime that the chunk -has completed and that its current value should be returned, without having to -perform instruction pointer arithmetic. - -When `OpReturn` is encountered, the inner bytecode loop returns control to the -outer loop and informs it (by returning `true`) that the bytecode frame has -completed. - -Any other instruction may also request a suspension of the bytecode frame (for -example, instructions that need to force a value). In this case the inner loop -is responsible for setting up the frame stack correctly, and returning `false` -to inform the outer loop of the suspension - -### Inner generator loop - -The inner generator loop is responsible for driving the execution of a -generator frame by continously calling [`Gen::resume`][] until it requests a -suspension (as a result of which control is returned to the outer loop), or -until the generator is done and yields a value. - -``` - ┏━━━━━━━━━━━━━┓ -◄──┨ return true ┃ ◄───────────────────╮ - ┗━━━━━━━━━━━━━┛ │ - │ - [Done] │ - ╭──────────────────┴─────────╮ - │ inspect generator response │◄────────────╮ - ╰──────────────────┬─────────╯ │ - [yielded] │ ┏━━━━━━━━┷━━━━━━━━┓ - │ ┃ gen.resume(msg) ┃◄── - ▼ ┗━━━━━━━━━━━━━━━━━┛ - ╭────────────╮ ▲ - │ same-frame │ │ - │ request? ├────────────────╯ - ╰─────┬──────╯ [yes] - ┏━━━━━━━━━━━━━━┓ │ -◄──┨ return false ┃ ◄──────────────────╯ - ┗━━━━━━━━━━━━━━┛ [no] -``` - -On each execution of a generator frame, `resume_with` is called with a -[`VMResponse`][] (i.e. a message *from* the VM *to* the generator). For a newly -created generator, the initial message is just `Empty`. - -A generator may then respond by signaling that it has finished execution -(`Done`), in which case the inner generator loop returns control to the outer -loop and informs it that this generator is done (by returning `true`). - -A generator may also respond by signaling that it needs some data from the VM. -This is implemented through a request-response pattern, in which the generator -returns a `Yielded` message containing a [`VMRequest`][]. These requests can be -very simple ("Tell me the current store path") or more complex ("Call this Nix -function with these values"). - -Requests are divided into two classes: Same-frame requests (requests that can be -responded to *without* returning control to the outer loop, i.e. without -executing a *different* frame), and multi-frame generator requests. Based on the -type of request, the inner generator loop will either handle it right away and -send the response in a new `resume_with` call, or return `false` to the outer -generator loop after setting up the frame stack. - -Most of this logic is implemented in cl/8148. - -[`Gen::resume`]: https://docs.rs/genawaiter/0.99.1/genawaiter/rc/struct.Gen.html#method.resume_with -[`VMRequest`]: https://cs.tvl.fyi/depot@2696839770c1ccb62929ff2575a633c07f5c9593/-/blob/tvix/eval/src/vm/generators.rs?L44 -[`VMResponse`]: https://cs.tvl.fyi/depot@2696839770c1ccb62929ff2575a633c07f5c9593/-/blob/tvix/eval/src/vm/generators.rs?L169 - -## Advantages & Disadvantages of the approach - -This approach has several advantages: - -* The execution model is much simpler than before, making it fairly - straightforward to build up a mental model of what the VM does. - -* All "out of band requests" inside the VM are handled through the same - abstraction (generators). - -* Implementation is not difficult, albeit a little verbose in some cases (we - can argue about whether or not to introduce macros for simplifying it). - -* Several parts of the VM execution are now much easier to document, - potentially letting us onboard tvix-eval contributors faster. - -* The linear VM execution itself is much easier to trace now, with for example - the `RuntimeObserver` (and by extension `tvixbolt`) giving much clearer - output now. - -But it also comes with some disadvantages: - -* Even though we "only" use the `async` keyword without a full async-I/O - runtime, we still encounter many of the drawbacks of the fragmented Rust - async ecosystem. - - The biggest issue with this is that parts of the standard library become - unavailable to us, for example the built-in `Vec::sort_by` can no longer be - used for sorting in Nix because our comparators themselves are `async`. - - This led us to having to implement some logic on our own, as the design of - `async` in Rust even makes it difficult to provide usecase-generic - implementations of concepts like sorting. - -* We need to allocate quite a few new structures on the heap in order to drive - generators, as generators involve storing `Future` types (with unknown - sizes) inside of structs. - - In initial testing this seems to make no significant difference in - performance (our performance in an actual nixpkgs-eval is still bottlenecked - by I/O concerns and reference scanning), but is something to keep in mind - later on when we start optimising more after the low-hanging fruits have - been reaped. - -## Alternatives considered - -1. Tacking on more functionality onto the existing VM loop - implementation to accomodate problems as they show up. This is not - preferred as the code is already getting messy. - -2. Making tvix-eval a fully `async` project, pulling in something like Tokio - or `async-std` as a runtime. This is not preferred due to the massively - increased complexity of those solutions, and all the known issues of fully - buying in to the async ecosystem. - - tvix-eval fundamentally should work for use-cases besides building Nix - packages (e.g. for `//tvix/serde`), and its profile should be as slim as - possible. - -3. Convincing the Rust developers that Rust needs a way to guarantee - constant-stack-depth tail calls through something like a `tailcall` - keyword. - -4. ... ? - -[`genawaiter`]: https://docs.rs/genawaiter/ diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs index 04a0b3dd33..8fc80b2f9c 100644 --- a/tvix/eval/src/builtins/mod.rs +++ b/tvix/eval/src/builtins/mod.rs @@ -274,9 +274,10 @@ mod pure_builtins { list: Value, ) -> Result<Value, ErrorKind> { let mut separator = separator.to_contextful_str()?; + let mut context = NixContext::new(); - if let Some(sep_context) = separator.context_mut() { - context = context.join(sep_context); + if let Some(sep_context) = separator.take_context() { + context.extend(sep_context.into_iter()) } let list = list.to_list()?; let mut res = BString::default(); @@ -296,13 +297,8 @@ mod pure_builtins { { Ok(mut s) => { res.push_str(&s); - if let Some(ref mut other_context) = s.context_mut() { - // It is safe to consume the other context here - // because the `list` and `separator` are originally - // moved, here. - // We are not going to use them again - // because the result here is a string. - context = context.join(other_context); + if let Some(other_context) = s.take_context() { + context.extend(other_context.into_iter()); } } Err(c) => return Ok(Value::Catchable(Box::new(c))), @@ -764,9 +760,8 @@ mod pure_builtins { } if let Some(origin_ctx) = origin.context_mut() { - // FUTUREWORK(performance): avoid this clone - // and extend in-place. - *origin_ctx = origin_ctx.clone().join(&mut ctx_elements.into()); + origin_ctx.extend(ctx_elements) + // TODO: didn't we forget cases where origin had no context? } Ok(origin.into()) @@ -1169,8 +1164,8 @@ mod pure_builtins { let mut empty_string_replace = false; let mut context = NixContext::new(); - if let Some(string_context) = string.context_mut() { - context = context.join(string_context); + if let Some(string_context) = string.take_context() { + context.extend(string_context.into_iter()); } // This can't be implemented using Rust's string.replace() as @@ -1200,8 +1195,8 @@ mod pure_builtins { if string[i..i + from.len()] == *from { res.push_str(&to); i += from.len(); - if let Some(to_ctx) = to.context_mut() { - context = context.join(to_ctx); + if let Some(to_ctx) = to.take_context() { + context.extend(to_ctx.into_iter()); } // remember if we applied the empty from->to @@ -1232,8 +1227,8 @@ mod pure_builtins { if from.is_empty() { res.push_str(&to); - if let Some(to_ctx) = to.context_mut() { - context = context.join(to_ctx); + if let Some(to_ctx) = to.take_context() { + context.extend(to_ctx.into_iter()); } break; } @@ -1291,6 +1286,9 @@ mod pure_builtins { }) .collect(); ret.push_back(Value::List(NixList::from(v))); + if pos == text.len() { + break; + } pos = thematch.end(); } @@ -1504,8 +1502,19 @@ mod pure_builtins { } let mut buf: Vec<u8> = vec![]; - to_xml::value_to_xml(&mut buf, &value)?; - Ok(String::from_utf8(buf)?.into()) + let context = to_xml::value_to_xml(&mut buf, &value)?; + + Ok(( + buf, + // FUTUREWORK: We have a distinction between an empty context, and + // no context at all. Fix this. + if !context.is_empty() { + Some(Box::new(context)) + } else { + None + }, + ) + .into()) } #[builtin("placeholder")] @@ -1612,10 +1621,16 @@ pub fn pure_builtins() -> Vec<(&'static str, Value)> { crate::systems::llvm_triple_to_nix_double(CURRENT_PLATFORM).into(), )); - // TODO: implement for nixpkgs compatibility result.push(( "__curPos", - Value::from(CatchableErrorKind::UnimplementedFeature("__curPos".into())), + Value::Thunk(Thunk::new_suspended_native(Box::new(move || { + // TODO: implement for nixpkgs compatibility + Ok(Value::attrs(NixAttrs::from_iter([ + ("line", 42.into()), + ("column", 42.into()), + ("file", Value::String("/deep/thought".into())), + ]))) + }))), )); result @@ -1623,6 +1638,8 @@ pub fn pure_builtins() -> Vec<(&'static str, Value)> { #[builtins] mod placeholder_builtins { + use crate::NixContext; + use super::*; #[builtin("unsafeDiscardStringContext")] @@ -1671,24 +1688,17 @@ mod placeholder_builtins { .to_contextful_str()?; // If there's any context, we will swap any ... by a path one. - if let Some(ctx) = v.context_mut() { - let new_context: tvix_eval::NixContext = ctx - .iter() - .map(|elem| match elem { - // FUTUREWORK(performance): ideally, we should either: - // (a) do interior mutation of the existing context. - // (b) let the structural sharing make those clones cheap. - crate::NixContextElement::Derivation(drv_path) => { - crate::NixContextElement::Plain(drv_path.to_string()) - } - elem => elem.clone(), - }) - .collect::<HashSet<_>>() - .into(); + if let Some(c) = v.take_context() { + let mut context = NixContext::new(); + context.extend(c.into_iter().map(|elem| match elem { + crate::NixContextElement::Derivation(drv_path) => { + crate::NixContextElement::Plain(drv_path.to_string()) + } + elem => elem.clone(), + })); - *ctx = new_context; + return Ok(Value::String(NixString::new_context_from(context, v))); } - Ok(Value::from(v)) } @@ -1709,6 +1719,7 @@ mod placeholder_builtins { _name: Value, _attrset: Value, ) -> Result<Value, ErrorKind> { + // TODO: implement for nixpkgs compatibility generators::emit_warning_kind( &co, WarningKind::NotImplemented("builtins.unsafeGetAttrsPos"), diff --git a/tvix/eval/src/builtins/to_xml.rs b/tvix/eval/src/builtins/to_xml.rs index bb12cebfc9..093e127fe2 100644 --- a/tvix/eval/src/builtins/to_xml.rs +++ b/tvix/eval/src/builtins/to_xml.rs @@ -3,112 +3,98 @@ //! things in nixpkgs rely on. use bstr::ByteSlice; +use std::borrow::Cow; use std::{io::Write, rc::Rc}; -use xml::writer::events::XmlEvent; -use xml::writer::EmitterConfig; -use xml::writer::EventWriter; -use crate::{ErrorKind, Value}; +use crate::{ErrorKind, NixContext, NixContextElement, Value}; /// Recursively serialise a value to XML. The value *must* have been /// deep-forced before being passed to this function. -pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<(), ErrorKind> { - let config = EmitterConfig { - perform_indent: true, - pad_self_closing: true, - - // Nix uses single-quotes *only* in the document declaration, - // so we need to write it manually. - write_document_declaration: false, - ..Default::default() - }; - +/// On success, returns the NixContext. +pub fn value_to_xml<W: Write>(mut writer: W, value: &Value) -> Result<NixContext, ErrorKind> { // Write a literal document declaration, using C++-Nix-style // single quotes. writeln!(writer, "<?xml version='1.0' encoding='utf-8'?>")?; - let mut writer = EventWriter::new_with_config(writer, config); - - writer.write(XmlEvent::start_element("expr"))?; - value_variant_to_xml(&mut writer, value)?; - writer.write(XmlEvent::end_element())?; + let mut emitter = XmlEmitter::new(writer); - // Unwrap the writer to add the final newline that C++ Nix adds. - writeln!(writer.into_inner())?; + emitter.write_open_tag("expr", &[])?; + value_variant_to_xml(&mut emitter, value)?; + emitter.write_closing_tag("expr")?; - Ok(()) + Ok(emitter.into_context()) } fn write_typed_value<W: Write, V: ToString>( - w: &mut EventWriter<W>, - name: &str, + w: &mut XmlEmitter<W>, + name_unescaped: &str, value: V, ) -> Result<(), ErrorKind> { - w.write(XmlEvent::start_element(name).attr("value", &value.to_string()))?; - w.write(XmlEvent::end_element())?; + w.write_self_closing_tag(name_unescaped, &[("value", &value.to_string())])?; Ok(()) } -fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Result<(), ErrorKind> { +fn value_variant_to_xml<W: Write>(w: &mut XmlEmitter<W>, value: &Value) -> Result<(), ErrorKind> { match value { Value::Thunk(t) => return value_variant_to_xml(w, &t.value()), Value::Null => { - w.write(XmlEvent::start_element("null"))?; - w.write(XmlEvent::end_element()) + w.write_open_tag("null", &[])?; + w.write_closing_tag("null")?; } Value::Bool(b) => return write_typed_value(w, "bool", b), Value::Integer(i) => return write_typed_value(w, "int", i), Value::Float(f) => return write_typed_value(w, "float", f), - Value::String(s) => return write_typed_value(w, "string", s.to_str()?), + Value::String(s) => { + if let Some(context) = s.context() { + w.extend_context(context.iter().cloned()); + } + return write_typed_value(w, "string", s.to_str()?); + } Value::Path(p) => return write_typed_value(w, "path", p.to_string_lossy()), Value::List(list) => { - w.write(XmlEvent::start_element("list"))?; + w.write_open_tag("list", &[])?; for elem in list.into_iter() { value_variant_to_xml(w, elem)?; } - w.write(XmlEvent::end_element()) + w.write_closing_tag("list")?; } Value::Attrs(attrs) => { - w.write(XmlEvent::start_element("attrs"))?; + w.write_open_tag("attrs", &[])?; for elem in attrs.iter() { - w.write(XmlEvent::start_element("attr").attr("name", &elem.0.to_str_lossy()))?; + w.write_open_tag("attr", &[("name", &elem.0.to_str_lossy())])?; value_variant_to_xml(w, elem.1)?; - w.write(XmlEvent::end_element())?; + w.write_closing_tag("attr")?; } - w.write(XmlEvent::end_element()) + w.write_closing_tag("attrs")?; } Value::Closure(c) => { - w.write(XmlEvent::start_element("function"))?; + w.write_open_tag("function", &[])?; match &c.lambda.formals { Some(formals) => { - let mut attrspat = XmlEvent::start_element("attrspat"); + let mut attrs: Vec<(&str, &str)> = Vec::with_capacity(2); if formals.ellipsis { - attrspat = attrspat.attr("ellipsis", "1"); + attrs.push(("ellipsis", "1")); } if let Some(ref name) = &formals.name { - attrspat = attrspat.attr("name", name.as_str()); + attrs.push(("name", name.as_str())); } - w.write(attrspat)?; - + w.write_open_tag("attrspat", &attrs)?; for arg in formals.arguments.iter() { - w.write( - XmlEvent::start_element("attr").attr("name", &arg.0.to_str_lossy()), - )?; - w.write(XmlEvent::end_element())?; + w.write_self_closing_tag("attr", &[("name", &arg.0.to_str_lossy())])?; } - w.write(XmlEvent::end_element())?; + w.write_closing_tag("attrspat")?; } None => { // TODO(tazjin): tvix does not currently persist function @@ -120,17 +106,16 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu // If we don't want to persist the data, we can re-parse the // AST from the spans of the lambda's bytecode and figure it // out that way, but it needs some investigating. - w.write(XmlEvent::start_element("varpat").attr("name", /* fake: */ "x"))?; - w.write(XmlEvent::end_element())?; + w.write_self_closing_tag("varpat", &[("name", /* fake: */ "x")])?; } } - w.write(XmlEvent::end_element()) + w.write_closing_tag("function")?; } Value::Builtin(_) => { - w.write(XmlEvent::start_element("unevaluated"))?; - w.write(XmlEvent::end_element()) + w.write_open_tag("unevaluated", &[])?; + w.write_closing_tag("unevaluated")?; } Value::AttrNotFound @@ -148,7 +133,189 @@ fn value_variant_to_xml<W: Write>(w: &mut EventWriter<W>, value: &Value) -> Resu Value::Catchable(_) => { panic!("tvix bug: value_to_xml() called on a value which had not been deep-forced") } - }?; + }; Ok(()) } + +/// A simple-stupid XML emitter, which implements only the subset needed for byte-by-byte compat with C++ nix’ `builtins.toXML`. +struct XmlEmitter<W> { + /// The current indentation + cur_indent: usize, + writer: W, + context: NixContext, +} + +impl<W: Write> XmlEmitter<W> { + pub fn new(writer: W) -> Self { + XmlEmitter { + cur_indent: 0, + writer, + context: Default::default(), + } + } + + /// Write an open tag with the given name (which is not escaped!) + /// and attributes (Keys are not escaped! Only attribute values are.) + pub fn write_open_tag( + &mut self, + name_unescaped: &str, + attrs: &[(&str, &str)], + ) -> std::io::Result<()> { + self.add_indent()?; + self.writer.write_all(b"<")?; + self.writer.write_all(name_unescaped.as_bytes())?; + self.write_attrs_escape_vals(attrs)?; + self.writer.write_all(b">\n")?; + self.cur_indent += 2; + Ok(()) + } + + /// Write a self-closing open tag with the given name (which is not escaped!) + /// and attributes (Keys are not escaped! Only attribute values are.) + pub fn write_self_closing_tag( + &mut self, + name_unescaped: &str, + attrs: &[(&str, &str)], + ) -> std::io::Result<()> { + self.add_indent()?; + self.writer.write_all(b"<")?; + self.writer.write_all(name_unescaped.as_bytes())?; + self.write_attrs_escape_vals(attrs)?; + self.writer.write_all(b" />\n")?; + Ok(()) + } + + /// Write a closing tag with the given name (which is not escaped!) + pub fn write_closing_tag(&mut self, name_unescaped: &str) -> std::io::Result<()> { + self.cur_indent -= 2; + self.add_indent()?; + self.writer.write_all(b"</")?; + self.writer.write_all(name_unescaped.as_bytes())?; + self.writer.write_all(b">\n")?; + Ok(()) + } + + #[inline] + fn add_indent(&mut self) -> std::io::Result<()> { + self.writer.write_all(&b" ".repeat(self.cur_indent)) + } + + /// Write an attribute list + fn write_attrs_escape_vals(&mut self, attrs: &[(&str, &str)]) -> std::io::Result<()> { + for (name, val) in attrs { + self.writer.write_all(b" ")?; + self.writer.write_all(name.as_bytes())?; + self.writer.write_all(br#"=""#)?; + self.writer + .write_all(Self::escape_attr_value(val).as_bytes())?; + self.writer.write_all(b"\"")?; + } + Ok(()) + } + + /// Escape the given attribute value, making sure we only actually clone the string if we needed to replace something. + fn escape_attr_value(s: &str) -> Cow<str> { + let mut last_escape: usize = 0; + let mut res: Cow<str> = Cow::Borrowed(""); + // iterating via char_indices gives us the ability to index the original string slice at character boundaries + for (idx, c) in s.char_indices() { + match Self::should_escape_char(c) { + None => {} + Some(new) => { + // add characters since the last escape we did + res += &s[last_escape..idx]; + // add the escaped value + res += new; + last_escape = idx + 1; + } + } + } + // we did not need to escape anything, so borrow original string + if last_escape == 0 { + Cow::Borrowed(s) + } else { + // add the remaining characters + res += &s[last_escape..]; + res + } + } + + fn should_escape_char(c: char) -> Option<&'static str> { + match c { + '<' => Some("<"), + '>' => Some(">"), + '"' => Some("""), + '\'' => Some("'"), + '&' => Some("&"), + '\n' => Some("
"), + '\r' => Some("
"), + _ => None, + } + } + + /// Extends the existing context with more context elements. + fn extend_context<T>(&mut self, iter: T) + where + T: IntoIterator<Item = NixContextElement>, + { + self.context.extend(iter) + } + + /// Consumes [Self] and returns the [NixContext] collected. + fn into_context(self) -> NixContext { + self.context + } +} + +#[cfg(test)] +mod tests { + use bytes::buf::Writer; + use pretty_assertions::assert_eq; + + use crate::builtins::to_xml::XmlEmitter; + use std::borrow::Cow; + + #[test] + fn xml_gen() { + let mut buf = Vec::new(); + let mut x = XmlEmitter::new(&mut buf); + x.write_open_tag("hello", &[("hi", "it’s me"), ("no", "<escape>")]) + .unwrap(); + x.write_self_closing_tag("self-closing", &[("tag", "yay")]) + .unwrap(); + x.write_closing_tag("hello").unwrap(); + + assert_eq!( + std::str::from_utf8(&buf).unwrap(), + r##"<hello hi="it’s me" no="<escape>"> + <self-closing tag="yay" /> +</hello> +"## + ); + } + + #[test] + fn xml_escape() { + match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("ab<>c&de") { + Cow::Owned(s) => assert_eq!(s, "ab<>c&de".to_string(), "escape stuff"), + Cow::Borrowed(s) => panic!("s should be owned {}", s), + } + match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("") { + Cow::Borrowed(s) => assert_eq!(s, "", "empty escape is borrowed"), + Cow::Owned(s) => panic!("s should be borrowed {}", s), + } + match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!ŷbla") { + Cow::Borrowed(s) => assert_eq!(s, "hi!ŷbla", "no escape is borrowed"), + Cow::Owned(s) => panic!("s should be borrowed {}", s), + } + match XmlEmitter::<Writer<Vec<u8>>>::escape_attr_value("hi!<ŷ>bla") { + Cow::Owned(s) => assert_eq!( + s, + "hi!<ŷ>bla".to_string(), + "multi-byte chars are correctly used" + ), + Cow::Borrowed(s) => panic!("s should be owned {}", s), + } + } +} diff --git a/tvix/eval/src/errors.rs b/tvix/eval/src/errors.rs index 652252dadf..a5f79c235f 100644 --- a/tvix/eval/src/errors.rs +++ b/tvix/eval/src/errors.rs @@ -10,7 +10,6 @@ use std::{fmt::Debug, fmt::Display, num::ParseIntError}; use codemap::{File, Span}; use codemap_diagnostic::{ColorConfig, Diagnostic, Emitter, Level, SpanLabel, SpanStyle}; use smol_str::SmolStr; -use xml::writer::Error as XmlError; use crate::spans::ToSpan; use crate::value::{CoercionKind, NixString}; @@ -194,9 +193,6 @@ pub enum ErrorKind { /// Invalid UTF-8 was encoutered somewhere Utf8, - /// Errors while serialising to XML. - Xml(Rc<XmlError>), - /// Variant for errors that bubble up to eval from other Tvix /// components. TvixError(Rc<dyn error::Error>), @@ -248,7 +244,6 @@ impl error::Error for Error { errors.first().map(|e| e as &dyn error::Error) } ErrorKind::IO { error, .. } => Some(error.as_ref()), - ErrorKind::Xml(error) => Some(error.as_ref()), ErrorKind::TvixError(error) => Some(error.as_ref()), _ => None, } @@ -285,12 +280,6 @@ impl From<bstr::FromUtf8Error> for ErrorKind { } } -impl From<XmlError> for ErrorKind { - fn from(err: XmlError) -> Self { - Self::Xml(Rc::new(err)) - } -} - impl From<io::Error> for ErrorKind { fn from(e: io::Error) -> Self { ErrorKind::IO { @@ -506,8 +495,6 @@ to a missing value in the attribute set(s) included via `with`."#, write!(f, "Invalid UTF-8 in string") } - ErrorKind::Xml(error) => write!(f, "failed to serialise to XML: {error}"), - ErrorKind::TvixError(inner_error) => { write!(f, "{inner_error}") } @@ -823,7 +810,6 @@ impl Error { | ErrorKind::JsonError(_) | ErrorKind::NotSerialisableToJson(_) | ErrorKind::FromTomlError(_) - | ErrorKind::Xml(_) | ErrorKind::Utf8 | ErrorKind::TvixError(_) | ErrorKind::TvixBug { .. } @@ -870,7 +856,6 @@ impl Error { ErrorKind::UnexpectedArgument { .. } => "E031", ErrorKind::RelativePathResolution(_) => "E032", ErrorKind::DivisionByZero => "E033", - ErrorKind::Xml(_) => "E034", ErrorKind::FromTomlError(_) => "E035", ErrorKind::NotSerialisableToJson(_) => "E036", ErrorKind::UnexpectedContext => "E037", diff --git a/tvix/eval/src/io.rs b/tvix/eval/src/io.rs index f775077af8..58586030aa 100644 --- a/tvix/eval/src/io.rs +++ b/tvix/eval/src/io.rs @@ -16,14 +16,16 @@ //! how store paths are opened and so on. use std::{ - fs::File, io, path::{Path, PathBuf}, }; -#[cfg(target_family = "unix")] +#[cfg(all(target_family = "unix", feature = "impure"))] use std::os::unix::ffi::OsStringExt; +#[cfg(feature = "impure")] +use std::fs::File; + /// Types of files as represented by `builtins.readDir` in Nix. #[derive(Debug)] pub enum FileType { diff --git a/tvix/eval/src/lib.rs b/tvix/eval/src/lib.rs index 845964cb7e..398da4d6e2 100644 --- a/tvix/eval/src/lib.rs +++ b/tvix/eval/src/lib.rs @@ -29,7 +29,7 @@ mod vm; mod warnings; mod nix_search_path; -#[cfg(test)] +#[cfg(all(test, feature = "arbitrary"))] mod properties; #[cfg(test)] mod test_utils; diff --git a/tvix/eval/src/nix_search_path.rs b/tvix/eval/src/nix_search_path.rs index 566ca12238..369c5b6857 100644 --- a/tvix/eval/src/nix_search_path.rs +++ b/tvix/eval/src/nix_search_path.rs @@ -197,6 +197,8 @@ mod tests { } } + // this uses StdIO, which is only available with the impure feature. + #[cfg(feature = "impure")] mod resolve { use crate::StdIO; use path_clean::PathClean; diff --git a/tvix/eval/src/tests/mod.rs b/tvix/eval/src/tests/mod.rs index 5a7708e298..21b5d35e6a 100644 --- a/tvix/eval/src/tests/mod.rs +++ b/tvix/eval/src/tests/mod.rs @@ -1,203 +1,6 @@ -use crate::{value::Value, EvalIO}; -use builtin_macros::builtins; -use pretty_assertions::assert_eq; -use rstest::rstest; -use std::path::PathBuf; - /// Module for one-off tests which do not follow the rest of the /// test layout. mod one_offs; -#[builtins] -mod mock_builtins { - //! Builtins which are required by language tests, but should not - //! actually exist in //tvix/eval. - use crate as tvix_eval; - use crate::generators::GenCo; - use crate::*; - use genawaiter::rc::Gen; - - #[builtin("derivation")] - async fn builtin_derivation(co: GenCo, input: Value) -> Result<Value, ErrorKind> { - let input = input.to_attrs()?; - let attrs = input.update(NixAttrs::from_iter( - [ - ( - "outPath", - "/nix/store/00000000000000000000000000000000-mock", - ), - ( - "drvPath", - "/nix/store/00000000000000000000000000000000-mock.drv", - ), - ("type", "derivation"), - ] - .into_iter(), - )); - - Ok(Value::Attrs(Box::new(attrs))) - } -} - -fn eval_test(code_path: PathBuf, expect_success: bool) { - std::env::set_var("TEST_VAR", "foo"); // for eval-okay-getenv.nix - - eprintln!("path: {}", code_path.display()); - assert_eq!( - code_path.extension().unwrap(), - "nix", - "test files always end in .nix" - ); - - let code = std::fs::read_to_string(&code_path).expect("should be able to read test code"); - - let mut eval = crate::Evaluation::new_impure(); - eval.strict = true; - eval.builtins.extend(mock_builtins::builtins()); - - let result = eval.evaluate(code, Some(code_path.clone())); - let failed = match result.value { - Some(Value::Catchable(_)) => true, - _ => !result.errors.is_empty(), - }; - if expect_success && failed { - panic!( - "{}: evaluation of eval-okay test should succeed, but failed with {:?}", - code_path.display(), - result.errors, - ); - } - - if !expect_success && failed { - return; - } - // !expect_success can also mean the output differs, so don't panic if the - // evaluation didn't fail. - - let value = result.value.unwrap(); - let result_str = value.to_string(); - - let exp_path = code_path.with_extension("exp"); - if exp_path.exists() { - // If there's an .exp file provided alongside, compare it with the - // output of the NixValue .to_string() method. - let exp_str = std::fs::read_to_string(&exp_path).expect("unable to read .exp file"); - - if expect_success { - assert_eq!( - result_str, - exp_str.trim(), - "{}: result value representation (left) must match expectation (right)", - code_path.display() - ); - } else { - assert_ne!( - result_str, - exp_str.trim(), - "{}: test passed unexpectedly! consider moving it out of notyetpassing", - code_path.display() - ); - - // Early return here, we don't compare .xml outputs if this is a ! - // expect_success test. - return; - } - } - - let exp_xml_path = code_path.with_extension("exp.xml"); - if exp_xml_path.exists() { - // If there's an XML file provided alongside, compare it with the - // output produced when serializing the Value as XML. - let exp_xml_str = std::fs::read_to_string(exp_xml_path).expect("unable to read .xml file"); - - let mut xml_actual_buf = Vec::new(); - crate::builtins::value_to_xml(&mut xml_actual_buf, &value).expect("value_to_xml failed"); - - assert_eq!( - String::from_utf8(xml_actual_buf).expect("to_xml produced invalid utf-8"), - exp_xml_str, - "{}: result value representation (left) must match expectation (right)", - code_path.display() - ); - } -} - -// identity-* tests contain Nix code snippets which should evaluate to -// themselves exactly (i.e. literals). -#[rstest] -fn identity(#[files("src/tests/tvix_tests/identity-*.nix")] code_path: PathBuf) { - let code = std::fs::read_to_string(code_path).expect("should be able to read test code"); - - let mut eval = crate::Evaluation::new(Box::new(crate::StdIO) as Box<dyn EvalIO>, false); - eval.strict = true; - - let result = eval.evaluate(&code, None); - assert!( - result.errors.is_empty(), - "evaluation of identity test failed: {:?}", - result.errors - ); - - let result_str = result.value.unwrap().to_string(); - - assert_eq!( - result_str, - code.trim(), - "result value representation (left) must match expectation (right)" - ) -} - -// eval-okay-* tests contain a snippet of Nix code, and an expectation -// of the produced string output of the evaluator. -// -// These evaluations are always supposed to succeed, i.e. all snippets -// are guaranteed to be valid Nix code. -#[rstest] -fn eval_okay(#[files("src/tests/tvix_tests/eval-okay-*.nix")] code_path: PathBuf) { - eval_test(code_path, true) -} - -// eval-okay-* tests from the original Nix test suite. -#[cfg(feature = "nix_tests")] -#[rstest] -fn nix_eval_okay(#[files("src/tests/nix_tests/eval-okay-*.nix")] code_path: PathBuf) { - eval_test(code_path, true) -} - -// eval-okay-* tests from the original Nix test suite which do not yet pass for tvix -// -// Eventually there will be none of these left, and this function -// will disappear :) -// -// Please don't submit failing tests unless they're in -// notyetpassing; this makes the test suite much more useful for -// regression testing, since there should always be zero non-ignored -// failing tests. -#[rstest] -fn nix_eval_okay_currently_failing( - #[files("src/tests/nix_tests/notyetpassing/eval-okay-*.nix")] code_path: PathBuf, -) { - eval_test(code_path, false) -} - -#[rstest] -fn eval_okay_currently_failing( - #[files("src/tests/tvix_tests/notyetpassing/eval-okay-*.nix")] code_path: PathBuf, -) { - eval_test(code_path, false) -} - -// eval-fail-* tests contain a snippet of Nix code, which is -// expected to fail evaluation. The exact type of failure -// (assertion, parse error, etc) is not currently checked. -#[rstest] -fn eval_fail(#[files("src/tests/tvix_tests/eval-fail-*.nix")] code_path: PathBuf) { - eval_test(code_path, false) -} - -// eval-fail-* tests from the original Nix test suite. #[cfg(feature = "nix_tests")] -#[rstest] -fn nix_eval_fail(#[files("src/tests/nix_tests/eval-fail-*.nix")] code_path: PathBuf) { - eval_test(code_path, false) -} +mod nix_tests; diff --git a/tvix/eval/src/tests/nix_tests.rs b/tvix/eval/src/tests/nix_tests.rs new file mode 100644 index 0000000000..17968e4bdb --- /dev/null +++ b/tvix/eval/src/tests/nix_tests.rs @@ -0,0 +1,207 @@ +use crate::value::Value; +use builtin_macros::builtins; +use pretty_assertions::assert_eq; +use rstest::rstest; +use std::path::PathBuf; + +#[builtins] +mod mock_builtins { + //! Builtins which are required by language tests, but should not + //! actually exist in //tvix/eval. + use crate as tvix_eval; + use crate::generators::GenCo; + use crate::*; + use genawaiter::rc::Gen; + + #[builtin("derivation")] + async fn builtin_derivation(co: GenCo, input: Value) -> Result<Value, ErrorKind> { + let input = input.to_attrs()?; + let attrs = input.update(NixAttrs::from_iter( + [ + ( + "outPath", + "/nix/store/00000000000000000000000000000000-mock", + ), + ( + "drvPath", + "/nix/store/00000000000000000000000000000000-mock.drv", + ), + ("type", "derivation"), + ] + .into_iter(), + )); + + Ok(Value::Attrs(Box::new(attrs))) + } +} + +#[cfg(feature = "impure")] +fn eval_test(code_path: PathBuf, expect_success: bool) { + std::env::set_var("TEST_VAR", "foo"); // for eval-okay-getenv.nix + + eprintln!("path: {}", code_path.display()); + assert_eq!( + code_path.extension().unwrap(), + "nix", + "test files always end in .nix" + ); + + let code = std::fs::read_to_string(&code_path).expect("should be able to read test code"); + + let mut eval = crate::Evaluation::new_impure(); + eval.strict = true; + eval.builtins.extend(mock_builtins::builtins()); + + let result = eval.evaluate(code, Some(code_path.clone())); + let failed = match result.value { + Some(Value::Catchable(_)) => true, + _ => !result.errors.is_empty(), + }; + if expect_success && failed { + panic!( + "{}: evaluation of eval-okay test should succeed, but failed with {:?}", + code_path.display(), + result.errors, + ); + } + + if !expect_success && failed { + return; + } + // !expect_success can also mean the output differs, so don't panic if the + // evaluation didn't fail. + + let value = result.value.unwrap(); + let result_str = value.to_string(); + + let exp_path = code_path.with_extension("exp"); + if exp_path.exists() { + // If there's an .exp file provided alongside, compare it with the + // output of the NixValue .to_string() method. + let exp_str = std::fs::read_to_string(&exp_path).expect("unable to read .exp file"); + + if expect_success { + assert_eq!( + result_str, + exp_str.trim(), + "{}: result value representation (left) must match expectation (right)", + code_path.display() + ); + } else { + assert_ne!( + result_str, + exp_str.trim(), + "{}: test passed unexpectedly! consider moving it out of notyetpassing", + code_path.display() + ); + + // Early return here, we don't compare .xml outputs if this is a ! + // expect_success test. + return; + } + } + + let exp_xml_path = code_path.with_extension("exp.xml"); + if exp_xml_path.exists() { + // If there's an XML file provided alongside, compare it with the + // output produced when serializing the Value as XML. + let exp_xml_str = std::fs::read_to_string(exp_xml_path).expect("unable to read .xml file"); + + let mut xml_actual_buf = Vec::new(); + crate::builtins::value_to_xml(&mut xml_actual_buf, &value).expect("value_to_xml failed"); + + assert_eq!( + String::from_utf8(xml_actual_buf).expect("to_xml produced invalid utf-8"), + exp_xml_str, + "{}: result value representation (left) must match expectation (right)", + code_path.display() + ); + } +} + +// identity-* tests contain Nix code snippets which should evaluate to +// themselves exactly (i.e. literals). +#[cfg(feature = "impure")] +#[rstest] +fn identity(#[files("src/tests/tvix_tests/identity-*.nix")] code_path: PathBuf) { + use crate::EvalIO; + + let code = std::fs::read_to_string(code_path).expect("should be able to read test code"); + + let mut eval = crate::Evaluation::new(Box::new(crate::StdIO) as Box<dyn EvalIO>, false); + eval.strict = true; + + let result = eval.evaluate(&code, None); + assert!( + result.errors.is_empty(), + "evaluation of identity test failed: {:?}", + result.errors + ); + + let result_str = result.value.unwrap().to_string(); + + assert_eq!( + result_str, + code.trim(), + "result value representation (left) must match expectation (right)" + ) +} + +// eval-okay-* tests contain a snippet of Nix code, and an expectation +// of the produced string output of the evaluator. +// +// These evaluations are always supposed to succeed, i.e. all snippets +// are guaranteed to be valid Nix code. +#[cfg(feature = "impure")] +#[rstest] +fn eval_okay(#[files("src/tests/tvix_tests/eval-okay-*.nix")] code_path: PathBuf) { + eval_test(code_path, true) +} + +// eval-okay-* tests from the original Nix test suite. +#[cfg(feature = "impure")] +#[rstest] +fn nix_eval_okay(#[files("src/tests/nix_tests/eval-okay-*.nix")] code_path: PathBuf) { + eval_test(code_path, true) +} + +// eval-okay-* tests from the original Nix test suite which do not yet pass for tvix +// +// Eventually there will be none of these left, and this function +// will disappear :) +// +// Please don't submit failing tests unless they're in +// notyetpassing; this makes the test suite much more useful for +// regression testing, since there should always be zero non-ignored +// failing tests. +#[cfg(feature = "impure")] +#[rstest] +fn nix_eval_okay_currently_failing( + #[files("src/tests/nix_tests/notyetpassing/eval-okay-*.nix")] code_path: PathBuf, +) { + eval_test(code_path, false) +} + +#[cfg(feature = "impure")] +#[rstest] +fn eval_okay_currently_failing( + #[files("src/tests/tvix_tests/notyetpassing/eval-okay-*.nix")] code_path: PathBuf, +) { + eval_test(code_path, false) +} + +// eval-fail-* tests contain a snippet of Nix code, which is +// expected to fail evaluation. The exact type of failure +// (assertion, parse error, etc) is not currently checked. +#[cfg(feature = "impure")] +#[rstest] +fn eval_fail(#[files("src/tests/tvix_tests/eval-fail-*.nix")] code_path: PathBuf) { + eval_test(code_path, false) +} + +// eval-fail-* tests from the original Nix test suite. +#[cfg(feature = "impure")] +#[rstest] +fn nix_eval_fail(#[files("src/tests/nix_tests/eval-fail-*.nix")] code_path: PathBuf) { + eval_test(code_path, false) +} diff --git a/tvix/eval/src/tests/one_offs.rs b/tvix/eval/src/tests/one_offs.rs index 565d1dd48f..21e9144baf 100644 --- a/tvix/eval/src/tests/one_offs.rs +++ b/tvix/eval/src/tests/one_offs.rs @@ -5,7 +5,7 @@ fn test_source_builtin() { // Test an evaluation with a source-only builtin. The test ensures // that the artificially constructed thunking is correct. - let mut eval = Evaluation::new_impure(); + let mut eval = Evaluation::new_pure(); eval.src_builtins.push(("testSourceBuiltin", "42")); let result = eval.evaluate("builtins.testSourceBuiltin", None); diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.exp new file mode 100644 index 0000000000..eb2117a0ce --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.exp @@ -0,0 +1 @@ +[ [ "" [ "a" ] "c" ] [ "" [ "a" ] "b" [ "c" ] "" ] [ "" [ "a" null ] "b" [ null "c" ] "" ] [ " " [ "FOO" ] " " ] [ "" [ "abc" ] "" [ "" ] "" ] [ "" [ "abc" ] "" [ "" ] "" ] [ "" [ ] "" ] ] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.nix new file mode 100644 index 0000000000..95305040dc --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-split.nix @@ -0,0 +1,10 @@ +[ + (builtins.split "(a)b" "abc") + (builtins.split "([ac])" "abc") + (builtins.split "(a)|(c)" "abc") + (builtins.split "([[:upper:]]+)" " FOO ") + + (builtins.split "(.*)" "abc") + (builtins.split "([abc]*)" "abc") + (builtins.split ".*" "") +] diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.exp.xml b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.exp.xml new file mode 100644 index 0000000000..468972b2f8 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.exp.xml @@ -0,0 +1,5 @@ +<?xml version='1.0' encoding='utf-8'?> +<expr> + <attrs> + </attrs> +</expr> diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.nix new file mode 100644 index 0000000000..ffcd4415b0 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml-empty.nix @@ -0,0 +1 @@ +{ } diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.exp new file mode 100644 index 0000000000..9ae16de526 --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.exp @@ -0,0 +1 @@ +"<?xml version='1.0' encoding='utf-8'?>\n<expr>\n <attrs>\n <attr name=\"&-{\">\n <string value=\";&"\" />\n </attr>\n <attr name=\"a\">\n <string value=\"s\" />\n </attr>\n </attrs>\n</expr>\n" diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.nix new file mode 100644 index 0000000000..7d074048dd --- /dev/null +++ b/tvix/eval/src/tests/tvix_tests/eval-okay-toxml.nix @@ -0,0 +1,2 @@ +# Check some corner cases regarding escaping. +builtins.toXML { a = "s"; "&-{" = ";&\""; } diff --git a/tvix/eval/src/value/json.rs b/tvix/eval/src/value/json.rs index c48e9c1f4e..24a6bcaf6f 100644 --- a/tvix/eval/src/value/json.rs +++ b/tvix/eval/src/value/json.rs @@ -47,8 +47,8 @@ impl Value { for val in l.into_iter() { match generators::request_to_json(co, val).await { - Ok((v, mut ctx)) => { - context = context.join(&mut ctx); + Ok((v, ctx)) => { + context.extend(ctx.into_iter()); out.push(v) } Err(cek) => return Ok(Err(cek)), @@ -100,8 +100,8 @@ impl Value { out.insert( name.to_str()?.to_owned(), match generators::request_to_json(co, value).await { - Ok((v, mut ctx)) => { - context = context.join(&mut ctx); + Ok((v, ctx)) => { + context.extend(ctx.into_iter()); v } Err(cek) => return Ok(Err(cek)), diff --git a/tvix/eval/src/value/mod.rs b/tvix/eval/src/value/mod.rs index c171c9a04e..dfad0cd839 100644 --- a/tvix/eval/src/value/mod.rs +++ b/tvix/eval/src/value/mod.rs @@ -338,8 +338,8 @@ impl Value { let coerced: Result<BString, _> = match (value, kind) { // coercions that are always done (Value::String(mut s), _) => { - if let Some(ctx) = s.context_mut() { - context = context.join(ctx); + if let Some(ctx) = s.take_context() { + context.extend(ctx.into_iter()); } Ok((*s).into()) } diff --git a/tvix/eval/src/value/string.rs b/tvix/eval/src/value/string.rs index ceb43f1ea5..4f869eb00d 100644 --- a/tvix/eval/src/value/string.rs +++ b/tvix/eval/src/value/string.rs @@ -54,6 +54,12 @@ impl From<HashSet<NixContextElement>> for NixContext { } } +impl<const N: usize> From<[NixContextElement; N]> for NixContext { + fn from(value: [NixContextElement; N]) -> Self { + Self(HashSet::from(value)) + } +} + impl NixContext { /// Creates an empty context that can be populated /// and passed to form a contextful [NixString], albeit @@ -78,20 +84,19 @@ impl NixContext { self } - /// Consumes both ends of the join into a new NixContent - /// containing the union of elements of both ends. - pub fn join(mut self, other: &mut NixContext) -> Self { - let other_set = std::mem::take(&mut other.0); - let mut set: HashSet<NixContextElement> = std::mem::take(&mut self.0); - set.extend(other_set); - Self(set) + /// Extends the existing context with more context elements. + pub fn extend<T>(&mut self, iter: T) + where + T: IntoIterator<Item = NixContextElement>, + { + self.0.extend(iter) } /// Copies from another [NixString] its context strings /// in this context. pub fn mimic(&mut self, other: &NixString) { if let Some(context) = other.context() { - self.0.extend(context.iter().cloned()); + self.extend(context.iter().cloned()); } } @@ -154,6 +159,16 @@ impl NixContext { } } +impl IntoIterator for NixContext { + type Item = NixContextElement; + + type IntoIter = std::collections::hash_set::IntoIter<NixContextElement>; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + /// This type is never instantiated, but serves to document the memory layout of the actual heap /// allocation for Nix strings. #[allow(dead_code)] @@ -616,6 +631,11 @@ mod arbitrary { impl NixString { fn new(contents: &[u8], context: Option<Box<NixContext>>) -> Self { + debug_assert!( + !context.as_deref().is_some_and(NixContext::is_empty), + "BUG: initialized with empty context" + ); + // SAFETY: We're always fully initializing a NixString here: // // 1. NixStringInner::alloc sets up the len for us @@ -707,8 +727,10 @@ impl NixString { let context = [self.context(), other.context()] .into_iter() .flatten() - .fold(NixContext::new(), |acc_ctx, new_ctx| { - acc_ctx.join(&mut new_ctx.clone()) + .fold(NixContext::new(), |mut acc_ctx, new_ctx| { + // TODO: consume new_ctx? + acc_ctx.extend(new_ctx.iter().cloned()); + acc_ctx }); Self::new_context_from(context, s) } @@ -719,16 +741,30 @@ impl NixString { // // Also, we're using the same lifetime and mutability as self, to fit the // pointer-to-reference conversion rules - unsafe { NixStringInner::context_ref(self.0).as_deref() } + let context = unsafe { NixStringInner::context_ref(self.0).as_deref() }; + + debug_assert!( + !context.is_some_and(NixContext::is_empty), + "BUG: empty context" + ); + + context } - pub(crate) fn context_mut(&mut self) -> Option<&mut NixContext> { + pub(crate) fn context_mut(&mut self) -> &mut Option<Box<NixContext>> { // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY // comment in `new`). // // Also, we're using the same lifetime and mutability as self, to fit the // pointer-to-reference conversion rules - unsafe { NixStringInner::context_mut(self.0).as_deref_mut() } + let context = unsafe { NixStringInner::context_mut(self.0) }; + + debug_assert!( + !context.as_deref().is_some_and(NixContext::is_empty), + "BUG: empty context" + ); + + context } pub fn iter_context(&self) -> impl Iterator<Item = &NixContext> { @@ -756,12 +792,16 @@ impl NixString { self.context().is_some() } + /// This clears the context of the string, returning + /// the removed dependency tracking information. + pub fn take_context(&mut self) -> Option<Box<NixContext>> { + self.context_mut().take() + } + /// This clears the context of that string, losing /// all dependency tracking information. pub fn clear_context(&mut self) { - // SAFETY: There's no way to construct an uninitialized or invalid NixString (see the SAFETY - // comment in `new`). - *unsafe { NixStringInner::context_mut(self.0) } = None; + let _ = self.take_context(); } pub fn chars(&self) -> Chars<'_> { @@ -849,7 +889,7 @@ impl Display for NixString { } } -#[cfg(test)] +#[cfg(all(test, feature = "arbitrary"))] mod tests { use test_strategy::proptest; diff --git a/tvix/eval/src/vm/generators.rs b/tvix/eval/src/vm/generators.rs index 79de688692..dbf7703bf0 100644 --- a/tvix/eval/src/vm/generators.rs +++ b/tvix/eval/src/vm/generators.rs @@ -745,6 +745,7 @@ pub async fn request_open_file(co: &GenCo, path: PathBuf) -> Box<dyn std::io::Re } } +#[cfg_attr(not(feature = "impure"), allow(unused))] pub(crate) async fn request_path_exists(co: &GenCo, path: PathBuf) -> Value { match co.yield_(VMRequest::PathExists(path)).await { VMResponse::Value(value) => value, @@ -755,6 +756,7 @@ pub(crate) async fn request_path_exists(co: &GenCo, path: PathBuf) -> Value { } } +#[cfg_attr(not(feature = "impure"), allow(unused))] pub(crate) async fn request_read_dir(co: &GenCo, path: PathBuf) -> Vec<(bytes::Bytes, FileType)> { match co.yield_(VMRequest::ReadDir(path)).await { VMResponse::Directory(dir) => dir, diff --git a/tvix/eval/src/vm/mod.rs b/tvix/eval/src/vm/mod.rs index 5c244cc3ca..1fa9eba4bc 100644 --- a/tvix/eval/src/vm/mod.rs +++ b/tvix/eval/src/vm/mod.rs @@ -994,8 +994,8 @@ where } let mut nix_string = val.to_contextful_str().with_span(frame, self)?; out.push_str(nix_string.as_bstr()); - if let Some(nix_string_ctx) = nix_string.context_mut() { - context = context.join(nix_string_ctx); + if let Some(nix_string_ctx) = nix_string.take_context() { + context.extend(nix_string_ctx.into_iter()) } } diff --git a/tvix/eval/tests/nix_oracle.rs b/tvix/eval/tests/nix_oracle.rs index 5a5cc0a822..3d3abc7363 100644 --- a/tvix/eval/tests/nix_oracle.rs +++ b/tvix/eval/tests/nix_oracle.rs @@ -56,6 +56,7 @@ fn nix_eval(expr: &str, strictness: Strictness) -> String { /// `NIX_INSTANTIATE_BINARY_PATH` env var to resolve the `nix-instantiate` binary) and tvix, and /// assert that the result is identical #[track_caller] +#[cfg(feature = "impure")] fn compare_eval(expr: &str, strictness: Strictness) { let nix_result = nix_eval(expr, strictness); let mut eval = tvix_eval::Evaluation::new_pure(); @@ -76,6 +77,7 @@ fn compare_eval(expr: &str, strictness: Strictness) { macro_rules! compare_eval_tests { ($strictness:expr, {}) => {}; ($strictness:expr, {$(#[$meta:meta])* $test_name: ident($expr: expr); $($rest:tt)*}) => { + #[cfg(feature = "impure")] #[test] $(#[$meta])* fn $test_name() { |