From 65a810fc0b12e754af47864ab3b9d4b5756cde15 Mon Sep 17 00:00:00 2001 From: Florian Klink Date: Fri, 1 Mar 2024 10:56:17 +0200 Subject: feat(tvix/docs): switch to mdbook Some of the docs are still outdated (like architecture and drv path inconsistencies). Change-Id: I7a6afceb008ef4cd19a764dd6c637b39fa842a2e Reviewed-on: https://cl.tvl.fyi/c/depot/+/11072 Autosubmit: flokli Tested-by: BuildkiteCI Reviewed-by: edef --- tvix/docs/.gitignore | 4 +- tvix/docs/Makefile | 12 -- tvix/docs/book.toml | 11 + tvix/docs/component-flow.puml | 60 ------ tvix/docs/components.md | 160 -------------- tvix/docs/default.nix | 42 +--- tvix/docs/differences-drv-paths.md | 46 ---- tvix/docs/lang-version.md | 62 ------ tvix/docs/language-spec.md | 78 ------- tvix/docs/src/SUMMARY.md | 10 + tvix/docs/src/architecture.md | 147 +++++++++++++ tvix/docs/src/differences-drv-paths.md | 35 ++++ tvix/docs/src/figures/component-flow.puml | 60 ++++++ tvix/docs/src/lang-version.md | 62 ++++++ tvix/docs/src/language-spec.md | 67 ++++++ tvix/docs/src/value-pointer-equality.md | 338 ++++++++++++++++++++++++++++++ tvix/docs/value-pointer-equality.md | 338 ------------------------------ 17 files changed, 741 insertions(+), 791 deletions(-) delete mode 100644 tvix/docs/Makefile create mode 100644 tvix/docs/book.toml delete mode 100644 tvix/docs/component-flow.puml delete mode 100644 tvix/docs/components.md delete mode 100644 tvix/docs/differences-drv-paths.md delete mode 100644 tvix/docs/lang-version.md delete mode 100644 tvix/docs/language-spec.md create mode 100644 tvix/docs/src/SUMMARY.md create mode 100644 tvix/docs/src/architecture.md create mode 100644 tvix/docs/src/differences-drv-paths.md create mode 100644 tvix/docs/src/figures/component-flow.puml create mode 100644 tvix/docs/src/lang-version.md create mode 100644 tvix/docs/src/language-spec.md create mode 100644 tvix/docs/src/value-pointer-equality.md delete mode 100644 tvix/docs/value-pointer-equality.md (limited to 'tvix') diff --git 
a/tvix/docs/.gitignore b/tvix/docs/.gitignore index 77699ee8a3..8117055463 100644 --- a/tvix/docs/.gitignore +++ b/tvix/docs/.gitignore @@ -1,2 +1,2 @@ -*.svg -*.html +book +.mdbook-plantuml-cache/ diff --git a/tvix/docs/Makefile b/tvix/docs/Makefile deleted file mode 100644 index ba9e2bdef6..0000000000 --- a/tvix/docs/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -all: build - -puml: - plantuml *.puml -tsvg - -html: - pandoc *.md -f markdown --self-contained -t html -s -o tvix.html --csl=${CSL} - -build: puml html - -clean: - rm -f *.tex *.pdf *.png *.svg diff --git a/tvix/docs/book.toml b/tvix/docs/book.toml new file mode 100644 index 0000000000..7318a90233 --- /dev/null +++ b/tvix/docs/book.toml @@ -0,0 +1,11 @@ +[book] +authors = ["The Tvix Authors"] +language = "en" +multilingual = false +src = "src" +title = "Tvix Docs" + +[preprocessor.plantuml] +# override the /usr/bin/plantuml default +plantuml-cmd = "plantuml" +use-data-uris = true diff --git a/tvix/docs/component-flow.puml b/tvix/docs/component-flow.puml deleted file mode 100644 index 5b6d79b823..0000000000 --- a/tvix/docs/component-flow.puml +++ /dev/null @@ -1,60 +0,0 @@ -@startuml - -title Tvix build flow - -actor User -participant CLI -participant "Coordinator" as Coord -participant "Evaluator" as Eval -database Store -participant "Builder" as Build - -note over CLI,Eval - Typically runs locally on the invoking machine -end note -/ note over Store, Build - Can be either local or remote -end note - -User-->CLI: User initiates build of `hello` (analogous to `nix-build -f '' -A hello`) - -CLI-->Coord: CLI invokes coordinator - -Coord-->Eval: Sends message to start evaluation of `` (path lookup) with attribute `hello` -note right: The paths to the evaluator are local file system paths - -Coord<--Eval: Yields derivations to be built -note right - Immediately starts streaming derivations as they are instantiated across - the dependency graph so they can be built while the evaluation is still running. 
- - There are two types of build requests: One for regular "fire and forget" builds, - and another for IFD (import from derivation). - - These are distinct because IFD needs to be fed back into the evaluator for - further processing while a regular build does not. -end note - -loop while has more derivations - - Coord-->Store: Check if desired paths are in store - alt Store has path - Coord<--Store: Success response - else Store does not have path - Coord-->Build: Request derivation to be built - - alt Build failure - Coord<--Build: Fail response - note left: It's up to the coordinator whether to exit on build failure - else Build success - Build-->Store: Push outputs to store - Build<--Coord: Send success & pushed response - end - - end -end - -CLI<--Coord: Respond success/fail -User<--CLI: Exit success/fail - -@enduml diff --git a/tvix/docs/components.md b/tvix/docs/components.md deleted file mode 100644 index a7d61948c2..0000000000 --- a/tvix/docs/components.md +++ /dev/null @@ -1,160 +0,0 @@ ---- -title: "Tvix - Architecture & data flow" -numbersections: true -author: -- adisbladis -- flokli -- tazjin -email: -- adis@blad.is -- mail@tazj.in -lang: en-GB -classoption: -- twocolumn -header-includes: -- \usepackage{caption, graphicx, tikz, aeguill, pdflscape} ---- - -# Background - -We intend for Tvix tooling to be more decoupled than the existing, -monolithic Nix implementation. In practice, we expect to gain several -benefits from this, such as: - -- Ability to use different builders -- Ability to use different store implementations -- No monopolisation of the implementation, allowing users to replace - components that they are unhappy with (up to and including the - language evaluator) -- Less hidden intra-dependencies between tools due to explicit RPC/IPC - boundaries - -Communication between different components of the system will use -gRPC. The rest of this document outlines the components. 
- -# Components - -## Coordinator - -*Purpose:* The coordinator (in the simplest case, the Tvix CLI tool) -oversees the flow of a build process and delegates tasks to the right -subcomponents. For example, if a user runs the equivalent of -`nix-build` in a folder containing a `default.nix` file, the -coordinator will invoke the evaluator, pass the resulting derivations -to the builder and coordinate any necessary store interactions (for -substitution and other purposes). - -While many users are likely to use the CLI tool as their primary -method of interacting with Tvix, it is not unlikely that alternative -coordinators (e.g. for a distributed, "Nix-native" CI system) would be -implemented. To facilitate this, we are considering implementing the -coordinator on top of a state-machine model that would make it -possible to reuse the FSM logic without tying it to any particular -kind of application. - -## Evaluator - -*Purpose:* Eval takes care of evaluating Nix code. In a typical build -flow it would be responsible for producing derivations. It can also be -used as a standalone tool, for example, in use-cases where Nix is used -to generate configuration without any build or store involvement. - -*Requirements:* For now, it will run on the machine invoking the build -command itself. We give it filesystem access to handle things like -imports or `builtins.readFile`. - -To support IFD, the Evaluator also needs access to store paths. This -could be implemented by having the coordinator provide an interface to retrieve -files from a store path, or by ensuring a "realized version of the store" is -accessible by the evaluator (this could be a FUSE filesystem, or the "real" -/nix/store on disk. - -We might be okay with running the evaluator with filesystem access for now and -can extend the interface if the need arises. - -## Builder - -*Purpose:* A builder receives derivations from the coordinator and -builds them. 
- -By making builder a standardised interface it's possible to make the -sandboxing mechanism used by the build process pluggable. - -Nix is currently using a hard-coded -[libseccomp](https://github.com/seccomp/libseccomp) based sandboxing -mechanism and another one based on -[sandboxd](https://www.unix.com/man-page/mojave/8/sandboxd/) on macOS. -These are only separated by [compiler preprocessor -macros](https://gcc.gnu.org/onlinedocs/cpp/Ifdef.html) within the same -source files despite having very little in common with each other. - -This makes experimentation with alternative backends difficult and -porting Nix to other platforms harder than it has to be. We want to -write a new Linux builder which uses -[OCI](https://github.com/opencontainers/runtime-spec), the current -dominant Linux containerisation technology, by default. - -With a well-defined builder abstraction, it's also easy to imagine -other backends such as a Kubernetes-based one in the future. - -The environment in which builds happen is currently very Nix-specific. We might -want to avoid having to maintain all the intricacies of a Nix-specific -sandboxing environment in every builder, and instead only provide a more -generic interface, receiving build requests (and have the coordinator translate -derivations to that format). [^1] - -To build, the builder needs to be able to mount all build inputs into the build -environment. For this, it needs the store to expose a filesystem interface. - -## Store - -*Purpose:* Store takes care of storing build results. It provides a -unified interface to get store paths and upload new ones, as well as querying -for the existence of a store path and its metadata (references, signatures, …). - -Tvix natively uses an improved store protocol. Instead of transferring around -NAR files, which don't provide an index and don't allow seekable access, a -concept similar to git tree hashing is used. 
- -This allows more granular substitution, chunk reusage and parallel download of -individual files, reducing bandwidth usage. -As these chunks are content-addressed, it opens up the potential for -peer-to-peer trustless substitution of most of the data, as long as we sign the -root of the index. - -Tvix still keeps the old-style signatures, NAR hashes and NAR size around. In -the case of NAR hash / NAR size, this data is strictly required in some cases. -The old-style signatures are valuable for communication with existing -implementations. - -Old-style binary caches (like cache.nixos.org) can still be exposed via the new -interface, by doing on-the-fly (re)chunking/ingestion. - -Most likely, there will be multiple implementations of store, some storing -things locally, some exposing a "remote view". - -A few possible ones that come to mind are: - -- Local store -- SFTP/ GCP / S3 / HTTP -- NAR/NARInfo protocol: HTTP, S3 - -A remote Tvix store can be connected by simply connecting to its gRPC -interface, possibly using SSH tunneling, but there doesn't need to be an -additional "wire format" like the Nix `ssh(+ng)://` protocol. - -Settling on one interface allows composition of stores, meaning it becomes -possible to express substitution from remote caches as a proxy layer. - -It'd also be possible to write a FUSE implementation on top of the RPC -interface, exposing a lazily-substituting /nix/store mountpoint. Using this in -remote build context dramatically reduces the amount of data transferred to a -builder, as only the files really accessed during the build are substituted. 
- -# Figures - -![component flow](./component-flow.svg) - -[^1]: There have already been some discussions in the Nix community, to switch - to REAPI: - https://discourse.nixos.org/t/a-proposal-for-replacing-the-nix-worker-protocol/20926/22 diff --git a/tvix/docs/default.nix b/tvix/docs/default.nix index 016d641df5..9fc2f76576 100644 --- a/tvix/docs/default.nix +++ b/tvix/docs/default.nix @@ -1,47 +1,23 @@ { pkgs, lib, ... }: -let - - tl = pkgs.texlive.combine { - inherit (pkgs.texlive) scheme-medium wrapfig ulem capt-of - titlesec preprint enumitem paralist ctex environ svg - beamer trimspaces zhnumber changepage framed pdfpages - fvextra minted upquote ifplatform xstring; - }; - - csl = pkgs.fetchurl { - name = "numeric.csl"; - url = "https://gist.githubusercontent.com/bwiernik/8c6f39cf51ceb3a03107/raw/1d75c2d62113ffbba6ed03a47ad99bde86934f2b/APA%2520Numeric"; - sha256 = "1yfhhnhbzvhrv93baz98frmgsx5y442nzhb0l956l4j35fb0cc3h"; - }; - -in pkgs.stdenv.mkDerivation { - pname = "tvix-doc"; + pname = "tvix-docs"; version = "0.1"; - outputs = [ "out" "svg" ]; + outputs = [ "out" ]; src = lib.cleanSource ./.; - CSL = csl; - nativeBuildInputs = [ - pkgs.pandoc + pkgs.mdbook + pkgs.mdbook-plantuml pkgs.plantuml - tl ]; - installPhase = '' - runHook preInstall - - mkdir -p $out - cp -v *.html $out/ - - mkdir -p $svg - cp -v *.svg $svg/ - - runHook postSubmit + # plantuml wants to create ./.mdbook-plantuml-cache, which fails as $src is r/o. + # copy all sources elsewhere to workaround. + buildCommand = '' + cp -R $src/. . 
+ mdbook build -d $out ''; - } diff --git a/tvix/docs/differences-drv-paths.md b/tvix/docs/differences-drv-paths.md deleted file mode 100644 index e93131aa57..0000000000 --- a/tvix/docs/differences-drv-paths.md +++ /dev/null @@ -1,46 +0,0 @@ ---- -title: ".drvPath inconsistencies" -author: - - tazjin - - flokli -email: - - tazjin@tvl.su - - flokli@flokli.de -lang: en-GB ---- - -# Why .drvPath differs between Nix and Tvix - -Nix and Tvix currently use a different approach when it comes to tracking input -references, in order to build the right dependencies in advance. -Nix is using string contexts, whereas Tvix is doing reference scanning [^inbox-drvpath]. - -There are some real-life cases, for example during nixpkgs bootstrapping, where -multiple different fixed-output derivations are written to produce the same -hash. - -For example, bootstrap sources that are downloaded early are fetched using -a special "builder hack", in which the `builder` field of the derivation is -populated with the magic string `builtin:fetchurl` and the builder itself will -perform a fetch, with everything looking like a normal derivation to the user. - -These bootstrap sources are later on defined *again*, once `curl`is available, -to be downloaded using the standard pkgs.fetchtarball mechanism, but yielding -the *same* outputs (as the same files are being fetched). - -In our reference scanning implementation, this output scanning of FOD will -cause the path of the *first* derivation producing the given fixed output to be -stored in the `inputDrvs` field of the derivation, while Nix will point to the -derivation that was actually used. - -This doesn't cause any differences in the calculated *output paths*, as paths to -fixed-output derivations are replaced with a special -`fixed:out:${algo}:${digest}:${fodPath}` string that doesn't contain the "path -to the wrong derivation" anymore. 
- -As we haven't fully determined if our reference scanning approach is gonna work, -and comparing output paths is sufficient to determine equality of the build -instructions, this is left as a future work item. - - -[^inbox-drvpath]: https://inbox.tvl.su/depot/20230316120039.j4fkp3puzrtbjcpi@tp/T/#t diff --git a/tvix/docs/lang-version.md b/tvix/docs/lang-version.md deleted file mode 100644 index c288274c91..0000000000 --- a/tvix/docs/lang-version.md +++ /dev/null @@ -1,62 +0,0 @@ -# Nix language version history - -The Nix language (“Nix”) has its own versioning mechanism independent from its -most popular implementation (“C++ Nix”): `builtins.langVersion`. It has been -increased whenever the language has changed syntactically or semantically in a -way that would not be introspectable otherwise. In particular, this does not -include addition (or removal) of `builtins`, as this can be introspected using -standard attribute set operations. - -Changes to `builtins.langVersion` are best found by viewing the git history of -C++ Nix using `git log -G 'mkInt\\(v, [0-9]\\)'` for `builtins.langVersion` < 7. -After that point `git log -G 'v\\.mkInt\\([0-9]+\\)'` should work. To reduce the -amount of false positives, specify the version number you are interested in -explicitly. - -## 1 - -The first version of the Nix language is its state at the point when -`builtins.langVersion` was added in [8b8ee53] which was first released -as part of C++ Nix 1.2. - -## 2 - -Nix version 2 changed the behavior of `builtins.storePath`: It would now [try to -substitute the given path if missing][storePath-substitute], instead of creating -an evaluation failure. `builtins.langVersion` was increased in [e36229d]. - -## 3 - -Nix version 3 changed the behavior of the `==` behavior. Strings would now be -considered [equal even if they had differing string context][equal-no-ctx]. - -## 4 - -Nix version 4 [added the float type][float] to the language. 
- -## 5 - -The [increase of `builtins.langVersion` to 5][langVersion-5] did not signify a -language change, but added support for structured attributes to the Nix daemon. -Eelco Dolstra writes as to what changed: - -> The structured attributes support. Unfortunately that's not so much a language -> change as a build.cc (i.e. daemon) change, but we don't really have a way to -> express that... - -Maybe `builtins.nixVersion` (which was added in version 1) should have been -used instead. In any case, the [only `langVersion` check][nixpkgs-langVersion-5] -in nixpkgs verifies a lower bound of 5. - -## 6 - -Nix version 6 added support for [comparing two lists][list-comparison]. - -[8b8ee53]: https://github.com/nixos/nix/commit/8b8ee53bc73769bb25d967ba259dabc9b23e2e6f -[storePath-substitute]: https://github.com/nixos/nix/commit/22d665019a3770148929b7504c73bcdbe025ec12 -[e36229d]: https://github.com/nixos/nix/commit/e36229d27f9ab508e0abf1892f3e8c263d2f8c58 -[equal-no-ctx]: https://github.com/nixos/nix/commit/ee7fe64c0ac00f2be11604a2a6509eb86dc19f0a -[float]: https://github.com/nixos/nix/commit/14ebde52893263930cdcde1406cc91cc5c42556f -[langVersion-5]: https://github.com/nixos/nix/commit/8191992c83bf4387b03c5fdaba818dc2b520462d -[list-comparison]: https://github.com/nixos/nix/commit/09471d2680292af48b2788108de56a8da755d661 -[nixpkgs-langVersion-5]: https://github.com/NixOS/nixpkgs/blob/d7ac3423d321b8b145ccdd1aed9dfdb280f5e391/pkgs/build-support/closure-info.nix#L11 diff --git a/tvix/docs/language-spec.md b/tvix/docs/language-spec.md deleted file mode 100644 index a714374933..0000000000 --- a/tvix/docs/language-spec.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -title: "Specification of the Nix language" -numbersections: true -author: -- tazjin -email: -- tazjin@tvl.su -lang: en-GB ---- - -The Nix Language -================ - -WARNING: This document is a work in progress. Please keep an eye on -[`topic:nix-spec`](https://cl.tvl.fyi/q/topic:nix-spec) for ongoing -CLs. 
- -Nix is a general-purpose, functional programming language which this -document aims to describe. - -## Background - -Nix was designed and implemented as part of the [Nix package -manager](https://nixos.org/nix). It is primarily used for generating -so-called [*derivations*](#derivations), which are data structures -describing how to build a package. - -The language has been described in the -[thesis](https://edolstra.github.io/pubs/phd-thesis.pdf) introducing -the package manager, but only on a high-level. At the time of writing, -Nix is informally specified (via its only complete implementation in -the package manager) and there is no complete overview over its - -sometimes surprising - semantics. - -The primary project written in Nix is -[nixpkgs](https://github.com/NixOS/nixpkgs/). Uncertainties in the -process of writing this specification are resolved by investigating -patterns in nixpkgs, which we consider canonical. The code in nixpkgs -uses a reasonable subset of the features exposed by the current -implementation, some of which are *accidental*, and is thus more -useful for specifying how the language should work. - -## Introduction to Nix - -Nix is a general-purpose, partially lazy, functional programming -language which provides higher-order functions, type reflection, -primitive data types such as integers, strings and floats, and -compound data structures such as lists and attribute sets. - -Nix has syntactic sugar for common operations, such as those for -attribute sets, and also provides a wide range of built-in functions -which have organically accumulated over time. - -Nix has a variety of legacy features that are not in practical use, -but are documented in sections of this specification for the sake of -completeness. - -This document describes the syntax and abstract semantics of the Nix -language, but leaves out implementation details about how Nix can be -interpreted/compiled/analysed etc. 
- -### Program structure - -This section describes the semantic structure of Nix, and how it -relates to the rest of the specification. - -Each Nix program is a single [*expression*](#expressions) denoting a -[*value*](#values) (commonly a [*function*](#functions)). Each value -has a [*type*](#types), however this type is not statically known. - -Nix code is modularised through the use of the -[*import*](#builtins-import) built-in function. No separate module -system exists. - -In addition to chapters describing the building blocks mentioned -above, this specificiation also describes the [*syntax*](#syntax), the -available [built-in functions](#builtins), [*error handling*](#errors) -and known [*deficiencies*](#deficiencies) in the language. diff --git a/tvix/docs/src/SUMMARY.md b/tvix/docs/src/SUMMARY.md new file mode 100644 index 0000000000..d4585c00ac --- /dev/null +++ b/tvix/docs/src/SUMMARY.md @@ -0,0 +1,10 @@ +# Summary + +# Tvix +- [Architecture & data flow](./architecture.md) +- [.drvPath inconsistencies](./differences-drv-paths.md) + +# Nix +- [Specification of the Nix Language](./language-spec.md) +- [Nix language version history](./lang-version.md) +- [Value Pointer Equality](./value-pointer-equality.md) diff --git a/tvix/docs/src/architecture.md b/tvix/docs/src/architecture.md new file mode 100644 index 0000000000..5e0aa95f1a --- /dev/null +++ b/tvix/docs/src/architecture.md @@ -0,0 +1,147 @@ +# Tvix - Architecture & data flow + +## Background + +We intend for Tvix tooling to be more decoupled than the existing, +monolithic Nix implementation. 
In practice, we expect to gain several +benefits from this, such as: + +- Ability to use different builders +- Ability to use different store implementations +- No monopolisation of the implementation, allowing users to replace + components that they are unhappy with (up to and including the + language evaluator) +- Less hidden intra-dependencies between tools due to explicit RPC/IPC + boundaries + +Communication between different components of the system will use +gRPC. The rest of this document outlines the components. + +## Components + +### Coordinator + +*Purpose:* The coordinator (in the simplest case, the Tvix CLI tool) +oversees the flow of a build process and delegates tasks to the right +subcomponents. For example, if a user runs the equivalent of +`nix-build` in a folder containing a `default.nix` file, the +coordinator will invoke the evaluator, pass the resulting derivations +to the builder and coordinate any necessary store interactions (for +substitution and other purposes). + +While many users are likely to use the CLI tool as their primary +method of interacting with Tvix, it is not unlikely that alternative +coordinators (e.g. for a distributed, "Nix-native" CI system) would be +implemented. To facilitate this, we are considering implementing the +coordinator on top of a state-machine model that would make it +possible to reuse the FSM logic without tying it to any particular +kind of application. + +### Evaluator + +*Purpose:* Eval takes care of evaluating Nix code. In a typical build +flow it would be responsible for producing derivations. It can also be +used as a standalone tool, for example, in use-cases where Nix is used +to generate configuration without any build or store involvement. + +*Requirements:* For now, it will run on the machine invoking the build +command itself. We give it filesystem access to handle things like +imports or `builtins.readFile`. + +To support IFD, the Evaluator also needs access to store paths. 
This +could be implemented by having the coordinator provide an interface to retrieve +files from a store path, or by ensuring a "realized version of the store" is +accessible by the evaluator (this could be a FUSE filesystem, or the "real" +/nix/store on disk). + +We might be okay with running the evaluator with filesystem access for now and +can extend the interface if the need arises. + +### Builder + +*Purpose:* A builder receives derivations from the coordinator and +builds them. + +By making builder a standardised interface it's possible to make the +sandboxing mechanism used by the build process pluggable. + +Nix is currently using a hard-coded +[libseccomp](https://github.com/seccomp/libseccomp) based sandboxing +mechanism and another one based on +[sandboxd](https://www.unix.com/man-page/mojave/8/sandboxd/) on macOS. +These are only separated by [compiler preprocessor +macros](https://gcc.gnu.org/onlinedocs/cpp/Ifdef.html) within the same +source files despite having very little in common with each other. + +This makes experimentation with alternative backends difficult and +porting Nix to other platforms harder than it has to be. We want to +write a new Linux builder which uses +[OCI](https://github.com/opencontainers/runtime-spec), the current +dominant Linux containerisation technology, by default. + +With a well-defined builder abstraction, it's also easy to imagine +other backends such as a Kubernetes-based one in the future. + +The environment in which builds happen is currently very Nix-specific. We might +want to avoid having to maintain all the intricacies of a Nix-specific +sandboxing environment in every builder, and instead only provide a more +generic interface, receiving build requests (and have the coordinator translate +derivations to that format). [^1] + +To build, the builder needs to be able to mount all build inputs into the build +environment. For this, it needs the store to expose a filesystem interface.
+ +### Store + +*Purpose:* Store takes care of storing build results. It provides a +unified interface to get store paths and upload new ones, as well as querying +for the existence of a store path and its metadata (references, signatures, …). + +Tvix natively uses an improved store protocol. Instead of transferring around +NAR files, which don't provide an index and don't allow seekable access, a +concept similar to git tree hashing is used. + +This allows more granular substitution, chunk reusage and parallel download of +individual files, reducing bandwidth usage. +As these chunks are content-addressed, it opens up the potential for +peer-to-peer trustless substitution of most of the data, as long as we sign the +root of the index. + +Tvix still keeps the old-style signatures, NAR hashes and NAR size around. In +the case of NAR hash / NAR size, this data is strictly required in some cases. +The old-style signatures are valuable for communication with existing +implementations. + +Old-style binary caches (like cache.nixos.org) can still be exposed via the new +interface, by doing on-the-fly (re)chunking/ingestion. + +Most likely, there will be multiple implementations of store, some storing +things locally, some exposing a "remote view". + +A few possible ones that come to mind are: + +- Local store +- SFTP/ GCP / S3 / HTTP +- NAR/NARInfo protocol: HTTP, S3 + +A remote Tvix store can be connected by simply connecting to its gRPC +interface, possibly using SSH tunneling, but there doesn't need to be an +additional "wire format" like the Nix `ssh(+ng)://` protocol. + +Settling on one interface allows composition of stores, meaning it becomes +possible to express substitution from remote caches as a proxy layer. + +It'd also be possible to write a FUSE implementation on top of the RPC +interface, exposing a lazily-substituting /nix/store mountpoint. 
Using this in +remote build context dramatically reduces the amount of data transferred to a +builder, as only the files really accessed during the build are substituted. + +## Figures + +```plantuml,format=svg +{{#include figures/component-flow.puml}} +``` + +[^1]: There have already been some discussions in the Nix community, to switch + to REAPI: + https://discourse.nixos.org/t/a-proposal-for-replacing-the-nix-worker-protocol/20926/22 diff --git a/tvix/docs/src/differences-drv-paths.md b/tvix/docs/src/differences-drv-paths.md new file mode 100644 index 0000000000..f1c90bd528 --- /dev/null +++ b/tvix/docs/src/differences-drv-paths.md @@ -0,0 +1,35 @@ +# .drvPath inconsistencies / Why .drvPath differs between Nix and Tvix + +Nix and Tvix currently use a different approach when it comes to tracking input +references, in order to build the right dependencies in advance. +Nix is using string contexts, whereas Tvix is doing reference scanning [^inbox-drvpath]. + +There are some real-life cases, for example during nixpkgs bootstrapping, where +multiple different fixed-output derivations are written to produce the same +hash. + +For example, bootstrap sources that are downloaded early are fetched using +a special "builder hack", in which the `builder` field of the derivation is +populated with the magic string `builtin:fetchurl` and the builder itself will +perform a fetch, with everything looking like a normal derivation to the user. + +These bootstrap sources are later on defined *again*, once `curl` is available, +to be downloaded using the standard pkgs.fetchtarball mechanism, but yielding +the *same* outputs (as the same files are being fetched). + +In our reference scanning implementation, this output scanning of FOD will +cause the path of the *first* derivation producing the given fixed output to be +stored in the `inputDrvs` field of the derivation, while Nix will point to the +derivation that was actually used.
+ +This doesn't cause any differences in the calculated *output paths*, as paths to +fixed-output derivations are replaced with a special +`fixed:out:${algo}:${digest}:${fodPath}` string that doesn't contain the "path +to the wrong derivation" anymore. + +As we haven't fully determined if our reference scanning approach is gonna work, +and comparing output paths is sufficient to determine equality of the build +instructions, this is left as a future work item. + + +[^inbox-drvpath]: https://inbox.tvl.su/depot/20230316120039.j4fkp3puzrtbjcpi@tp/T/#t diff --git a/tvix/docs/src/figures/component-flow.puml b/tvix/docs/src/figures/component-flow.puml new file mode 100644 index 0000000000..5b6d79b823 --- /dev/null +++ b/tvix/docs/src/figures/component-flow.puml @@ -0,0 +1,60 @@ +@startuml + +title Tvix build flow + +actor User +participant CLI +participant "Coordinator" as Coord +participant "Evaluator" as Eval +database Store +participant "Builder" as Build + +note over CLI,Eval + Typically runs locally on the invoking machine +end note +/ note over Store, Build + Can be either local or remote +end note + +User-->CLI: User initiates build of `hello` (analogous to `nix-build -f '' -A hello`) + +CLI-->Coord: CLI invokes coordinator + +Coord-->Eval: Sends message to start evaluation of `` (path lookup) with attribute `hello` +note right: The paths to the evaluator are local file system paths + +Coord<--Eval: Yields derivations to be built +note right + Immediately starts streaming derivations as they are instantiated across + the dependency graph so they can be built while the evaluation is still running. + + There are two types of build requests: One for regular "fire and forget" builds, + and another for IFD (import from derivation). + + These are distinct because IFD needs to be fed back into the evaluator for + further processing while a regular build does not. 
+end note + +loop while has more derivations + + Coord-->Store: Check if desired paths are in store + alt Store has path + Coord<--Store: Success response + else Store does not have path + Coord-->Build: Request derivation to be built + + alt Build failure + Coord<--Build: Fail response + note left: It's up to the coordinator whether to exit on build failure + else Build success + Build-->Store: Push outputs to store + Build<--Coord: Send success & pushed response + end + + end +end + +CLI<--Coord: Respond success/fail +User<--CLI: Exit success/fail + +@enduml diff --git a/tvix/docs/src/lang-version.md b/tvix/docs/src/lang-version.md new file mode 100644 index 0000000000..c288274c91 --- /dev/null +++ b/tvix/docs/src/lang-version.md @@ -0,0 +1,62 @@ +# Nix language version history + +The Nix language (“Nix”) has its own versioning mechanism independent from its +most popular implementation (“C++ Nix”): `builtins.langVersion`. It has been +increased whenever the language has changed syntactically or semantically in a +way that would not be introspectable otherwise. In particular, this does not +include addition (or removal) of `builtins`, as this can be introspected using +standard attribute set operations. + +Changes to `builtins.langVersion` are best found by viewing the git history of +C++ Nix using `git log -G 'mkInt\\(v, [0-9]\\)'` for `builtins.langVersion` < 7. +After that point `git log -G 'v\\.mkInt\\([0-9]+\\)'` should work. To reduce the +amount of false positives, specify the version number you are interested in +explicitly. + +## 1 + +The first version of the Nix language is its state at the point when +`builtins.langVersion` was added in [8b8ee53] which was first released +as part of C++ Nix 1.2. + +## 2 + +Nix version 2 changed the behavior of `builtins.storePath`: It would now [try to +substitute the given path if missing][storePath-substitute], instead of creating +an evaluation failure. `builtins.langVersion` was increased in [e36229d]. 
+ +## 3 + +Nix version 3 changed the behavior of the `==` operator. Strings would now be +considered [equal even if they had differing string context][equal-no-ctx]. + +## 4 + +Nix version 4 [added the float type][float] to the language. + +## 5 + +The [increase of `builtins.langVersion` to 5][langVersion-5] did not signify a +language change, but added support for structured attributes to the Nix daemon. +Eelco Dolstra writes as to what changed: + +> The structured attributes support. Unfortunately that's not so much a language +> change as a build.cc (i.e. daemon) change, but we don't really have a way to +> express that... + +Maybe `builtins.nixVersion` (which was added in version 1) should have been +used instead. In any case, the [only `langVersion` check][nixpkgs-langVersion-5] +in nixpkgs verifies a lower bound of 5. + +## 6 + +Nix version 6 added support for [comparing two lists][list-comparison]. + +[8b8ee53]: https://github.com/nixos/nix/commit/8b8ee53bc73769bb25d967ba259dabc9b23e2e6f +[storePath-substitute]: https://github.com/nixos/nix/commit/22d665019a3770148929b7504c73bcdbe025ec12 +[e36229d]: https://github.com/nixos/nix/commit/e36229d27f9ab508e0abf1892f3e8c263d2f8c58 +[equal-no-ctx]: https://github.com/nixos/nix/commit/ee7fe64c0ac00f2be11604a2a6509eb86dc19f0a +[float]: https://github.com/nixos/nix/commit/14ebde52893263930cdcde1406cc91cc5c42556f +[langVersion-5]: https://github.com/nixos/nix/commit/8191992c83bf4387b03c5fdaba818dc2b520462d +[list-comparison]: https://github.com/nixos/nix/commit/09471d2680292af48b2788108de56a8da755d661 +[nixpkgs-langVersion-5]: https://github.com/NixOS/nixpkgs/blob/d7ac3423d321b8b145ccdd1aed9dfdb280f5e391/pkgs/build-support/closure-info.nix#L11 diff --git a/tvix/docs/src/language-spec.md b/tvix/docs/src/language-spec.md new file mode 100644 index 0000000000..0ff1dc491e --- /dev/null +++ b/tvix/docs/src/language-spec.md @@ -0,0 +1,67 @@ +# Specification of the Nix Language + +WARNING: This document is a work in progress. 
Please keep an eye on +[`topic:nix-spec`](https://cl.tvl.fyi/q/topic:nix-spec) for ongoing +CLs. + +Nix is a general-purpose, functional programming language which this +document aims to describe. + +## Background + +Nix was designed and implemented as part of the [Nix package +manager](https://nixos.org/nix). It is primarily used for generating +so-called [*derivations*](#derivations), which are data structures +describing how to build a package. + +The language has been described in the +[thesis](https://edolstra.github.io/pubs/phd-thesis.pdf) introducing +the package manager, but only at a high level. At the time of writing, +Nix is informally specified (via its only complete implementation in +the package manager) and there is no complete overview of its - +sometimes surprising - semantics. + +The primary project written in Nix is +[nixpkgs](https://github.com/NixOS/nixpkgs/). Uncertainties in the +process of writing this specification are resolved by investigating +patterns in nixpkgs, which we consider canonical. The code in nixpkgs +uses a reasonable subset of the features exposed by the current +implementation, some of which are *accidental*, and is thus more +useful for specifying how the language should work. + +## Introduction to Nix + +Nix is a general-purpose, partially lazy, functional programming +language which provides higher-order functions, type reflection, +primitive data types such as integers, strings and floats, and +compound data structures such as lists and attribute sets. + +Nix has syntactic sugar for common operations, such as those for +attribute sets, and also provides a wide range of built-in functions +which have organically accumulated over time. + +Nix has a variety of legacy features that are not in practical use, +but are documented in sections of this specification for the sake of +completeness. 
+ +This document describes the syntax and abstract semantics of the Nix +language, but leaves out implementation details about how Nix can be +interpreted/compiled/analysed etc. + +### Program structure + +This section describes the semantic structure of Nix, and how it +relates to the rest of the specification. + +Each Nix program is a single [*expression*](#expressions) denoting a +[*value*](#values) (commonly a [*function*](#functions)). Each value +has a [*type*](#types), however this type is not statically known. + +Nix code is modularised through the use of the +[*import*](#builtins-import) built-in function. No separate module +system exists. + +In addition to chapters describing the building blocks mentioned +above, this specification also describes the [*syntax*](#syntax), the +available [built-in functions](#builtins), [*error handling*](#errors) +and known [*deficiencies*](#deficiencies) in the language. diff --git a/tvix/docs/src/value-pointer-equality.md b/tvix/docs/src/value-pointer-equality.md new file mode 100644 index 0000000000..d84efcb50c --- /dev/null +++ b/tvix/docs/src/value-pointer-equality.md @@ -0,0 +1,338 @@ +# Value Pointer Equality in Nix + +## Introduction + +It is a piece of semi-obscure Nix trivia that while functions are generally not +comparable, they can be compared in certain situations. This is actually quite an +important fact, as it is essential for the evaluation of nixpkgs: The attribute sets +used to represent platforms in nixpkgs, like `stdenv.buildPlatform`, contain functions, +such as `stdenv.buildPlatform.canExecute`. When writing cross logic, one invariably +ends up writing expressions that compare these sets, e.g. `stdenv.buildPlatform != +stdenv.hostPlatform`. Since attribute set equality is the equality of their attribute +names and values, we also end up comparing the functions within them. 
We can summarize +the relevant part of this behavior for platform comparisons in the following (true) +Nix expressions: + +* `stdenv.hostPlatform.canExecute != stdenv.hostPlatform.canExecute` +* `stdenv.hostPlatform == stdenv.hostPlatform` + +This fact is commonly referred to as pointer equality of functions (or function pointer +equality) which is not an entirely accurate name, as we'll see. This account of the +behavior states that, while functions are incomparable in general, they are comparable +insofar, as they occupy the same spot in an attribute set. + +However, [a maybe lesser known trick][puck-issue] is to write a function such as the +following to allow comparing functions: + +```nix +let + pointerEqual = lhs: rhs: { x = lhs; } == { x = rhs; }; + + f = name: "Hello, my name is ${name}"; + g = name: "Hello, my name is ${name}"; +in +[ + (pointerEqual f f) # => true + (pointerEqual f g) # => false +] +``` + +Here, clearly, the function is not contained at the same position in one and the same +attribute set, but at the same position in two entirely different attribute sets. We can +also see that we are not comparing the functions themselves (e.g. their AST), but +rather if they are the same individual value (i.e. pointer equal). + +To figure out the _actual_ semantics, we'll first have a look at how value (pointer) equality +works in C++ Nix, the only production ready Nix implementation currently available. + +## Nix (Pointer) Equality in C++ Nix + +TIP: The summary presented here is up-to-date as of 2023-06-27 and was tested +with Nix 2.3, 2.11 and 2.15. + +### `EvalState::eqValues` and `ExprOpEq::eval` + +The function implementing equality in C++ Nix is `EvalState::eqValues` which starts with +[the following bit of code][eqValues-pointer-eq]: + +```cpp +bool EvalState::eqValues(Value & v1, Value & v2) +{ + forceValue(v1); + forceValue(v2); + + /* !!! Hack to support some old broken code that relies on pointer + equality tests between sets. 
(Specifically, builderDefs calls + uniqList on a list of sets.) Will remove this eventually. */ + if (&v1 == &v2) return true; +``` + +So this immediately looks more like pointer equality of arbitrary *values* instead of functions. In fact +there is [no special code facilitating function equality][eqValues-function-eq]: + +```cpp + /* Functions are incomparable. */ + case nFunction: + return false; +``` + +So one takeaway of this is that pointer equality is neither dependent on functions nor attribute sets. +In fact, we can also write our `pointerEqual` function as: + +```nix +lhs: rhs: [ lhs ] == [ rhs ] +``` + +It's interesting that `EvalState::eqValues` forces the left and right-hand value before trying pointer +equality. It explains that `let x = throw ""; in x == x` does not evaluate successfully, but it is puzzling why +`let f = x: x; in f == f` does not return `true`. In fact, why do we need to wrap the values in a list or +attribute set at all for our `pointerEqual` function to work? + +The answer lies in [the code that evaluates `ExprOpEq`][ExprOpEq], +i.e. an expression involving the `==` operator: + +```cpp +void ExprOpEq::eval(EvalState & state, Env & env, Value & v) +{ + Value v1; e1->eval(state, env, v1); + Value v2; e2->eval(state, env, v2); + v.mkBool(state.eqValues(v1, v2)); +} +``` + +As you can see, two _distinct_ `Value` structs are created, so they can never be pointer equal even +if the `union` inside points to the same bit of memory. We can thus understand what actually happens +when we check the equality of an attribute set (or list), by looking at the following expression: + +```nix +let + x = { name = throw "nameless"; }; +in + +x == x # => causes an evaluation error +``` + +Because `x` can't be pointer equal, as it'll end up in the distinct structs `v1` and `v2`, it needs to be compared +by value. For this reason, the `name` attribute will be forced and an evaluation error caused. 
+If we rewrite the expression to use… + +```nix +{ inherit x; } == { inherit x; } # => true +``` + +…, it'll work: The two attribute sets are compared by value, but their `x` attribute turns out to be pointer +equal _after_ forcing it. This does not throw, since forcing an attribute set does not force its attributes' +values (as forcing a list doesn't force its elements). + +As we have seen, pointer equality can not only be used to compare function values, but also other +otherwise incomparable values, such as lists and attribute sets that would cause an evaluation +error if they were forced recursively. We can even switch out the `throw` for an `abort`. The limitation is +of course that we need to use a value that behaves differently depending on whether it is forced +“normally” (think `builtins.seq`) or recursively (think `builtins.deepSeq`), so thunks will generally be +evaluated before pointer equality can kick into effect. + +### Other Comparisons + +The `!=` operator uses `EvalState::eqValues` internally as well, so it behaves exactly as `!(a == b)`. + +The `>`, `<`, `>=` and `<=` operators all desugar to [CompareValues][] +eventually which generally looks at the value type before comparing. It does, +however, rely on `EvalState::eqValues` for list comparisons +([introduced in Nix 2.5][nix-2.5-changelog]), so it is possible to compare lists +with e.g. functions in them, as long as they are equal by pointer: + +```nix +let + f = x: x + 42; +in + +[ + ([ f 2 ] > [ f 1 ]) # => true + ([ f 2 ] > [ (x: x) 1]) # => error: cannot compare a function with a function + ([ f ] > [ f ]) # => false +] +``` + +Finally, since `builtins.elem` relies on `EvalState::eqValues`, you can check for +a function by pointer equality: + +```nix +let + f = x: f x; +in +builtins.elem f [ f 2 3 ] # => true +``` + +### Pointer Equality Preserving Nix Operations + +We have seen that pointer equality is established by comparing the memory +location of two C++ `Value` structs. 
But how does this _representation_ relate +to Nix values _themselves_ (in the sense of a platonic ideal if you will)? In +Nix, values have no identity (ignoring `unsafeGetAttrPos`) or memory location. + +Since Nix is purely functional, values can't be mutated, so they need to be +copied frequently. With Nix being garbage collected, there is no strong +expectation when a copy is made, we probably just hope it is done as seldomly as +possible to save on memory. With pointer equality leaking the memory location of +the `Value` structs to an extent, it is now suddenly our business to know +exactly _when_ a copy of a value is made. + +Evaluation in C++ Nix mainly proceeds along the following [two +functions][eval-maybeThunk]. + +```cpp +struct Expr +{ + /* … */ + virtual void eval(EvalState & state, Env & env, Value & v); + virtual Value * maybeThunk(EvalState & state, Env & env); + /* … */ +}; +``` + +As you can see, `Expr::eval` always takes a reference to a struct _allocated by +the caller_ to place the evaluation result in. Anything that is processed using +`Expr::eval` will be a copy of the `Value` struct even if the value before and +after are the same. + +`Expr::maybeThunk`, on the other hand, returns a pointer to a `Value` which may +already exist or be newly allocated. So, if evaluation passes through `maybeThunk`, +Nix values _can_ retain their pointer equality. Since Nix is lazy, a lot of +evaluation needs to be thunked and pass through `maybeThunk`—knowing under what +circumstances `maybeThunk` will return a pointer to an already existing `Value` +struct thus means knowing the circumstances under which pointer equality of a +Nix value will be preserved in C++ Nix. 
+ +The [default case][maybeThunk-default] of `Expr::maybeThunk` allocates a new +`Value` which holds the delayed computation of the `Expr` as a thunk: + +```cpp + +Value * Expr::maybeThunk(EvalState & state, Env & env) +{ + Value * v = state.allocValue(); + mkThunk(*v, env, this); + return v; +} +``` + +Consequently, only special-cased expressions could preserve pointer equality. +These are `ExprInt`, `ExprFloat`, `ExprString`, `ExprPath`—all of which relate +to creating new values—and [finally, `ExprVar`][maybeThunk-ExprVar]: + +```cpp +Value * ExprVar::maybeThunk(EvalState & state, Env & env) +{ + Value * v = state.lookupVar(&env, *this, true); + /* The value might not be initialised in the environment yet. + In that case, ignore it. */ + if (v) { state.nrAvoided++; return v; } + return Expr::maybeThunk(state, env); +} +``` + +Here we may actually return an already existing `Value` struct. Consequently, +accessing a value from the scope is the only thing you can do with a value in +C++ Nix that preserves its pointer equality, as the following example shows. +For example, using the select operator to get a value from an attribute set +or even passing a value through the identity function invalidates its pointer +equality to itself (or rather, its former self). + +```nix +let + pointerEqual = a: b: [ a ] == [ b ]; + id = x: x; + + f = _: null; + x = { inherit f; }; + y = { inherit f; }; +in + +[ + (pointerEqual f f) # => true + + (pointerEqual f (id f)) # => false + + (pointerEqual x.f y.f) # => false + (pointerEqual x.f x.f) # => false + + (pointerEqual x x) # => true + (pointerEqual x y) # => true +] +``` + +In the last two cases, the example also shows that there is another way to +preserve pointer equality: Storing a value in an attribute set (or list) +preserves its pointer equality even if the structure holding it is modified in +some way (as long as the value we care about is left untouched). 
The catch is, +of course, that there is no way to get the value out of the structure while +preserving pointer equality (which requires using the select operator or a call +to `builtins.elemAt`). + +We initially illustrated the issue of pointer equality using the following +true expressions: + +* `stdenv.hostPlatform.canExecute != stdenv.hostPlatform.canExecute` +* `stdenv.hostPlatform == stdenv.hostPlatform` + +We can now add a third one, illustrating that pointer equality is invalidated +by select operations: + +* `[ stdenv.hostPlatform.canExecute ] != [ stdenv.hostPlatform.canExecute ]` + +To summarize, pointer equality is established on the memory location of the +`Value` struct in C++ Nix. Except for simple values (`int`, `bool`, …), +the `Value` struct only consists of a pointer to the actual representation +of the value (attribute set, list, function, …) and is thus cheap to copy. +In practice, such a copy happens when a value passes through the evaluation of +almost any Nix expression. Only in the select cases described above +does a value preserve its pointer equality despite being unchanged by an +expression. We can call this behavior *exterior pointer equality*. + +## Summary + +When comparing two Nix values, we must force both of them (non-recursively!), but are +allowed to short-circuit the comparison based on pointer equality, i.e. if they are at +the exact same location in memory, they are deemed equal immediately. This is completely +independent of what type of value they are. If they are not pointer equal, they are +(recursively) compared by value as expected. + +However, when evaluating the Nix expression `a == b`, we *must* invoke our implementation's +value equality function in a way that `a` and `b` themselves can never be deemed pointer equal. +Any values we encounter while recursing during the equality check must be compared by +pointer as described above, though. 
+ +## Stability of the Feature + +Keen readers will have noticed the following comment in the C++ Nix source code, +indicating that pointer comparison may be removed in the future. + +```cpp + /* !!! Hack to support some old broken code that relies on pointer + equality tests between sets. (Specifically, builderDefs calls + uniqList on a list of sets.) Will remove this eventually. */ +``` + +Now, I can't speak for the upstream C++ Nix developers, but sure can speculate. +As already pointed out, this feature is currently needed for evaluating nixpkgs. +While its use could realistically be eliminated (the only bothersome spot is probably +the `emulator` function, but that should also be doable), removing the feature +would seriously compromise C++ Nix's ability to evaluate historical nixpkgs +revisions, which is arguably a strength of the system. + +Another indication that it is likely here to stay is that it has already +[outlived builderDefs][], even though +it was (apparently) reintroduced just for this use case. More research into +the history of this feature would still be prudent, especially the reason for +its original introduction (maybe performance?). 
+ +[puck-issue]: https://github.com/NixOS/nix/issues/3371 +[eqValues-pointer-eq]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/eval.cc#L2401-L2404 +[eqValues-function-eq]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/eval.cc#L2458-L2460 +[ExprOpEq]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/eval.cc#L1822-L1827 +[outlived builderDefs]: https://github.com/NixOS/nixpkgs/issues/4210 +[CompareValues]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/primops.cc#L569-L610 +[nix-2.5-changelog]: https://nixos.org/manual/nix/stable/release-notes/rl-2.5.html +[eval-maybeThunk]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/nixexpr.hh#L161-L162 +[maybeThunk-default]: https://github.com/NixOS/nix/blob/8e770dac9f68162cfbb368e53f928df491babff3/src/libexpr/eval.cc#L1076-L1081 +[maybeThunk-ExprVar]: https://github.com/NixOS/nix/blob/8e770dac9f68162cfbb368e53f928df491babff3/src/libexpr/eval.cc#L1084-L1091 diff --git a/tvix/docs/value-pointer-equality.md b/tvix/docs/value-pointer-equality.md deleted file mode 100644 index d84efcb50c..0000000000 --- a/tvix/docs/value-pointer-equality.md +++ /dev/null @@ -1,338 +0,0 @@ -# Value Pointer Equality in Nix - -## Introduction - -It is a piece of semi-obscure Nix trivia that while functions are generally not -comparable, they can be compared in certain situations. This is actually quite an -important fact, as it is essential for the evaluation of nixpkgs: The attribute sets -used to represent platforms in nixpkgs, like `stdenv.buildPlatform`, contain functions, -such as `stdenv.buildPlatform.canExecute`. When writing cross logic, one invariably -ends up writing expressions that compare these sets, e.g. `stdenv.buildPlatform != -stdenv.hostPlatform`. 
Since attribute set equality is the equality of their attribute -names and values, we also end up comparing the functions within them. We can summarize -the relevant part of this behavior for platform comparisons in the following (true) -Nix expressions: - -* `stdenv.hostPlatform.canExecute != stdenv.hostPlatform.canExecute` -* `stdenv.hostPlatform == stdenv.hostPlatform` - -This fact is commonly referred to as pointer equality of functions (or function pointer -equality) which is not an entirely accurate name, as we'll see. This account of the -behavior states that, while functions are incomparable in general, they are comparable -insofar, as they occupy the same spot in an attribute set. - -However, [a maybe lesser known trick][puck-issue] is to write a function such as the -following to allow comparing functions: - -```nix -let - pointerEqual = lhs: rhs: { x = lhs; } == { x = rhs; }; - - f = name: "Hello, my name is ${name}"; - g = name: "Hello, my name is ${name}"; -in -[ - (pointerEqual f f) # => true - (pointerEqual f g) # => false -] -``` - -Here, clearly, the function is not contained at the same position in one and the same -attribute set, but at the same position in two entirely different attribute sets. We can -also see that we are not comparing the functions themselves (e.g. their AST), but -rather if they are the same individual value (i.e. pointer equal). - -To figure out the _actual_ semantics, we'll first have a look at how value (pointer) equality -works in C++ Nix, the only production ready Nix implementation currently available. - -## Nix (Pointer) Equality in C++ Nix - -TIP: The summary presented here is up-to-date as of 2023-06-27 and was tested -with Nix 2.3, 2.11 and 2.15. 
- -### `EvalState::eqValues` and `ExprOpEq::eval` - -The function implementing equality in C++ Nix is `EvalState::eqValues` which starts with -[the following bit of code][eqValues-pointer-eq]: - -```cpp -bool EvalState::eqValues(Value & v1, Value & v2) -{ - forceValue(v1); - forceValue(v2); - - /* !!! Hack to support some old broken code that relies on pointer - equality tests between sets. (Specifically, builderDefs calls - uniqList on a list of sets.) Will remove this eventually. */ - if (&v1 == &v2) return true; -``` - -So this immediately looks more like pointer equality of arbitrary *values* instead of functions. In fact -there is [no special code facilitating function equality][eqValues-function-eq]: - -```cpp - /* Functions are incomparable. */ - case nFunction: - return false; -``` - -So one takeaway of this is that pointer equality is neither dependent on functions nor attribute sets. -In fact, we can also write our `pointerEqual` function as: - -```nix -lhs: rhs: [ lhs ] == [ rhs ] -``` - -It's interesting that `EvalState::eqValues` forces the left and right-hand value before trying pointer -equality. It explains that `let x = throw ""; in x == x` does not evaluate successfully, but it is puzzling why -`let f = x: x; in f == f` does not return `true`. In fact, why do we need to wrap the values in a list or -attribute set at all for our `pointerEqual` function to work? - -The answer lies in [the code that evaluates `ExprOpEq`][ExprOpEq], -i.e. an expression involving the `==` operator: - -```cpp -void ExprOpEq::eval(EvalState & state, Env & env, Value & v) -{ - Value v1; e1->eval(state, env, v1); - Value v2; e2->eval(state, env, v2); - v.mkBool(state.eqValues(v1, v2)); -} -``` - -As you can see, two _distinct_ `Value` structs are created, so they can never be pointer equal even -if the `union` inside points to the same bit of memory. 
We can thus understand what actually happens -when we check the equality of an attribute set (or list), by looking at the following expression: - -```nix -let - x = { name = throw "nameless"; }; -in - -x == x # => causes an evaluation error -``` - -Because `x` can't be pointer equal, as it'll end up in the distinct structs `v1` and `v2`, it needs to be compared -by value. For this reason, the `name` attribute will be forced and an evaluation error caused. -If we rewrite the expression to use… - -```nix -{ inherit x; } == { inherit x; } # => true -``` - -…, it'll work: The two attribute sets are compared by value, but their `x` attribute turns out to be pointer -equal _after_ forcing it. This does not throw, since forcing an attribute set does not force its attributes' -values (as forcing a list doesn't force its elements). - -As we have seen, pointer equality can not only be used to compare function values, but also other -otherwise incomparable values, such as lists and attribute sets that would cause an evaluation -error if they were forced recursively. We can even switch out the `throw` for an `abort`. The limitation is -of course that we need to use a value that behaves differently depending on whether it is forced -“normally” (think `builtins.seq`) or recursively (think `builtins.deepSeq`), so thunks will generally be -evaluated before pointer equality can kick into effect. - -### Other Comparisons - -The `!=` operator uses `EvalState::eqValues` internally as well, so it behaves exactly as `!(a == b)`. - -The `>`, `<`, `>=` and `<=` operators all desugar to [CompareValues][] -eventually which generally looks at the value type before comparing. It does, -however, rely on `EvalState::eqValues` for list comparisons -([introduced in Nix 2.5][nix-2.5-changelog]), so it is possible to compare lists -with e.g. 
functions in them, as long as they are equal by pointer: - -```nix -let - f = x: x + 42; -in - -[ - ([ f 2 ] > [ f 1 ]) # => true - ([ f 2 ] > [ (x: x) 1]) # => error: cannot compare a function with a function - ([ f ] > [ f ]) # => false -] -``` - -Finally, since `builtins.elem` relies on `EvalState::eqValues`, you can check for -a function by pointer equality: - -```nix -let - f = x: f x; -in -builtins.elem f [ f 2 3 ] # => true -``` - -### Pointer Equality Preserving Nix Operations - -We have seen that pointer equality is established by comparing the memory -location of two C++ `Value` structs. But how does this _representation_ relate -to Nix values _themselves_ (in the sense of a platonic ideal if you will)? In -Nix, values have no identity (ignoring `unsafeGetAttrPos`) or memory location. - -Since Nix is purely functional, values can't be mutated, so they need to be -copied frequently. With Nix being garbage collected, there is no strong -expectation when a copy is made, we probably just hope it is done as seldomly as -possible to save on memory. With pointer equality leaking the memory location of -the `Value` structs to an extent, it is now suddenly our business to know -exactly _when_ a copy of a value is made. - -Evaluation in C++ Nix mainly proceeds along the following [two -functions][eval-maybeThunk]. - -```cpp -struct Expr -{ - /* … */ - virtual void eval(EvalState & state, Env & env, Value & v); - virtual Value * maybeThunk(EvalState & state, Env & env); - /* … */ -}; -``` - -As you can see, `Expr::eval` always takes a reference to a struct _allocated by -the caller_ to place the evaluation result in. Anything that is processed using -`Expr::eval` will be a copy of the `Value` struct even if the value before and -after are the same. - -`Expr::maybeThunk`, on the other hand, returns a pointer to a `Value` which may -already exist or be newly allocated. So, if evaluation passes through `maybeThunk`, -Nix values _can_ retain their pointer equality. 
Since Nix is lazy, a lot of -evaluation needs to be thunked and pass through `maybeThunk`—knowing under what -circumstances `maybeThunk` will return a pointer to an already existing `Value` -struct thus means knowing the circumstances under which pointer equality of a -Nix value will be preserved in C++ Nix. - -The [default case][maybeThunk-default] of `Expr::maybeThunk` allocates a new -`Value` which holds the delayed computation of the `Expr` as a thunk: - -```cpp - -Value * Expr::maybeThunk(EvalState & state, Env & env) -{ - Value * v = state.allocValue(); - mkThunk(*v, env, this); - return v; -} -``` - -Consequently, only special cased expressions could preserve pointer equality. -These are `ExprInt`, `ExprFloat`, `ExprString`, `ExprPath`—all of which relate -to creating new values—and [finally, `ExprVar`][maybeThunk-ExprVar]: - -```cpp -Value * ExprVar::maybeThunk(EvalState & state, Env & env) -{ - Value * v = state.lookupVar(&env, *this, true); - /* The value might not be initialised in the environment yet. - In that case, ignore it. */ - if (v) { state.nrAvoided++; return v; } - return Expr::maybeThunk(state, env); -} -``` - -Here we may actually return an already existing `Value` struct. Consequently, -accessing a value from the scope is the only thing you can do with a value in -C++ Nix that preserves its pointer equality, as the following example shows: -For example, using the select operator to get a value from an attribute set -or even passing a value trough the identity function invalidates its pointer -equality to itself (or rather, its former self). 
- -```nix -let - pointerEqual = a: b: [ a ] == [ b ]; - id = x: x; - - f = _: null; - x = { inherit f; }; - y = { inherit f; }; -in - -[ - (pointerEqual f f) # => true - - (pointerEqual f (id f)) # => false - - (pointerEqual x.f y.f) # => false - (pointerEqual x.f x.f) # => false - - (pointerEqual x x) # => true - (pointerEqual x y) # => true -] -``` - -In the last two cases, the example also shows that there is another way to -preserve pointer equality: Storing a value in an attribute set (or list) -preserves its pointer equality even if the structure holding it is modified in -some way (as long as the value we care about is left untouched). The catch is, -of course, that there is no way to get the value out of the structure while -preserving pointer equality (which requires using the select operator or a call -to `builtins.elemAt`). - -We initially illustrated the issue of pointer equality using the following -true expressions: - -* `stdenv.hostPlatform.canExecute != stdenv.hostPlatform.canExecute` -* `stdenv.hostPlatform == stdenv.hostPlatform` - -We can now add a third one, illustrating that pointer equality is invalidated -by select operations: - -* `[ stdenv.hostPlatform.canExecute ] != [ stdenv.hostPlatform.canExecute ]` - -To summarize, pointer equality is established on the memory location of the -`Value` struct in C++ Nix. Except for simple values (`int`, `bool`, …), -the `Value` struct only consists of a pointer to the actual representation -of the value (attribute set, list, function, …) and is thus cheap to copy. -In practice, this happens when a value passes through the evaluation of -almost any Nix expression. Only in the select cases described above -a value preserves its pointer equality despite being unchanged by an -expression. We can call this behavior *exterior pointer equality*. 
- -## Summary - -When comparing two Nix values, we must force both of them (non-recursively!), but are -allowed to short-circuit the comparison based on pointer equality, i.e. if they are at -the same exact value in memory, they are deemed equal immediately. This is completely -independent of what type of value they are. If they are not pointer equal, they are -(recursively) compared by value as expected. - -However, when evaluating the Nix expression `a == b`, we *must* invoke our implementation's -value equality function in a way that `a` and `b` themselves can never be deemed pointer equal. -Any values we encounter while recursing during the equality check must be compared by -pointer as described above, though. - -## Stability of the Feature - -Keen readers will have noticed the following comment in the C++ Nix source code, -indicating that pointer comparison may be removed in the future. - -```cpp - /* !!! Hack to support some old broken code that relies on pointer - equality tests between sets. (Specifically, builderDefs calls - uniqList on a list of sets.) Will remove this eventually. */ -``` - -Now, I can't speak for the upstream C++ Nix developers, but sure can speculate. -As already pointed out, this feature is currently needed for evaluating nixpkgs. -While its use could realistically be eliminated (only bothersome spot is probably -the `emulator` function, but that should also be doable), removing the feature -would seriously compromise C++ Nix's ability to evaluate historical nixpkgs -revision which is arguably a strength of the system. - -Another indication that it is likely here to stay is that it has already -[outlived builderDefs][], even though -it was (apparently) reintroduced just for this use case. More research into -the history of this feature would still be prudent, especially the reason for -its original introduction (maybe performance?). 
- -[puck-issue]: https://github.com/NixOS/nix/issues/3371 -[eqValues-pointer-eq]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/eval.cc#L2401-L2404 -[eqValues-function-eq]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/eval.cc#L2458-L2460 -[ExprOpEq]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/eval.cc#L1822-L1827 -[outlived builderDefs]: https://github.com/NixOS/nixpkgs/issues/4210 -[CompareValues]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/primops.cc#L569-L610 -[nix-2.5-changelog]: https://nixos.org/manual/nix/stable/release-notes/rl-2.5.html -[eval-maybeThunk]: https://github.com/NixOS/nix/blob/3c618c43c6044eda184df235c193877529e951cb/src/libexpr/nixexpr.hh#L161-L162 -[maybeThunk-default]: https://github.com/NixOS/nix/blob/8e770dac9f68162cfbb368e53f928df491babff3/src/libexpr/eval.cc#L1076-L1081 -[maybeThunk-ExprVar]: https://github.com/NixOS/nix/blob/8e770dac9f68162cfbb368e53f928df491babff3/src/libexpr/eval.cc#L1084-L1091 -- cgit 1.4.1