about summary refs log tree commit diff
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2023-05-26T18·38+0200
committerclbot <clbot@tvl.fyi>2023-05-26T22·35+0000
commit3b33c19a9c43cf66c2d28ffa3d49bb6e8757d9b1 (patch)
tree27fa4f35dfe0e069bb1e79bf034ec3ff51bc0157
parenta4d0a80466016281c3f2b90a1ade66338d258153 (diff)
fix(tvix): don't call function eagerly in genList, map & mapAttrs r/6207
mapAttrs, map and genList call Nix functions provided by the caller and
store the result of applying them in a Nix data structure that does not
force all of its contents when forced itself. This means that when such
a builtin application is forced, the Nix function calls performed by the
builtin should not be forced: They may be forced later, but it is also
possible that they will never be forced, e.g. in

    builtins.length (builtins.map (builtins.add 2) [ 1 2 3 ])

it is not necessary to compute a single application of builtins.add.

Since request_call_with immediately performs the function call
requested, Tvix would compute function applications unnecessarily before
this change. Because this was not followed by a request_force, the
impact of this was relatively low in Nix code (most functions return a
new thunk after being applied), but it was enough to cause a lot of
bogus builtins.trace applications when evaluating anything from
`lib.modules`. The newly added test includes many cases where Tvix
previously incorrectly applied a builtin, breaking a working expression.

To fix this we add a new helper to construct a Thunk performing a
function application at runtime from a function and argument given as
`Value`s. This mimics the compiler's compile_apply(), but does itself
not require a compiler, since the necessary Lambda can be constructed
independently.

I also looked into other builtins that call a Nix function to verify
that they don't exhibit such a problem:

- Many builtins immediately use the resulting value in a way that makes
  it necessary to compute all the function calls they do as soon as
  the outer builtin application is forced:

  * all
  * any
  * filter
  * groupBy
  * partition

- concatMap needs to (shallowly) force the returned list for
  concatenation.

- foldl' is strict in the application of `op` (I added a comment that
  makes this explicit).

- genericClosure needs to (shallowly) force the resulting list and some
  keys of the attribute sets inside.

Resolves b/272.

Change-Id: I1fa53f744bcffc035da84c1f97ed25d146830446
Reviewed-on: https://cl.tvl.fyi/c/depot/+/8651
Autosubmit: sterni <sternenseemann@systemli.org>
Tested-by: BuildkiteCI
Reviewed-by: tazjin <tazjin@tvl.su>
-rw-r--r--tvix/eval/src/builtins/mod.rs30
-rw-r--r--tvix/eval/src/tests/tvix_tests/eval-okay-builtins-thunked-function-calls.exp1
-rw-r--r--tvix/eval/src/tests/tvix_tests/eval-okay-builtins-thunked-function-calls.nix31
-rw-r--r--tvix/eval/src/value/thunk.rs31
4 files changed, 88 insertions, 5 deletions
diff --git a/tvix/eval/src/builtins/mod.rs b/tvix/eval/src/builtins/mod.rs
index d774e88138..53ad6f3f8e 100644
--- a/tvix/eval/src/builtins/mod.rs
+++ b/tvix/eval/src/builtins/mod.rs
@@ -17,7 +17,7 @@ use crate::vm::generators::{self, GenCo};
 use crate::warnings::WarningKind;
 use crate::{
     errors::ErrorKind,
-    value::{CoercionKind, NixAttrs, NixList, NixString, SharedThunkSet, Value},
+    value::{CoercionKind, NixAttrs, NixList, NixString, SharedThunkSet, Thunk, Value},
 };
 
 use self::versions::{VersionPart, VersionPartsIter};
@@ -307,6 +307,9 @@ mod pure_builtins {
         let mut nul = nul;
         let list = list.to_list()?;
         for val in list {
+            // Every call of `op` is forced immediately, but `nul` is not, see
+            // https://github.com/NixOS/nix/blob/940e9eb8/src/libexpr/primops.cc#L3069-L3070C36
+            // and our tests for foldl'.
             nul = generators::request_call_with(&co, op.clone(), [nul, val]).await;
             nul = generators::request_force(&co, nul).await;
         }
@@ -397,9 +400,15 @@ mod pure_builtins {
     ) -> Result<Value, ErrorKind> {
         let mut out = imbl::Vector::<Value>::new();
         let len = length.as_int()?;
+        // the best span we can get…
+        let span = generators::request_span(&co).await;
 
         for i in 0..len {
-            let val = generators::request_call_with(&co, generator.clone(), [i.into()]).await;
+            let val = Value::Thunk(Thunk::new_suspended_call(
+                generator.clone(),
+                i.into(),
+                span.clone(),
+            ));
             out.push_back(val);
         }
 
@@ -551,8 +560,11 @@ mod pure_builtins {
     async fn builtin_map(co: GenCo, f: Value, list: Value) -> Result<Value, ErrorKind> {
         let mut out = imbl::Vector::<Value>::new();
 
+        // the best span we can get…
+        let span = generators::request_span(&co).await;
+
         for val in list.to_list()? {
-            let result = generators::request_call_with(&co, f.clone(), [val]).await;
+            let result = Value::Thunk(Thunk::new_suspended_call(f.clone(), val, span.clone()));
             out.push_back(result)
         }
 
@@ -564,9 +576,17 @@ mod pure_builtins {
         let attrs = attrs.to_attrs()?;
         let mut out = imbl::OrdMap::new();
 
+        // the best span we can get…
+        let span = generators::request_span(&co).await;
+
         for (key, value) in attrs.into_iter() {
-            let result =
-                generators::request_call_with(&co, f.clone(), [key.clone().into(), value]).await;
+            let result = Value::Thunk(Thunk::new_suspended_call(
+                f.clone(),
+                key.clone().into(),
+                span.clone(),
+            ));
+            let result = Value::Thunk(Thunk::new_suspended_call(result, value, span.clone()));
+
             out.insert(key, result);
         }
 
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-thunked-function-calls.exp b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-thunked-function-calls.exp
new file mode 100644
index 0000000000..3d4204d5a8
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-thunked-function-calls.exp
@@ -0,0 +1 @@
+[ 2 [ "Hans" "James" "Joachim" ] 2 [ "Clawdia" "Mynheer" ] 981 3 2 2 ]
diff --git a/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-thunked-function-calls.nix b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-thunked-function-calls.nix
new file mode 100644
index 0000000000..d96ddb3bd1
--- /dev/null
+++ b/tvix/eval/src/tests/tvix_tests/eval-okay-builtins-thunked-function-calls.nix
@@ -0,0 +1,31 @@
+[
+  # This is independent of builtins
+  (builtins.length [ (builtins.throw "Ferge") (builtins.throw "Wehsal") ])
+  (builtins.attrNames {
+    Hans = throw "Castorp";
+    Joachim = throw "Ziemßen";
+    James = "Tienappel";
+  })
+
+  (builtins.length (builtins.map builtins.throw [ "Settembrini" "Naphta" ]))
+
+  (builtins.attrNames (builtins.mapAttrs builtins.throw {
+    Clawdia = "Chauchat";
+    Mynheer = "Peeperkorn";
+  }))
+
+  (builtins.length (builtins.genList (builtins.add "Marusja") 981))
+  (builtins.length (builtins.genList builtins.throw 3))
+
+  # These are hard to get wrong since the outer layer needs to be forced anyways
+  (builtins.length (builtins.genericClosure {
+    startSet = [
+      { key = 1; initial = true; }
+    ];
+    operator = { key, initial, ... }:
+      if initial
+      then [ { key = key - 1; initial = false;  value = throw "lol"; } ]
+      else [ ];
+  }))
+  (builtins.length (builtins.concatMap (m: [ m (builtins.throw m) ]) [ "Marusja" ]))
+]
diff --git a/tvix/eval/src/value/thunk.rs b/tvix/eval/src/value/thunk.rs
index 1acb50a94b..9c048d40b4 100644
--- a/tvix/eval/src/value/thunk.rs
+++ b/tvix/eval/src/value/thunk.rs
@@ -27,6 +27,7 @@ use std::{
 
 use crate::{
     errors::ErrorKind,
+    opcode::OpCode,
     spans::LightSpan,
     upvalues::Upvalues,
     value::Closure,
@@ -124,6 +125,36 @@ impl Thunk {
         )))))
     }
 
+    /// Helper function to create a [`Thunk`] that calls a function given as the
+    /// [`Value`] `callee` with the argument `arg` when it is forced. This is
+    /// particularly useful in builtin implementations if the result of calling
+    /// a function does not need to be forced immediately, because e.g. it is
+    /// stored in an attribute set.
+    pub fn new_suspended_call(callee: Value, arg: Value, light_span: LightSpan) -> Self {
+        let mut lambda = Lambda::default();
+        let span = light_span.span();
+
+        let arg_idx = lambda.chunk().push_constant(arg);
+        let f_idx = lambda.chunk().push_constant(callee);
+
+        // This is basically a recreation of compile_apply():
+        // We need to push the argument onto the stack and then the function.
+        // The function (not the argument) needs to be forced before calling.
+        lambda.chunk.push_op(OpCode::OpConstant(arg_idx), span);
+        lambda.chunk().push_op(OpCode::OpConstant(f_idx), span);
+        lambda.chunk.push_op(OpCode::OpForce, span);
+        lambda.chunk.push_op(OpCode::OpCall, span);
+
+        // Inform the VM that the chunk has ended
+        lambda.chunk.push_op(OpCode::OpReturn, span);
+
+        Thunk(Rc::new(RefCell::new(ThunkRepr::Suspended {
+            upvalues: Rc::new(Upvalues::with_capacity(0)),
+            lambda: Rc::new(lambda),
+            light_span,
+        })))
+    }
+
     fn prepare_blackhole(&self, forced_at: LightSpan) -> ThunkRepr {
         match &*self.0.borrow() {
             ThunkRepr::Suspended {