about summary refs log tree commit diff
diff options
context:
space:
mode:
authorsterni <sternenseemann@systemli.org>2022-09-15T14·38+0200
committersterni <sternenseemann@systemli.org>2022-09-15T15·52+0000
commit4eb33e82ff418de0ad811a0d3afffac84831fac4 (patch)
tree8007303407c8ea8fd35d251e75001ced0a0ac2c2
parentbcd7e520f076fb7a6b26805663fac2d70c677bc8 (diff)
fix(tvix/eval): coerce string interpolation parts to string r/4860
With this puzzle piece of string compilation in place, `compile_str`
becomes less redundant, as every part now needs to be compiled the same.
The thunking logic becomes a bit trickier, since we need to thunk even
in the case of `count == 1` if the single part is interpolating.
Splitting the inner (shared) code in a separate function turned out to
be easier for making rustc content.

Change-Id: I6a554ca599926ae5907d7acffce349c9616f568f
Reviewed-on: https://cl.tvl.fyi/c/depot/+/6582
Reviewed-by: tazjin <tazjin@tvl.su>
Tested-by: BuildkiteCI
-rw-r--r--tvix/eval/src/compiler/mod.rs80
-rw-r--r--tvix/eval/src/opcode.rs3
-rw-r--r--tvix/eval/src/vm.rs12
3 files changed, 59 insertions, 36 deletions
diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs
index a624cf11c74d..f0c9d0dc432f 100644
--- a/tvix/eval/src/compiler/mod.rs
+++ b/tvix/eval/src/compiler/mod.rs
@@ -257,49 +257,59 @@ impl Compiler<'_, '_> {
         self.emit_constant(value, &node);
     }
 
-    fn compile_str(&mut self, slot: LocalIdx, node: ast::Str) {
-        let parts = node.normalized_parts();
+    /// Helper that compiles the given string parts strictly. The caller
+    /// (`compile_str`) needs to figure out if the result of compiling this
+    /// needs to be thunked or not.
+    fn compile_str_parts(
+        &mut self,
+        slot: LocalIdx,
+        parent_node: &ast::Str,
+        parts: Vec<ast::InterpolPart<String>>,
+    ) {
         let count = parts.len();
 
-        if count != 1 {
-            self.thunk(slot, &node, |c, n, s| {
-                // The string parts are produced in literal order, however
-                // they need to be reversed on the stack in order to
-                // efficiently create the real string in case of
-                // interpolation.
-                for part in parts.into_iter().rev() {
-                    match part {
-                        // Interpolated expressions are compiled as normal and
-                        // dealt with by the VM before being assembled into
-                        // the final string. We need to force them here,
-                        // so OpInterpolate definitely has a string to consume.
-                        // TODO(sterni): coerce to string
-                        ast::InterpolPart::Interpolation(ipol) => {
-                            c.compile(s, ipol.expr().unwrap());
-                            c.emit_force(&ipol);
-                        }
-
-                        ast::InterpolPart::Literal(lit) => {
-                            c.emit_constant(Value::String(lit.into()), n);
-                        }
-                    }
-                }
-
-                c.push_op(OpCode::OpInterpolate(Count(count)), n);
-            });
-        } else {
-            match &parts[0] {
-                // Since we only have a single part, it is okay if this yields a thunk
-                // TODO(sterni): coerce to string
-                ast::InterpolPart::Interpolation(node) => {
-                    self.compile(slot, node.expr().unwrap());
+        // The string parts are produced in literal order, however
+        // they need to be reversed on the stack in order to
+        // efficiently create the real string in case of
+        // interpolation.
+        for part in parts.into_iter().rev() {
+            match part {
+                // Interpolated expressions are compiled as normal and
+                // dealt with by the VM before being assembled into
+                // the final string. We need to coerce them here,
+                // so OpInterpolate definitely has a string to consume.
+                ast::InterpolPart::Interpolation(ipol) => {
+                    self.compile(slot, ipol.expr().unwrap());
+                    // implicitly forces as well
+                    self.push_op(OpCode::OpCoerceToString, &ipol);
                 }
 
                 ast::InterpolPart::Literal(lit) => {
-                    self.emit_constant(Value::String(lit.as_str().into()), &node);
+                    self.emit_constant(Value::String(lit.into()), parent_node);
                 }
             }
         }
+
+        if count != 1 {
+            self.push_op(OpCode::OpInterpolate(Count(count)), parent_node);
+        }
+    }
+
+    fn compile_str(&mut self, slot: LocalIdx, node: ast::Str) {
+        let parts = node.normalized_parts();
+
+        // We need to thunk string expressions if they are the result of
+        // interpolation. A string that only consists of a single part (`"${foo}"`)
+        // can't desugar to the enclosed expression (`foo`) because we need to
+        // coerce the result to a string value. This would require forcing the
+        // value of the inner expression, so we need to wrap it in another thunk.
+        if parts.len() != 1 || matches!(&parts[0], ast::InterpolPart::Interpolation(_)) {
+            self.thunk(slot, &node, move |c, n, s| {
+                c.compile_str_parts(s, n, parts);
+            });
+        } else {
+            self.compile_str_parts(slot, &node, parts);
+        }
     }
 
     fn compile_unary_op(&mut self, slot: LocalIdx, op: ast::UnaryOp) {
diff --git a/tvix/eval/src/opcode.rs b/tvix/eval/src/opcode.rs
index aee45d7a4452..706aceac8357 100644
--- a/tvix/eval/src/opcode.rs
+++ b/tvix/eval/src/opcode.rs
@@ -105,6 +105,9 @@ pub enum OpCode {
 
     // Strings
     OpInterpolate(Count),
+    /// Force the Value on the stack and coerce it to a string, always using
+    /// `CoercionKind::Weak`.
+    OpCoerceToString,
 
     // Type assertion operators
     OpAssertBool,
diff --git a/tvix/eval/src/vm.rs b/tvix/eval/src/vm.rs
index 6fe94184b8a2..980a3ed0f5dd 100644
--- a/tvix/eval/src/vm.rs
+++ b/tvix/eval/src/vm.rs
@@ -9,7 +9,7 @@ use crate::{
     observer::Observer,
     opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx},
     upvalues::{UpvalueCarrier, Upvalues},
-    value::{Builtin, Closure, Lambda, NixAttrs, NixList, Thunk, Value},
+    value::{Builtin, Closure, CoercionKind, Lambda, NixAttrs, NixList, Thunk, Value},
 };
 
 struct CallFrame {
@@ -344,6 +344,16 @@ impl<'o> VM<'o> {
 
                 OpCode::OpInterpolate(Count(count)) => self.run_interpolate(count)?,
 
+                OpCode::OpCoerceToString => {
+                    // TODO: handle string context, copying to store
+                    let string = fallible!(
+                        self,
+                        // note that coerce_to_string also forces
+                        self.pop().coerce_to_string(CoercionKind::Weak, self)
+                    );
+                    self.push(Value::String(string));
+                }
+
                 OpCode::OpJump(JumpOffset(offset)) => {
                     debug_assert!(offset != 0);
                     self.frame_mut().ip += offset;