From 4eb33e82ff418de0ad811a0d3afffac84831fac4 Mon Sep 17 00:00:00 2001 From: sterni Date: Thu, 15 Sep 2022 16:38:35 +0200 Subject: fix(tvix/eval): coerce string interpolation parts to string With this puzzle piece of string compilation in place, `compile_str` becomes less redundant, as every part now needs to be compiled the same. The thunking logic becomes a bit trickier, since we need to thunk even in the case of `count == 1` if the single part is interpolating. Splitting the inner (shared) code in a separate function turned out to be easier for making rustc content. Change-Id: I6a554ca599926ae5907d7acffce349c9616f568f Reviewed-on: https://cl.tvl.fyi/c/depot/+/6582 Reviewed-by: tazjin Tested-by: BuildkiteCI --- tvix/eval/src/compiler/mod.rs | 80 ++++++++++++++++++++++++------------------- tvix/eval/src/opcode.rs | 3 ++ tvix/eval/src/vm.rs | 12 ++++++- 3 files changed, 59 insertions(+), 36 deletions(-) diff --git a/tvix/eval/src/compiler/mod.rs b/tvix/eval/src/compiler/mod.rs index a624cf11c74d..f0c9d0dc432f 100644 --- a/tvix/eval/src/compiler/mod.rs +++ b/tvix/eval/src/compiler/mod.rs @@ -257,49 +257,59 @@ impl Compiler<'_, '_> { self.emit_constant(value, &node); } - fn compile_str(&mut self, slot: LocalIdx, node: ast::Str) { - let parts = node.normalized_parts(); + /// Helper that compiles the given string parts strictly. The caller + /// (`compile_str`) needs to figure out if the result of compiling this + /// needs to be thunked or not. + fn compile_str_parts( + &mut self, + slot: LocalIdx, + parent_node: &ast::Str, + parts: Vec>, + ) { let count = parts.len(); - if count != 1 { - self.thunk(slot, &node, |c, n, s| { - // The string parts are produced in literal order, however - // they need to be reversed on the stack in order to - // efficiently create the real string in case of - // interpolation. - for part in parts.into_iter().rev() { - match part { - // Interpolated expressions are compiled as normal and - // dealt with by the VM before being assembled into - // the final string. We need to force them here, - // so OpInterpolate definitely has a string to consume. - // TODO(sterni): coerce to string - ast::InterpolPart::Interpolation(ipol) => { - c.compile(s, ipol.expr().unwrap()); - c.emit_force(&ipol); - } - - ast::InterpolPart::Literal(lit) => { - c.emit_constant(Value::String(lit.into()), n); - } - } - } - - c.push_op(OpCode::OpInterpolate(Count(count)), n); - }); - } else { - match &parts[0] { - // Since we only have a single part, it is okay if this yields a thunk - // TODO(sterni): coerce to string - ast::InterpolPart::Interpolation(node) => { - self.compile(slot, node.expr().unwrap()); + // The string parts are produced in literal order, however + // they need to be reversed on the stack in order to + // efficiently create the real string in case of + // interpolation. + for part in parts.into_iter().rev() { + match part { + // Interpolated expressions are compiled as normal and + // dealt with by the VM before being assembled into + // the final string. We need to coerce them here, + // so OpInterpolate definitely has a string to consume. + ast::InterpolPart::Interpolation(ipol) => { + self.compile(slot, ipol.expr().unwrap()); + // implicitly forces as well + self.push_op(OpCode::OpCoerceToString, &ipol); } ast::InterpolPart::Literal(lit) => { - self.emit_constant(Value::String(lit.as_str().into()), &node); + self.emit_constant(Value::String(lit.into()), parent_node); } } } + + if count != 1 { + self.push_op(OpCode::OpInterpolate(Count(count)), parent_node); + } + } + + fn compile_str(&mut self, slot: LocalIdx, node: ast::Str) { + let parts = node.normalized_parts(); + + // We need to thunk string expressions if they are the result of + // interpolation. A string that only consists of a single part (`"${foo}"`) + // can't desugar to the enclosed expression (`foo`) because we need to + // coerce the result to a string value. This would require forcing the + // value of the inner expression, so we need to wrap it in another thunk. + if parts.len() != 1 || matches!(&parts[0], ast::InterpolPart::Interpolation(_)) { + self.thunk(slot, &node, move |c, n, s| { + c.compile_str_parts(s, n, parts); + }); + } else { + self.compile_str_parts(slot, &node, parts); + } } fn compile_unary_op(&mut self, slot: LocalIdx, op: ast::UnaryOp) { diff --git a/tvix/eval/src/opcode.rs b/tvix/eval/src/opcode.rs index aee45d7a4452..706aceac8357 100644 --- a/tvix/eval/src/opcode.rs +++ b/tvix/eval/src/opcode.rs @@ -105,6 +105,9 @@ pub enum OpCode { // Strings OpInterpolate(Count), + /// Force the Value on the stack and coerce it to a string, always using + /// `CoercionKind::Weak`. + OpCoerceToString, // Type assertion operators OpAssertBool, diff --git a/tvix/eval/src/vm.rs b/tvix/eval/src/vm.rs index 6fe94184b8a2..980a3ed0f5dd 100644 --- a/tvix/eval/src/vm.rs +++ b/tvix/eval/src/vm.rs @@ -9,7 +9,7 @@ use crate::{ observer::Observer, opcode::{CodeIdx, Count, JumpOffset, OpCode, StackIdx, UpvalueIdx}, upvalues::{UpvalueCarrier, Upvalues}, - value::{Builtin, Closure, Lambda, NixAttrs, NixList, Thunk, Value}, + value::{Builtin, Closure, CoercionKind, Lambda, NixAttrs, NixList, Thunk, Value}, }; struct CallFrame { @@ -344,6 +344,16 @@ impl<'o> VM<'o> { OpCode::OpInterpolate(Count(count)) => self.run_interpolate(count)?, + OpCode::OpCoerceToString => { + // TODO: handle string context, copying to store + let string = fallible!( + self, + // note that coerce_to_string also forces + self.pop().coerce_to_string(CoercionKind::Weak, self) + ); + self.push(Value::String(string)); + } + OpCode::OpJump(JumpOffset(offset)) => { debug_assert!(offset != 0); self.frame_mut().ip += offset; -- cgit 1.4.1