about summary refs log tree commit diff
path: root/tvix/eval/src/chunk.rs
diff options
context:
space:
mode:
Diffstat (limited to 'tvix/eval/src/chunk.rs')
-rw-r--r--tvix/eval/src/chunk.rs273
1 files changed, 268 insertions, 5 deletions
diff --git a/tvix/eval/src/chunk.rs b/tvix/eval/src/chunk.rs
index 3475e58f18..f1a35a6ce1 100644
--- a/tvix/eval/src/chunk.rs
+++ b/tvix/eval/src/chunk.rs
@@ -1,26 +1,289 @@
+use std::io::Write;
+use std::ops::{Index, IndexMut};
+
 use crate::opcode::{CodeIdx, ConstantIdx, OpCode};
 use crate::value::Value;
+use crate::SourceCode;
+
+/// Represents a source location from which one or more operations
+/// were compiled.
+///
+/// The span itself is an index into a [codemap::CodeMap], and the
+/// structure tracks the number of operations that were yielded from
+/// the same span.
+///
+/// At error reporting time, it becomes possible to either just fetch
+/// the textual representation of that span from the codemap, or to
+/// even re-parse the AST using rnix to create more semantically
+/// interesting errors.
+#[derive(Clone, Debug, PartialEq)]
+struct SourceSpan {
+    /// Span into the [codemap::CodeMap].
+    span: codemap::Span,
+
+    /// Index of the first operation covered by this span.
+    start: usize,
+}
 
+/// A chunk is a representation of a sequence of bytecode
+/// instructions, associated constants and additional metadata as
+/// emitted by the compiler.
 #[derive(Debug, Default)]
 pub struct Chunk {
     pub code: Vec<OpCode>,
-    constants: Vec<Value>,
+    pub constants: Vec<Value>,
+    spans: Vec<SourceSpan>,
+}
+
+impl Index<ConstantIdx> for Chunk {
+    type Output = Value;
+
+    fn index(&self, index: ConstantIdx) -> &Self::Output {
+        &self.constants[index.0]
+    }
+}
+
+impl Index<CodeIdx> for Chunk {
+    type Output = OpCode;
+
+    fn index(&self, index: CodeIdx) -> &Self::Output {
+        &self.code[index.0]
+    }
+}
+
+impl IndexMut<CodeIdx> for Chunk {
+    fn index_mut(&mut self, index: CodeIdx) -> &mut Self::Output {
+        &mut self.code[index.0]
+    }
 }
 
 impl Chunk {
-    pub fn add_op(&mut self, data: OpCode) -> CodeIdx {
+    pub fn push_op(&mut self, data: OpCode, span: codemap::Span) -> CodeIdx {
         let idx = self.code.len();
         self.code.push(data);
+        self.push_span(span, idx);
         CodeIdx(idx)
     }
 
-    pub fn add_constant(&mut self, data: Value) -> ConstantIdx {
+    /// Get the first span of a chunk, no questions asked.
+    pub fn first_span(&self) -> codemap::Span {
+        self.spans[0].span
+    }
+
+    /// Return a reference to the last op in the chunk, if any
+    pub fn last_op(&self) -> Option<&OpCode> {
+        self.code.last()
+    }
+
+    /// Pop the last operation from the chunk and clean up its tracked
+    /// span. Used when the compiler backtracks.
+    pub fn pop_op(&mut self) {
+        // Simply drop the last op.
+        self.code.pop();
+
+        if let Some(span) = self.spans.last() {
+            // If the last span started at this op, drop it.
+            if span.start == self.code.len() {
+                self.spans.pop();
+            }
+        }
+    }
+
+    pub fn push_constant(&mut self, data: Value) -> ConstantIdx {
         let idx = self.constants.len();
         self.constants.push(data);
         ConstantIdx(idx)
     }
 
-    pub fn constant(&self, idx: ConstantIdx) -> &Value {
-        &self.constants[idx.0]
+    /// Return a reference to the constant at the given [`ConstantIdx`]
+    pub fn get_constant(&self, constant: ConstantIdx) -> Option<&Value> {
+        self.constants.get(constant.0)
+    }
+
+    // Span tracking implementation
+
+    fn push_span(&mut self, span: codemap::Span, start: usize) {
+        match self.spans.last_mut() {
+            // We do not need to insert the same span again, as this
+            // instruction was compiled from the same span as the last
+            // one.
+            Some(last) if last.span == span => {}
+
+            // In all other cases, this is a new source span.
+            _ => self.spans.push(SourceSpan { span, start }),
+        }
+    }
+
+    /// Retrieve the [codemap::Span] from which the instruction at
+    /// `offset` was compiled.
+    pub fn get_span(&self, offset: CodeIdx) -> codemap::Span {
+        let position = self
+            .spans
+            .binary_search_by(|span| span.start.cmp(&offset.0));
+
+        let span = match position {
+            Ok(index) => &self.spans[index],
+            Err(index) => {
+                if index == 0 {
+                    &self.spans[0]
+                } else {
+                    &self.spans[index - 1]
+                }
+            }
+        };
+
+        span.span
+    }
+
+    /// Write the disassembler representation of the operation at
+    /// `idx` to the specified writer.
+    pub fn disassemble_op<W: Write>(
+        &self,
+        writer: &mut W,
+        source: &SourceCode,
+        width: usize,
+        idx: CodeIdx,
+    ) -> Result<(), std::io::Error> {
+        write!(writer, "{:#width$x}\t ", idx.0, width = width)?;
+
+        // Print continuation character if the previous operation was at
+        // the same line, otherwise print the line.
+        let line = source.get_line(self.get_span(idx));
+        if idx.0 > 0 && source.get_line(self.get_span(CodeIdx(idx.0 - 1))) == line {
+            write!(writer, "   |\t")?;
+        } else {
+            write!(writer, "{:4}\t", line)?;
+        }
+
+        match self[idx] {
+            OpCode::OpConstant(idx) => {
+                let val_str = match &self[idx] {
+                    Value::Thunk(t) => t.debug_repr(),
+                    Value::Closure(c) => format!("closure({:p})", c.lambda),
+                    val => format!("{}", val),
+                };
+
+                writeln!(writer, "OpConstant({}@{})", val_str, idx.0)
+            }
+            op => writeln!(writer, "{:?}", op),
+        }?;
+
+        Ok(())
+    }
+
+    /// Extend this chunk with the content of another, moving out of the other
+    /// in the process.
+    ///
+    /// This is used by the compiler when it detects that it unnecessarily
+    /// thunked a nested expression.
+    pub fn extend(&mut self, other: Self) {
+        // Some operations need to be modified in certain ways before being
+        // valid as part of the new chunk.
+        let const_count = self.constants.len();
+        for (idx, op) in other.code.iter().enumerate() {
+            let span = other.get_span(CodeIdx(idx));
+            match op {
+                // As the constants shift, the index needs to be moved relatively.
+                OpCode::OpConstant(ConstantIdx(idx)) => {
+                    self.push_op(OpCode::OpConstant(ConstantIdx(idx + const_count)), span)
+                }
+
+                // Other operations either operate on relative offsets, or no
+                // offsets, and are safe to keep as-is.
+                _ => self.push_op(*op, span),
+            };
+        }
+
+        self.constants.extend(other.constants);
+        self.spans.extend(other.spans);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::test_utils::dummy_span;
+
+    // Note: These tests are about the functionality of the `Chunk` type, the
+    // opcodes used below do *not* represent valid, executable Tvix code (and
+    // don't need to).
+
+    #[test]
+    fn push_op() {
+        let mut chunk = Chunk::default();
+        chunk.push_op(OpCode::OpAdd, dummy_span());
+        assert_eq!(chunk.code.last().unwrap(), &OpCode::OpAdd);
+    }
+
+    #[test]
+    fn extend_empty() {
+        let mut chunk = Chunk::default();
+        chunk.push_op(OpCode::OpAdd, dummy_span());
+
+        let other = Chunk::default();
+        chunk.extend(other);
+
+        assert_eq!(
+            chunk.code,
+            vec![OpCode::OpAdd],
+            "code should not have changed"
+        );
+    }
+
+    #[test]
+    fn extend_simple() {
+        let span = dummy_span();
+        let mut chunk = Chunk::default();
+        chunk.push_op(OpCode::OpAdd, span);
+
+        let mut other = Chunk::default();
+        other.push_op(OpCode::OpSub, span);
+        other.push_op(OpCode::OpMul, span);
+
+        let expected_code = vec![OpCode::OpAdd, OpCode::OpSub, OpCode::OpMul];
+
+        chunk.extend(other);
+
+        assert_eq!(chunk.code, expected_code, "code should have been extended");
+    }
+
+    #[test]
+    fn extend_with_constant() {
+        let span = dummy_span();
+        let mut chunk = Chunk::default();
+        chunk.push_op(OpCode::OpAdd, span);
+        let cidx = chunk.push_constant(Value::Integer(0));
+        assert_eq!(
+            cidx.0, 0,
+            "first constant in main chunk should have index 0"
+        );
+        chunk.push_op(OpCode::OpConstant(cidx), span);
+
+        let mut other = Chunk::default();
+        other.push_op(OpCode::OpSub, span);
+        let other_cidx = other.push_constant(Value::Integer(1));
+        assert_eq!(
+            other_cidx.0, 0,
+            "first constant in other chunk should have index 0"
+        );
+        other.push_op(OpCode::OpConstant(other_cidx), span);
+
+        chunk.extend(other);
+
+        let expected_code = vec![
+            OpCode::OpAdd,
+            OpCode::OpConstant(ConstantIdx(0)),
+            OpCode::OpSub,
+            OpCode::OpConstant(ConstantIdx(1)), // <- note: this was rewritten
+        ];
+
+        assert_eq!(
+            chunk.code, expected_code,
+            "code should have been extended and rewritten"
+        );
+
+        assert_eq!(chunk.constants.len(), 2);
+        assert!(matches!(chunk.constants[0], Value::Integer(0)));
+        assert!(matches!(chunk.constants[1], Value::Integer(1)));
     }
 }