about summary refs log tree commit diff
path: root/tvix/eval/src/chunk.rs
blob: f1a35a6ce162f5e5f0530e3cd21913d715a4b973 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
use std::io::Write;
use std::ops::{Index, IndexMut};

use crate::opcode::{CodeIdx, ConstantIdx, OpCode};
use crate::value::Value;
use crate::SourceCode;

/// Represents a source location from which one or more operations
/// were compiled.
///
/// The span itself is an index into a [codemap::CodeMap], and the
/// structure tracks the number of operations that were yielded from
/// the same span.
///
/// At error reporting time, it becomes possible to either just fetch
/// the textual representation of that span from the codemap, or to
/// even re-parse the AST using rnix to create more semantically
/// interesting errors.
#[derive(Clone, Debug, PartialEq)]
struct SourceSpan {
    /// Span into the [codemap::CodeMap].
    span: codemap::Span,

    /// Index of the first operation covered by this span.
    start: usize,
}

/// A chunk is a representation of a sequence of bytecode
/// instructions, associated constants and additional metadata as
/// emitted by the compiler.
#[derive(Debug, Default)]
pub struct Chunk {
    pub code: Vec<OpCode>,
    pub constants: Vec<Value>,
    spans: Vec<SourceSpan>,
}

impl Index<ConstantIdx> for Chunk {
    type Output = Value;

    fn index(&self, index: ConstantIdx) -> &Self::Output {
        &self.constants[index.0]
    }
}

impl Index<CodeIdx> for Chunk {
    type Output = OpCode;

    fn index(&self, index: CodeIdx) -> &Self::Output {
        &self.code[index.0]
    }
}

impl IndexMut<CodeIdx> for Chunk {
    fn index_mut(&mut self, index: CodeIdx) -> &mut Self::Output {
        &mut self.code[index.0]
    }
}

impl Chunk {
    pub fn push_op(&mut self, data: OpCode, span: codemap::Span) -> CodeIdx {
        let idx = self.code.len();
        self.code.push(data);
        self.push_span(span, idx);
        CodeIdx(idx)
    }

    /// Get the first span of a chunk, no questions asked.
    pub fn first_span(&self) -> codemap::Span {
        self.spans[0].span
    }

    /// Return a reference to the last op in the chunk, if any
    pub fn last_op(&self) -> Option<&OpCode> {
        self.code.last()
    }

    /// Pop the last operation from the chunk and clean up its tracked
    /// span. Used when the compiler backtracks.
    pub fn pop_op(&mut self) {
        // Simply drop the last op.
        self.code.pop();

        if let Some(span) = self.spans.last() {
            // If the last span started at this op, drop it.
            if span.start == self.code.len() {
                self.spans.pop();
            }
        }
    }

    pub fn push_constant(&mut self, data: Value) -> ConstantIdx {
        let idx = self.constants.len();
        self.constants.push(data);
        ConstantIdx(idx)
    }

    /// Return a reference to the constant at the given [`ConstantIdx`]
    pub fn get_constant(&self, constant: ConstantIdx) -> Option<&Value> {
        self.constants.get(constant.0)
    }

    // Span tracking implementation

    fn push_span(&mut self, span: codemap::Span, start: usize) {
        match self.spans.last_mut() {
            // We do not need to insert the same span again, as this
            // instruction was compiled from the same span as the last
            // one.
            Some(last) if last.span == span => {}

            // In all other cases, this is a new source span.
            _ => self.spans.push(SourceSpan { span, start }),
        }
    }

    /// Retrieve the [codemap::Span] from which the instruction at
    /// `offset` was compiled.
    pub fn get_span(&self, offset: CodeIdx) -> codemap::Span {
        let position = self
            .spans
            .binary_search_by(|span| span.start.cmp(&offset.0));

        let span = match position {
            Ok(index) => &self.spans[index],
            Err(index) => {
                if index == 0 {
                    &self.spans[0]
                } else {
                    &self.spans[index - 1]
                }
            }
        };

        span.span
    }

    /// Write the disassembler representation of the operation at
    /// `idx` to the specified writer.
    pub fn disassemble_op<W: Write>(
        &self,
        writer: &mut W,
        source: &SourceCode,
        width: usize,
        idx: CodeIdx,
    ) -> Result<(), std::io::Error> {
        write!(writer, "{:#width$x}\t ", idx.0, width = width)?;

        // Print continuation character if the previous operation was at
        // the same line, otherwise print the line.
        let line = source.get_line(self.get_span(idx));
        if idx.0 > 0 && source.get_line(self.get_span(CodeIdx(idx.0 - 1))) == line {
            write!(writer, "   |\t")?;
        } else {
            write!(writer, "{:4}\t", line)?;
        }

        match self[idx] {
            OpCode::OpConstant(idx) => {
                let val_str = match &self[idx] {
                    Value::Thunk(t) => t.debug_repr(),
                    Value::Closure(c) => format!("closure({:p})", c.lambda),
                    val => format!("{}", val),
                };

                writeln!(writer, "OpConstant({}@{})", val_str, idx.0)
            }
            op => writeln!(writer, "{:?}", op),
        }?;

        Ok(())
    }

    /// Extend this chunk with the content of another, moving out of the other
    /// in the process.
    ///
    /// This is used by the compiler when it detects that it unnecessarily
    /// thunked a nested expression.
    pub fn extend(&mut self, other: Self) {
        // Some operations need to be modified in certain ways before being
        // valid as part of the new chunk.
        let const_count = self.constants.len();
        for (idx, op) in other.code.iter().enumerate() {
            let span = other.get_span(CodeIdx(idx));
            match op {
                // As the constants shift, the index needs to be moved relatively.
                OpCode::OpConstant(ConstantIdx(idx)) => {
                    self.push_op(OpCode::OpConstant(ConstantIdx(idx + const_count)), span)
                }

                // Other operations either operate on relative offsets, or no
                // offsets, and are safe to keep as-is.
                _ => self.push_op(*op, span),
            };
        }

        self.constants.extend(other.constants);
        self.spans.extend(other.spans);
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::test_utils::dummy_span;

    // Note: These tests are about the functionality of the `Chunk` type, the
    // opcodes used below do *not* represent valid, executable Tvix code (and
    // don't need to).

    #[test]
    fn push_op() {
        let mut chunk = Chunk::default();
        chunk.push_op(OpCode::OpAdd, dummy_span());
        assert_eq!(chunk.code.last().unwrap(), &OpCode::OpAdd);
    }

    #[test]
    fn extend_empty() {
        let mut chunk = Chunk::default();
        chunk.push_op(OpCode::OpAdd, dummy_span());

        let other = Chunk::default();
        chunk.extend(other);

        assert_eq!(
            chunk.code,
            vec![OpCode::OpAdd],
            "code should not have changed"
        );
    }

    #[test]
    fn extend_simple() {
        let span = dummy_span();
        let mut chunk = Chunk::default();
        chunk.push_op(OpCode::OpAdd, span);

        let mut other = Chunk::default();
        other.push_op(OpCode::OpSub, span);
        other.push_op(OpCode::OpMul, span);

        let expected_code = vec![OpCode::OpAdd, OpCode::OpSub, OpCode::OpMul];

        chunk.extend(other);

        assert_eq!(chunk.code, expected_code, "code should have been extended");
    }

    #[test]
    fn extend_with_constant() {
        let span = dummy_span();
        let mut chunk = Chunk::default();
        chunk.push_op(OpCode::OpAdd, span);
        let cidx = chunk.push_constant(Value::Integer(0));
        assert_eq!(
            cidx.0, 0,
            "first constant in main chunk should have index 0"
        );
        chunk.push_op(OpCode::OpConstant(cidx), span);

        let mut other = Chunk::default();
        other.push_op(OpCode::OpSub, span);
        let other_cidx = other.push_constant(Value::Integer(1));
        assert_eq!(
            other_cidx.0, 0,
            "first constant in other chunk should have index 0"
        );
        other.push_op(OpCode::OpConstant(other_cidx), span);

        chunk.extend(other);

        let expected_code = vec![
            OpCode::OpAdd,
            OpCode::OpConstant(ConstantIdx(0)),
            OpCode::OpSub,
            OpCode::OpConstant(ConstantIdx(1)), // <- note: this was rewritten
        ];

        assert_eq!(
            chunk.code, expected_code,
            "code should have been extended and rewritten"
        );

        assert_eq!(chunk.constants.len(), 2);
        assert!(matches!(chunk.constants[0], Value::Integer(0)));
        assert!(matches!(chunk.constants[1], Value::Integer(1)));
    }
}