//! This module implements a compiler for compiling the rnix AST //! representation to Tvix bytecode. //! //! A note on `unwrap()`: This module contains a lot of calls to //! `unwrap()` or `expect(...)` on data structures returned by `rnix`. //! The reason for this is that rnix uses the same data structures to //! represent broken and correct ASTs, so all typed AST variants have //! the ability to represent an incorrect node. //! //! However, at the time that the AST is passed to the compiler we //! have verified that `rnix` considers the code to be correct, so all //! variants are filed. In cases where the invariant is guaranteed by //! the code in this module, `debug_assert!` has been used to catch //! mistakes early during development. use crate::chunk::Chunk; use crate::errors::EvalResult; use crate::opcode::{CodeIdx, OpCode}; use crate::value::Value; use rnix; use rnix::types::{BinOpKind, EntryHolder, TokenWrapper, TypedNode, Wrapper}; struct Compiler { chunk: Chunk, } impl Compiler { fn compile(&mut self, node: rnix::SyntaxNode) -> EvalResult<()> { match node.kind() { // Root of a file contains no content, it's just a marker // type. rnix::SyntaxKind::NODE_ROOT => self.compile(node.first_child().expect("TODO")), // Literals contain a single token comprising of the // literal itself. rnix::SyntaxKind::NODE_LITERAL => { let value = rnix::types::Value::cast(node).unwrap(); self.compile_literal(value.to_value().expect("TODO")) } rnix::SyntaxKind::NODE_STRING => { let op = rnix::types::Str::cast(node).unwrap(); self.compile_string(op) } // The interpolation & dynamic nodes are just wrappers // around the inner value of a fragment, they only require // unwrapping. rnix::SyntaxKind::NODE_STRING_INTERPOL | rnix::SyntaxKind::NODE_DYNAMIC => { self.compile(node.first_child().expect("TODO (should not be possible)")) } rnix::SyntaxKind::NODE_BIN_OP => { let op = rnix::types::BinOp::cast(node).expect("TODO (should not be possible)"); self.compile_binop(op) } rnix::SyntaxKind::NODE_UNARY_OP => { let op = rnix::types::UnaryOp::cast(node).expect("TODO: (should not be possible)"); self.compile_unary_op(op) } rnix::SyntaxKind::NODE_PAREN => { let node = rnix::types::Paren::cast(node).unwrap(); self.compile(node.inner().unwrap()) } rnix::SyntaxKind::NODE_IDENT => { let node = rnix::types::Ident::cast(node).unwrap(); self.compile_ident(node) } rnix::SyntaxKind::NODE_ATTR_SET => { let node = rnix::types::AttrSet::cast(node).unwrap(); self.compile_attr_set(node) } rnix::SyntaxKind::NODE_SELECT => { let node = rnix::types::Select::cast(node).unwrap(); self.compile_select(node) } rnix::SyntaxKind::NODE_LIST => { let node = rnix::types::List::cast(node).unwrap(); self.compile_list(node) } rnix::SyntaxKind::NODE_IF_ELSE => { let node = rnix::types::IfElse::cast(node).unwrap(); self.compile_if_else(node) } kind => panic!("visiting unsupported node: {:?}", kind), } } /// Compiles nodes the same way that `Self::compile` does, with /// the exception of identifiers which are added literally to the /// stack as string values. /// /// This is needed for correctly accessing attribute sets. fn compile_with_literal_ident(&mut self, node: rnix::SyntaxNode) -> EvalResult<()> { if node.kind() == rnix::SyntaxKind::NODE_IDENT { let ident = rnix::types::Ident::cast(node).unwrap(); let idx = self .chunk .add_constant(Value::String(ident.as_str().into())); self.chunk.add_op(OpCode::OpConstant(idx)); return Ok(()); } self.compile(node) } fn compile_literal(&mut self, value: rnix::value::Value) -> EvalResult<()> { match value { rnix::NixValue::Float(f) => { let idx = self.chunk.add_constant(Value::Float(f)); self.chunk.add_op(OpCode::OpConstant(idx)); Ok(()) } rnix::NixValue::Integer(i) => { let idx = self.chunk.add_constant(Value::Integer(i)); self.chunk.add_op(OpCode::OpConstant(idx)); Ok(()) } rnix::NixValue::String(_) => todo!(), rnix::NixValue::Path(_, _) => todo!(), } } fn compile_string(&mut self, string: rnix::types::Str) -> EvalResult<()> { let mut count = 0; // The string parts are produced in literal order, however // they need to be reversed on the stack in order to // efficiently create the real string in case of // interpolation. for part in string.parts().into_iter().rev() { count += 1; match part { // Interpolated expressions are compiled as normal and // dealt with by the VM before being assembled into // the final string. rnix::StrPart::Ast(node) => self.compile(node)?, rnix::StrPart::Literal(lit) => { let idx = self.chunk.add_constant(Value::String(lit.into())); self.chunk.add_op(OpCode::OpConstant(idx)); } } } if count != 1 { self.chunk.add_op(OpCode::OpInterpolate(count)); } Ok(()) } fn compile_binop(&mut self, op: rnix::types::BinOp) -> EvalResult<()> { // Short-circuiting and other strange operators, which are // under the same node type as NODE_BIN_OP, but need to be // handled separately (i.e. before compiling the expressions // used for standard binary operators). match op.operator().unwrap() { BinOpKind::And => return self.compile_and(op), BinOpKind::Or => return self.compile_or(op), BinOpKind::Implication => return self.compile_implication(op), BinOpKind::IsSet => return self.compile_is_set(op), _ => {} }; self.compile(op.lhs().unwrap())?; self.compile(op.rhs().unwrap())?; match op.operator().unwrap() { BinOpKind::Add => self.chunk.add_op(OpCode::OpAdd), BinOpKind::Sub => self.chunk.add_op(OpCode::OpSub), BinOpKind::Mul => self.chunk.add_op(OpCode::OpMul), BinOpKind::Div => self.chunk.add_op(OpCode::OpDiv), BinOpKind::Update => self.chunk.add_op(OpCode::OpAttrsUpdate), BinOpKind::Equal => self.chunk.add_op(OpCode::OpEqual), BinOpKind::Less => self.chunk.add_op(OpCode::OpLess), BinOpKind::LessOrEq => self.chunk.add_op(OpCode::OpLessOrEq), BinOpKind::More => self.chunk.add_op(OpCode::OpMore), BinOpKind::MoreOrEq => self.chunk.add_op(OpCode::OpMoreOrEq), BinOpKind::Concat => self.chunk.add_op(OpCode::OpConcat), BinOpKind::NotEqual => { self.chunk.add_op(OpCode::OpEqual); self.chunk.add_op(OpCode::OpInvert) } // Handled by separate branch above. BinOpKind::And | BinOpKind::Implication | BinOpKind::Or | BinOpKind::IsSet => { unreachable!() } }; Ok(()) } fn compile_unary_op(&mut self, op: rnix::types::UnaryOp) -> EvalResult<()> { self.compile(op.value().unwrap())?; use rnix::types::UnaryOpKind; let opcode = match op.operator() { UnaryOpKind::Invert => OpCode::OpInvert, UnaryOpKind::Negate => OpCode::OpNegate, }; self.chunk.add_op(opcode); Ok(()) } fn compile_ident(&mut self, node: rnix::types::Ident) -> EvalResult<()> { match node.as_str() { // TODO(tazjin): Nix technically allows code like // // let null = 1; in null // => 1 // // which we do *not* want to check at runtime. Once // scoping is introduced, the compiler should carry some // optimised information about any "weird" stuff that's // happened to the scope (such as overrides of these // literals, or builtins). "true" => self.chunk.add_op(OpCode::OpTrue), "false" => self.chunk.add_op(OpCode::OpFalse), "null" => self.chunk.add_op(OpCode::OpNull), _ => todo!("identifier access"), }; Ok(()) } // Compile attribute set literals into equivalent bytecode. // // This is complicated by a number of features specific to Nix // attribute sets, most importantly: // // 1. Keys can be dynamically constructed through interpolation. // 2. Keys can refer to nested attribute sets. // 3. Attribute sets can (optionally) be recursive. fn compile_attr_set(&mut self, node: rnix::types::AttrSet) -> EvalResult<()> { if node.recursive() { todo!("recursive attribute sets are not yet implemented") } let mut count = 0; for kv in node.entries() { count += 1; // Because attribute set literals can contain nested keys, // there is potentially more than one key fragment. If // this is the case, a special operation to construct a // runtime value representing the attribute path is // emitted. let mut key_count = 0; for fragment in kv.key().unwrap().path() { key_count += 1; match fragment.kind() { rnix::SyntaxKind::NODE_IDENT => { let ident = rnix::types::Ident::cast(fragment).unwrap(); // TODO(tazjin): intern! let idx = self .chunk .add_constant(Value::String(ident.as_str().into())); self.chunk.add_op(OpCode::OpConstant(idx)); } // For all other expression types, we simply // compile them as normal. The operation should // result in a string value, which is checked at // runtime on construction. _ => self.compile(fragment)?, } } // We're done with the key if there was only one fragment, // otherwise we need to emit an instruction to construct // the attribute path. if key_count > 1 { self.chunk.add_op(OpCode::OpAttrPath(2)); } // The value is just compiled as normal so that its // resulting value is on the stack when the attribute set // is constructed at runtime. self.compile(kv.value().unwrap())?; } self.chunk.add_op(OpCode::OpAttrs(count)); Ok(()) } fn compile_select(&mut self, node: rnix::types::Select) -> EvalResult<()> { // Push the set onto the stack self.compile(node.set().unwrap())?; // Push the key and emit the access instruction. // // This order matters because the key needs to be evaluated // first to fail in the correct order on type errors. self.compile_with_literal_ident(node.index().unwrap())?; self.chunk.add_op(OpCode::OpAttrsSelect); Ok(()) } // Compile list literals into equivalent bytecode. List // construction is fairly simple, composing of pushing code for // each literal element and an instruction with the element count. // // The VM, after evaluating the code for each element, simply // constructs the list from the given number of elements. fn compile_list(&mut self, node: rnix::types::List) -> EvalResult<()> { let mut count = 0; for item in node.items() { count += 1; self.compile(item)?; } self.chunk.add_op(OpCode::OpList(count)); Ok(()) } // Compile conditional expressions using jumping instructions in the VM. // // ┌────────────────────┐ // │ 0 [ conditional ] │ // │ 1 JUMP_IF_FALSE →┼─┐ // │ 2 [ main body ] │ │ Jump to else body if // ┌┼─3─← JUMP │ │ condition is false. // Jump over else body ││ 4 [ else body ]←┼─┘ // if condition is true.└┼─5─→ ... │ // └────────────────────┘ fn compile_if_else(&mut self, node: rnix::types::IfElse) -> EvalResult<()> { self.compile(node.condition().unwrap())?; let then_idx = self.chunk.add_op(OpCode::OpJumpIfFalse(0)); self.chunk.add_op(OpCode::OpPop); // discard condition value self.compile(node.body().unwrap())?; let else_idx = self.chunk.add_op(OpCode::OpJump(0)); self.patch_jump(then_idx); // patch jump *to* else_body self.chunk.add_op(OpCode::OpPop); // discard condition value self.compile(node.else_body().unwrap())?; self.patch_jump(else_idx); // patch jump *over* else body Ok(()) } fn compile_and(&mut self, node: rnix::types::BinOp) -> EvalResult<()> { debug_assert!( matches!(node.operator(), Some(BinOpKind::And)), "compile_and called with wrong operator kind: {:?}", node.operator(), ); // Leave left-hand side value on the stack. self.compile(node.lhs().unwrap())?; // If this value is false, jump over the right-hand side - the // whole expression is false. let end_idx = self.chunk.add_op(OpCode::OpJumpIfFalse(0)); // Otherwise, remove the previous value and leave the // right-hand side on the stack. Its result is now the value // of the whole expression. self.chunk.add_op(OpCode::OpPop); self.compile(node.rhs().unwrap())?; self.patch_jump(end_idx); self.chunk.add_op(OpCode::OpAssertBool); Ok(()) } fn compile_or(&mut self, node: rnix::types::BinOp) -> EvalResult<()> { debug_assert!( matches!(node.operator(), Some(BinOpKind::Or)), "compile_or called with wrong operator kind: {:?}", node.operator(), ); // Leave left-hand side value on the stack self.compile(node.lhs().unwrap())?; // Opposite of above: If this value is **true**, we can // short-circuit the right-hand side. let end_idx = self.chunk.add_op(OpCode::OpJumpIfTrue(0)); self.chunk.add_op(OpCode::OpPop); self.compile(node.rhs().unwrap())?; self.patch_jump(end_idx); self.chunk.add_op(OpCode::OpAssertBool); Ok(()) } fn compile_implication(&mut self, node: rnix::types::BinOp) -> EvalResult<()> { debug_assert!( matches!(node.operator(), Some(BinOpKind::Implication)), "compile_implication called with wrong operator kind: {:?}", node.operator(), ); // Leave left-hand side value on the stack and invert it. self.compile(node.lhs().unwrap())?; self.chunk.add_op(OpCode::OpInvert); // Exactly as `||` (because `a -> b` = `!a || b`). let end_idx = self.chunk.add_op(OpCode::OpJumpIfTrue(0)); self.chunk.add_op(OpCode::OpPop); self.compile(node.rhs().unwrap())?; self.patch_jump(end_idx); self.chunk.add_op(OpCode::OpAssertBool); Ok(()) } fn compile_is_set(&mut self, node: rnix::types::BinOp) -> EvalResult<()> { debug_assert!( matches!(node.operator(), Some(BinOpKind::IsSet)), "compile_is_set called with wrong operator kind: {:?}", node.operator(), ); // Put the attribute set on the stack. self.compile(node.lhs().unwrap())?; // If the key is a NODE_SELECT, the check is deeper than one // level and requires special handling. // // Otherwise, the right hand side is the (only) key expression // itself and can be compiled directly. let mut next = node.rhs().unwrap(); let mut fragments = vec![]; loop { if matches!(next.kind(), rnix::SyntaxKind::NODE_SELECT) { // Keep nesting deeper until we encounter something // different than `NODE_SELECT` on the left side. This is // required because `rnix` parses nested keys as select // expressions, instead of as a key expression. // // The parsed tree will nest something like `a.b.c.d.e.f` // as (((((a, b), c), d), e), f). fragments.push(next.last_child().unwrap()); next = next.first_child().unwrap(); } else { self.compile_with_literal_ident(next)?; for fragment in fragments.into_iter().rev() { println!("fragment: {}", fragment); self.chunk.add_op(OpCode::OpAttrsSelect); self.compile_with_literal_ident(fragment)?; } self.chunk.add_op(OpCode::OpAttrsIsSet); break; } } Ok(()) } fn patch_jump(&mut self, idx: CodeIdx) { let offset = self.chunk.code.len() - 1 - idx.0; match &mut self.chunk.code[idx.0] { OpCode::OpJump(n) | OpCode::OpJumpIfFalse(n) | OpCode::OpJumpIfTrue(n) => { *n = offset; } op => panic!("attempted to patch unsupported op: {:?}", op), } } } pub fn compile(ast: rnix::AST) -> EvalResult<Chunk> { let mut c = Compiler { chunk: Chunk::default(), }; c.compile(ast.node())?; Ok(c.chunk) }