about summary refs log tree commit diff
path: root/tvix/eval/src/compiler.rs
//! This module implements a compiler for compiling the rnix AST
//! representation to Tvix bytecode.
//!
//! A note on `unwrap()`: This module contains a lot of calls to
//! `unwrap()` or `expect(...)` on data structures returned by `rnix`.
//! The reason for this is that rnix uses the same data structures to
//! represent broken and correct ASTs, so all typed AST variants have
//! the ability to represent an incorrect node.
//!
//! However, at the time that the AST is passed to the compiler we
//! have verified that `rnix` considers the code to be correct, so all
//! variants are filed. In cases where the invariant is guaranteed by
//! the code in this module, `debug_assert!` has been used to catch
//! mistakes early during development.

use path_clean::PathClean;
use rnix;
use rnix::types::{BinOpKind, EntryHolder, TokenWrapper, TypedNode, Wrapper};
use std::path::{Path, PathBuf};

use crate::chunk::Chunk;
use crate::errors::{Error, EvalResult};
use crate::opcode::{CodeIdx, OpCode};
use crate::value::Value;
use crate::warnings::{EvalWarning, WarningKind};

/// Represents the result of compiling a piece of Nix code. If
/// compilation was successful, the resulting bytecode can be passed
/// to the VM.
pub struct CompilationResult {
    pub chunk: Chunk,
    pub warnings: Vec<EvalWarning>,
}

// Represents a single local already known to the compiler.
struct Local {
    // Definition name, which can be different kinds of tokens (plain
    // string or identifier). Nix does not allow dynamic names inside
    // of `let`-expressions.
    name: rnix::SyntaxNode,

    // Scope depth of this local.
    depth: usize,
}

/// Represents locals known during compilation, which can be resolved
/// directly to stack indices.
///
/// TODO(tazjin): `with`-stack
/// TODO(tazjin): flag "specials" (e.g. note depth if builtins are
/// overridden)
struct Locals {
    locals: Vec<Local>,

    // How many scopes "deep" are these locals?
    scope_depth: usize,
}

struct Compiler {
    chunk: Chunk,
    warnings: Vec<EvalWarning>,
    root_dir: PathBuf,
}

impl Compiler {
    fn compile(&mut self, node: rnix::SyntaxNode) -> EvalResult<()> {
        match node.kind() {
            // Root of a file contains no content, it's just a marker
            // type.
            rnix::SyntaxKind::NODE_ROOT => self.compile(node.first_child().expect("TODO")),

            // Literals contain a single token consisting of the
            // literal itself.
            rnix::SyntaxKind::NODE_LITERAL => {
                let value = rnix::types::Value::cast(node).unwrap();
                self.compile_literal(value)
            }

            rnix::SyntaxKind::NODE_STRING => {
                let op = rnix::types::Str::cast(node).unwrap();
                self.compile_string(op)
            }

            // The interpolation & dynamic nodes are just wrappers
            // around the inner value of a fragment, they only require
            // unwrapping.
            rnix::SyntaxKind::NODE_STRING_INTERPOL | rnix::SyntaxKind::NODE_DYNAMIC => {
                self.compile(node.first_child().expect("TODO (should not be possible)"))
            }

            rnix::SyntaxKind::NODE_BIN_OP => {
                let op = rnix::types::BinOp::cast(node).expect("TODO (should not be possible)");
                self.compile_binop(op)
            }

            rnix::SyntaxKind::NODE_UNARY_OP => {
                let op = rnix::types::UnaryOp::cast(node).expect("TODO: (should not be possible)");
                self.compile_unary_op(op)
            }

            rnix::SyntaxKind::NODE_PAREN => {
                let node = rnix::types::Paren::cast(node).unwrap();
                self.compile(node.inner().unwrap())
            }

            rnix::SyntaxKind::NODE_IDENT => {
                let node = rnix::types::Ident::cast(node).unwrap();
                self.compile_ident(node)
            }

            rnix::SyntaxKind::NODE_ATTR_SET => {
                let node = rnix::types::AttrSet::cast(node).unwrap();
                self.compile_attr_set(node)
            }

            rnix::SyntaxKind::NODE_SELECT => {
                let node = rnix::types::Select::cast(node).unwrap();
                self.compile_select(node)
            }

            rnix::SyntaxKind::NODE_OR_DEFAULT => {
                let node = rnix::types::OrDefault::cast(node).unwrap();
                self.compile_or_default(node)
            }

            rnix::SyntaxKind::NODE_LIST => {
                let node = rnix::types::List::cast(node).unwrap();
                self.compile_list(node)
            }

            rnix::SyntaxKind::NODE_IF_ELSE => {
                let node = rnix::types::IfElse::cast(node).unwrap();
                self.compile_if_else(node)
            }

            kind => panic!("visiting unsupported node: {:?}", kind),
        }
    }

    /// Compiles nodes the same way that `Self::compile` does, with
    /// the exception of identifiers which are added literally to the
    /// stack as string values.
    ///
    /// This is needed for correctly accessing attribute sets.
    fn compile_with_literal_ident(&mut self, node: rnix::SyntaxNode) -> EvalResult<()> {
        if node.kind() == rnix::SyntaxKind::NODE_IDENT {
            let ident = rnix::types::Ident::cast(node).unwrap();
            let idx = self
                .chunk
                .push_constant(Value::String(ident.as_str().into()));
            self.chunk.push_op(OpCode::OpConstant(idx));
            return Ok(());
        }

        self.compile(node)
    }

    fn compile_literal(&mut self, node: rnix::types::Value) -> EvalResult<()> {
        match node.to_value().unwrap() {
            rnix::NixValue::Float(f) => {
                let idx = self.chunk.push_constant(Value::Float(f));
                self.chunk.push_op(OpCode::OpConstant(idx));
                Ok(())
            }

            rnix::NixValue::Integer(i) => {
                let idx = self.chunk.push_constant(Value::Integer(i));
                self.chunk.push_op(OpCode::OpConstant(idx));
                Ok(())
            }

            // These nodes are yielded by literal URL values.
            rnix::NixValue::String(s) => {
                self.warnings.push(EvalWarning {
                    node: node.node().clone(),
                    kind: WarningKind::DeprecatedLiteralURL,
                });

                let idx = self.chunk.push_constant(Value::String(s.into()));
                self.chunk.push_op(OpCode::OpConstant(idx));
                Ok(())
            }

            rnix::NixValue::Path(anchor, path) => self.compile_path(anchor, path),
        }
    }

    fn compile_path(&mut self, anchor: rnix::value::Anchor, path: String) -> EvalResult<()> {
        let path = match anchor {
            rnix::value::Anchor::Absolute => Path::new(&path).to_owned(),

            rnix::value::Anchor::Home => {
                let mut buf = dirs::home_dir().ok_or_else(|| {
                    Error::PathResolution("failed to determine home directory".into())
                })?;

                buf.push(&path);
                buf
            }

            rnix::value::Anchor::Relative => {
                let mut buf = self.root_dir.clone();
                buf.push(path);
                buf
            }

            // This confusingly named variant is actually
            // angle-bracket lookups, which in C++ Nix desugar
            // to calls to `__findFile` (implicitly in the
            // current scope).
            rnix::value::Anchor::Store => todo!("resolve <...> lookups at runtime"),
        };

        // TODO: Use https://github.com/rust-lang/rfcs/issues/2208
        // once it is available
        let value = Value::Path(path.clean());
        let idx = self.chunk.push_constant(value);
        self.chunk.push_op(OpCode::OpConstant(idx));

        Ok(())
    }

    fn compile_string(&mut self, string: rnix::types::Str) -> EvalResult<()> {
        let mut count = 0;

        // The string parts are produced in literal order, however
        // they need to be reversed on the stack in order to
        // efficiently create the real string in case of
        // interpolation.
        for part in string.parts().into_iter().rev() {
            count += 1;

            match part {
                // Interpolated expressions are compiled as normal and
                // dealt with by the VM before being assembled into
                // the final string.
                rnix::StrPart::Ast(node) => self.compile(node)?,

                rnix::StrPart::Literal(lit) => {
                    let idx = self.chunk.push_constant(Value::String(lit.into()));
                    self.chunk.push_op(OpCode::OpConstant(idx));
                }
            }
        }

        if count != 1 {
            self.chunk.push_op(OpCode::OpInterpolate(count));
        }

        Ok(())
    }

    fn compile_binop(&mut self, op: rnix::types::BinOp) -> EvalResult<()> {
        // Short-circuiting and other strange operators, which are
        // under the same node type as NODE_BIN_OP, but need to be
        // handled separately (i.e. before compiling the expressions
        // used for standard binary operators).
        match op.operator().unwrap() {
            BinOpKind::And => return self.compile_and(op),
            BinOpKind::Or => return self.compile_or(op),
            BinOpKind::Implication => return self.compile_implication(op),
            BinOpKind::IsSet => return self.compile_is_set(op),

            _ => {}
        };

        self.compile(op.lhs().unwrap())?;
        self.compile(op.rhs().unwrap())?;

        match op.operator().unwrap() {
            BinOpKind::Add => self.chunk.push_op(OpCode::OpAdd),
            BinOpKind::Sub => self.chunk.push_op(OpCode::OpSub),
            BinOpKind::Mul => self.chunk.push_op(OpCode::OpMul),
            BinOpKind::Div => self.chunk.push_op(OpCode::OpDiv),
            BinOpKind::Update => self.chunk.push_op(OpCode::OpAttrsUpdate),
            BinOpKind::Equal => self.chunk.push_op(OpCode::OpEqual),
            BinOpKind::Less => self.chunk.push_op(OpCode::OpLess),
            BinOpKind::LessOrEq => self.chunk.push_op(OpCode::OpLessOrEq),
            BinOpKind::More => self.chunk.push_op(OpCode::OpMore),
            BinOpKind::MoreOrEq => self.chunk.push_op(OpCode::OpMoreOrEq),
            BinOpKind::Concat => self.chunk.push_op(OpCode::OpConcat),

            BinOpKind::NotEqual => {
                self.chunk.push_op(OpCode::OpEqual);
                self.chunk.push_op(OpCode::OpInvert)
            }

            // Handled by separate branch above.
            BinOpKind::And | BinOpKind::Implication | BinOpKind::Or | BinOpKind::IsSet => {
                unreachable!()
            }
        };

        Ok(())
    }

    fn compile_unary_op(&mut self, op: rnix::types::UnaryOp) -> EvalResult<()> {
        self.compile(op.value().unwrap())?;

        use rnix::types::UnaryOpKind;
        let opcode = match op.operator() {
            UnaryOpKind::Invert => OpCode::OpInvert,
            UnaryOpKind::Negate => OpCode::OpNegate,
        };

        self.chunk.push_op(opcode);
        Ok(())
    }

    fn compile_ident(&mut self, node: rnix::types::Ident) -> EvalResult<()> {
        match node.as_str() {
            // TODO(tazjin): Nix technically allows code like
            //
            //   let null = 1; in null
            //   => 1
            //
            // which we do *not* want to check at runtime. Once
            // scoping is introduced, the compiler should carry some
            // optimised information about any "weird" stuff that's
            // happened to the scope (such as overrides of these
            // literals, or builtins).
            "true" => self.chunk.push_op(OpCode::OpTrue),
            "false" => self.chunk.push_op(OpCode::OpFalse),
            "null" => self.chunk.push_op(OpCode::OpNull),

            _ => todo!("identifier access"),
        };

        Ok(())
    }

    // Compile attribute set literals into equivalent bytecode.
    //
    // This is complicated by a number of features specific to Nix
    // attribute sets, most importantly:
    //
    // 1. Keys can be dynamically constructed through interpolation.
    // 2. Keys can refer to nested attribute sets.
    // 3. Attribute sets can (optionally) be recursive.
    fn compile_attr_set(&mut self, node: rnix::types::AttrSet) -> EvalResult<()> {
        if node.recursive() {
            todo!("recursive attribute sets are not yet implemented")
        }

        let mut count = 0;

        for kv in node.entries() {
            count += 1;

            // Because attribute set literals can contain nested keys,
            // there is potentially more than one key fragment. If
            // this is the case, a special operation to construct a
            // runtime value representing the attribute path is
            // emitted.
            let mut key_count = 0;
            for fragment in kv.key().unwrap().path() {
                key_count += 1;

                match fragment.kind() {
                    rnix::SyntaxKind::NODE_IDENT => {
                        let ident = rnix::types::Ident::cast(fragment).unwrap();

                        // TODO(tazjin): intern!
                        let idx = self
                            .chunk
                            .push_constant(Value::String(ident.as_str().into()));
                        self.chunk.push_op(OpCode::OpConstant(idx));
                    }

                    // For all other expression types, we simply
                    // compile them as normal. The operation should
                    // result in a string value, which is checked at
                    // runtime on construction.
                    _ => self.compile(fragment)?,
                }
            }

            // We're done with the key if there was only one fragment,
            // otherwise we need to emit an instruction to construct
            // the attribute path.
            if key_count > 1 {
                self.chunk.push_op(OpCode::OpAttrPath(2));
            }

            // The value is just compiled as normal so that its
            // resulting value is on the stack when the attribute set
            // is constructed at runtime.
            self.compile(kv.value().unwrap())?;
        }

        self.chunk.push_op(OpCode::OpAttrs(count));
        Ok(())
    }

    fn compile_select(&mut self, node: rnix::types::Select) -> EvalResult<()> {
        // Push the set onto the stack
        self.compile(node.set().unwrap())?;

        // Push the key and emit the access instruction.
        //
        // This order matters because the key needs to be evaluated
        // first to fail in the correct order on type errors.
        self.compile_with_literal_ident(node.index().unwrap())?;
        self.chunk.push_op(OpCode::OpAttrsSelect);

        Ok(())
    }

    // Compile list literals into equivalent bytecode. List
    // construction is fairly simple, composing of pushing code for
    // each literal element and an instruction with the element count.
    //
    // The VM, after evaluating the code for each element, simply
    // constructs the list from the given number of elements.
    fn compile_list(&mut self, node: rnix::types::List) -> EvalResult<()> {
        let mut count = 0;

        for item in node.items() {
            count += 1;
            self.compile(item)?;
        }

        self.chunk.push_op(OpCode::OpList(count));
        Ok(())
    }

    // Compile conditional expressions using jumping instructions in the VM.
    //
    //                        ┌────────────────────┐
    //                        │ 0  [ conditional ] │
    //                        │ 1   JUMP_IF_FALSE →┼─┐
    //                        │ 2  [  main body  ] │ │ Jump to else body if
    //                       ┌┼─3─←     JUMP       │ │ condition is false.
    //  Jump over else body  ││ 4  [  else body  ]←┼─┘
    //  if condition is true.└┼─5─→     ...        │
    //                        └────────────────────┘
    fn compile_if_else(&mut self, node: rnix::types::IfElse) -> EvalResult<()> {
        self.compile(node.condition().unwrap())?;

        let then_idx = self.chunk.push_op(OpCode::OpJumpIfFalse(0));

        self.chunk.push_op(OpCode::OpPop); // discard condition value
        self.compile(node.body().unwrap())?;

        let else_idx = self.chunk.push_op(OpCode::OpJump(0));

        self.patch_jump(then_idx); // patch jump *to* else_body
        self.chunk.push_op(OpCode::OpPop); // discard condition value
        self.compile(node.else_body().unwrap())?;

        self.patch_jump(else_idx); // patch jump *over* else body

        Ok(())
    }

    fn compile_and(&mut self, node: rnix::types::BinOp) -> EvalResult<()> {
        debug_assert!(
            matches!(node.operator(), Some(BinOpKind::And)),
            "compile_and called with wrong operator kind: {:?}",
            node.operator(),
        );

        // Leave left-hand side value on the stack.
        self.compile(node.lhs().unwrap())?;

        // If this value is false, jump over the right-hand side - the
        // whole expression is false.
        let end_idx = self.chunk.push_op(OpCode::OpJumpIfFalse(0));

        // Otherwise, remove the previous value and leave the
        // right-hand side on the stack. Its result is now the value
        // of the whole expression.
        self.chunk.push_op(OpCode::OpPop);
        self.compile(node.rhs().unwrap())?;

        self.patch_jump(end_idx);
        self.chunk.push_op(OpCode::OpAssertBool);

        Ok(())
    }

    fn compile_or(&mut self, node: rnix::types::BinOp) -> EvalResult<()> {
        debug_assert!(
            matches!(node.operator(), Some(BinOpKind::Or)),
            "compile_or called with wrong operator kind: {:?}",
            node.operator(),
        );

        // Leave left-hand side value on the stack
        self.compile(node.lhs().unwrap())?;

        // Opposite of above: If this value is **true**, we can
        // short-circuit the right-hand side.
        let end_idx = self.chunk.push_op(OpCode::OpJumpIfTrue(0));
        self.chunk.push_op(OpCode::OpPop);
        self.compile(node.rhs().unwrap())?;
        self.patch_jump(end_idx);
        self.chunk.push_op(OpCode::OpAssertBool);

        Ok(())
    }

    fn compile_implication(&mut self, node: rnix::types::BinOp) -> EvalResult<()> {
        debug_assert!(
            matches!(node.operator(), Some(BinOpKind::Implication)),
            "compile_implication called with wrong operator kind: {:?}",
            node.operator(),
        );

        // Leave left-hand side value on the stack and invert it.
        self.compile(node.lhs().unwrap())?;
        self.chunk.push_op(OpCode::OpInvert);

        // Exactly as `||` (because `a -> b` = `!a || b`).
        let end_idx = self.chunk.push_op(OpCode::OpJumpIfTrue(0));
        self.chunk.push_op(OpCode::OpPop);
        self.compile(node.rhs().unwrap())?;
        self.patch_jump(end_idx);
        self.chunk.push_op(OpCode::OpAssertBool);

        Ok(())
    }

    fn compile_is_set(&mut self, node: rnix::types::BinOp) -> EvalResult<()> {
        debug_assert!(
            matches!(node.operator(), Some(BinOpKind::IsSet)),
            "compile_is_set called with wrong operator kind: {:?}",
            node.operator(),
        );

        // Put the attribute set on the stack.
        self.compile(node.lhs().unwrap())?;

        // If the key is a NODE_SELECT, the check is deeper than one
        // level and requires special handling.
        //
        // Otherwise, the right hand side is the (only) key expression
        // itself and can be compiled directly.
        let mut next = node.rhs().unwrap();
        let mut fragments = vec![];

        loop {
            if matches!(next.kind(), rnix::SyntaxKind::NODE_SELECT) {
                // Keep nesting deeper until we encounter something
                // different than `NODE_SELECT` on the left side. This is
                // required because `rnix` parses nested keys as select
                // expressions, instead of as a key expression.
                //
                // The parsed tree will nest something like `a.b.c.d.e.f`
                // as (((((a, b), c), d), e), f).
                fragments.push(next.last_child().unwrap());
                next = next.first_child().unwrap();
            } else {
                self.compile_with_literal_ident(next)?;

                for fragment in fragments.into_iter().rev() {
                    self.chunk.push_op(OpCode::OpAttrsSelect);
                    self.compile_with_literal_ident(fragment)?;
                }

                self.chunk.push_op(OpCode::OpAttrsIsSet);
                break;
            }
        }

        Ok(())
    }

    /// Compile an `or` expression into a chunk of conditional jumps.
    ///
    /// If at any point during attribute set traversal a key is
    /// missing, the `OpAttrOrNotFound` instruction will leave a
    /// special sentinel value on the stack.
    ///
    /// After each access, a conditional jump evaluates the top of the
    /// stack and short-circuits to the default value if it sees the
    /// sentinel.
    ///
    /// Code like `{ a.b = 1; }.a.c or 42` yields this bytecode and
    /// runtime stack:
    ///
    /// ```notrust
    ///            Bytecode                     Runtime stack
    ///  ┌────────────────────────────┐   ┌─────────────────────────┐
    ///  │    ...                     │   │ ...                     │
    ///  │ 5  OP_ATTRS(1)             │ → │ 5  [ { a.b = 1; }     ] │
    ///  │ 6  OP_CONSTANT("a")        │ → │ 6  [ { a.b = 1; } "a" ] │
    ///  │ 7  OP_ATTR_OR_NOT_FOUND    │ → │ 7  [ { b = 1; }       ] │
    ///  │ 8  JUMP_IF_NOT_FOUND(13)   │ → │ 8  [ { b = 1; }       ] │
    ///  │ 9  OP_CONSTANT("C")        │ → │ 9  [ { b = 1; } "c"   ] │
    ///  │ 10 OP_ATTR_OR_NOT_FOUND    │ → │ 10 [ NOT_FOUND        ] │
    ///  │ 11 JUMP_IF_NOT_FOUND(13)   │ → │ 11 [                  ] │
    ///  │ 12 JUMP(14)                │   │ ..     jumped over      │
    ///  │ 13 CONSTANT(42)            │ → │ 12 [ 42 ]               │
    ///  │ 14 ...                     │   │ ..   ....               │
    ///  └────────────────────────────┘   └─────────────────────────┘
    /// ```
    fn compile_or_default(&mut self, node: rnix::types::OrDefault) -> EvalResult<()> {
        let select = node.index().unwrap();

        let mut next = select.set().unwrap();
        let mut fragments = vec![select.index().unwrap()];
        let mut jumps = vec![];

        loop {
            if matches!(next.kind(), rnix::SyntaxKind::NODE_SELECT) {
                fragments.push(next.last_child().unwrap());
                next = next.first_child().unwrap();
                continue;
            } else {
                self.compile(next)?;
            }

            for fragment in fragments.into_iter().rev() {
                self.compile_with_literal_ident(fragment)?;
                self.chunk.push_op(OpCode::OpAttrOrNotFound);
                jumps.push(self.chunk.push_op(OpCode::OpJumpIfNotFound(0)));
            }

            break;
        }

        let final_jump = self.chunk.push_op(OpCode::OpJump(0));
        for jump in jumps {
            self.patch_jump(jump);
        }

        // Compile the default value expression and patch the final
        // jump to point *beyond* it.
        self.compile(node.default().unwrap())?;
        self.patch_jump(final_jump);

        Ok(())
    }

    fn patch_jump(&mut self, idx: CodeIdx) {
        let offset = self.chunk.code.len() - 1 - idx.0;

        match &mut self.chunk.code[idx.0] {
            OpCode::OpJump(n)
            | OpCode::OpJumpIfFalse(n)
            | OpCode::OpJumpIfTrue(n)
            | OpCode::OpJumpIfNotFound(n) => {
                *n = offset;
            }

            op => panic!("attempted to patch unsupported op: {:?}", op),
        }
    }
}

pub fn compile(ast: rnix::AST, location: Option<PathBuf>) -> EvalResult<CompilationResult> {
    let mut root_dir = match location {
        Some(dir) => Ok(dir),
        None => std::env::current_dir().map_err(|e| {
            Error::PathResolution(format!("could not determine current directory: {}", e))
        }),
    }?;

    // If the path passed from the caller points to a file, the
    // filename itself needs to be truncated as this must point to a
    // directory.
    if root_dir.is_file() {
        root_dir.pop();
    }

    let mut c = Compiler {
        root_dir,
        chunk: Chunk::default(),
        warnings: vec![],
    };

    c.compile(ast.node())?;

    Ok(CompilationResult {
        chunk: c.chunk,
        warnings: c.warnings,
    })
}