about summary refs log blame commit diff
path: root/users/tazjin/rlox/src/bytecode/compiler.rs
blob: 9a8f80ae2b7d0ad2c8b817484e4a5f6ac61770b4 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
                        
                                                 
                              
                          
                                             
 

                               
                                            
                 
                       
                      
 















                                       
 




















                                                        



















                                                                         


























                                                                            










                                                                           


                                                                         






                                                                              














                                                                                
                                                                          
         
 


                                                          
                                             
                                            


                                                  







                                            


                                                     




                                                

                                       
         










                                                                 








                                                                      
                                           
                                                              
                                                   


                                                                           
     
                                             


                                                                      
              
     


                                                



                                                  
                                                           





                                                                       



                                                    
                                                     
                                                       





                                                                 





                                                                   










                                                                  
 

                                                                        
              
     









                                                                      
                                           
                                               
                                                         


                                                     


              
                                                                             
                                                                 















                                                                                
         
              
     
                                                                 
                                             



                                                





                                                 
                                       
                                       


                                                   
 

              



                                              




                                               


                                                                              
 




                                                                             
                                                          



                          
                                              
     











                                                          
 
                                                                     





                                                                  
                                  
                       
                                               
                       



                                   
                                              


                            
use super::chunk::Chunk;
use super::errors::{Error, ErrorKind, LoxResult};
use super::interner::Interner;
use super::opcode::OpCode;
use super::value::Value;
use crate::scanner::{self, Token, TokenKind};

#[cfg(feature = "disassemble")]
use super::chunk;

struct Compiler<T: Iterator<Item = Token>> {
    tokens: T,
    chunk: Chunk,
    panic: bool,
    errors: Vec<Error>,
    strings: Interner,

    current: Option<Token>,
    previous: Option<Token>,
}

#[derive(Debug, PartialEq, PartialOrd)]
enum Precedence {
    None,
    Assignment, // =
    Or,         // or
    And,        // and
    Equality,   // == !=
    Comparison, // < > <= >=
    Term,       // + -
    Factor,     // * /
    Unary,      // ! -
    Call,       // . ()
    Primary,
}

type ParseFn<T> = fn(&mut Compiler<T>) -> LoxResult<()>;

struct ParseRule<T: Iterator<Item = Token>> {
    prefix: Option<ParseFn<T>>,
    infix: Option<ParseFn<T>>,
    precedence: Precedence,
}

impl<T: Iterator<Item = Token>> ParseRule<T> {
    fn new(
        prefix: Option<ParseFn<T>>,
        infix: Option<ParseFn<T>>,
        precedence: Precedence,
    ) -> Self {
        ParseRule {
            prefix,
            infix,
            precedence,
        }
    }
}

impl Precedence {
    // Return the next highest precedence, if there is one.
    fn next(&self) -> Self {
        match self {
            Precedence::None => Precedence::Assignment,
            Precedence::Assignment => Precedence::Or,
            Precedence::Or => Precedence::And,
            Precedence::And => Precedence::Equality,
            Precedence::Equality => Precedence::Comparison,
            Precedence::Comparison => Precedence::Term,
            Precedence::Term => Precedence::Factor,
            Precedence::Factor => Precedence::Unary,
            Precedence::Unary => Precedence::Call,
            Precedence::Call => Precedence::Primary,
            Precedence::Primary => panic!(
                "invalid parser state: no higher precedence than Primary"
            ),
        }
    }
}

fn rule_for<T: Iterator<Item = Token>>(token: &TokenKind) -> ParseRule<T> {
    match token {
        TokenKind::LeftParen => {
            ParseRule::new(Some(Compiler::grouping), None, Precedence::None)
        }

        TokenKind::Minus => ParseRule::new(
            Some(Compiler::unary),
            Some(Compiler::binary),
            Precedence::Term,
        ),

        TokenKind::Plus => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Term)
        }

        TokenKind::Slash => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Factor)
        }

        TokenKind::Star => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Factor)
        }

        TokenKind::Number(_) => {
            ParseRule::new(Some(Compiler::number), None, Precedence::None)
        }

        TokenKind::True => {
            ParseRule::new(Some(Compiler::literal), None, Precedence::None)
        }

        TokenKind::False => {
            ParseRule::new(Some(Compiler::literal), None, Precedence::None)
        }

        TokenKind::Nil => {
            ParseRule::new(Some(Compiler::literal), None, Precedence::None)
        }

        TokenKind::Bang => {
            ParseRule::new(Some(Compiler::unary), None, Precedence::None)
        }

        TokenKind::BangEqual => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Equality)
        }

        TokenKind::EqualEqual => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Equality)
        }

        TokenKind::Greater => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison)
        }

        TokenKind::GreaterEqual => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison)
        }

        TokenKind::Less => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison)
        }

        TokenKind::LessEqual => {
            ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison)
        }

        TokenKind::String(_) => {
            ParseRule::new(Some(Compiler::string), None, Precedence::None)
        }

        _ => ParseRule::new(None, None, Precedence::None),
    }
}

impl<T: Iterator<Item = Token>> Compiler<T> {
    fn compile(&mut self) -> LoxResult<()> {
        self.advance();

        while !self.match_token(&TokenKind::Eof) {
            self.declaration()?;
        }

        self.end_compiler()
    }

    fn advance(&mut self) {
        self.previous = self.current.take();
        self.current = self.tokens.next();
    }

    fn expression(&mut self) -> LoxResult<()> {
        self.parse_precedence(Precedence::Assignment)
    }

    fn declaration(&mut self) -> LoxResult<()> {
        self.statement()
    }

    fn statement(&mut self) -> LoxResult<()> {
        if self.match_token(&TokenKind::Print) {
            self.print_statement()
        } else {
            self.expression_statement()
        }
    }

    fn print_statement(&mut self) -> LoxResult<()> {
        self.expression()?;
        self.consume(
            &TokenKind::Semicolon,
            ErrorKind::ExpectedToken("Expected ';' after value"),
        );
        self.emit_op(OpCode::OpPrint);
        Ok(())
    }

    fn expression_statement(&mut self) -> LoxResult<()> {
        self.expression()?;
        self.consume(
            &TokenKind::Semicolon,
            ErrorKind::ExpectedToken("Expected ';' after expression"),
        );
        self.emit_op(OpCode::OpPop);
        Ok(())
    }

    fn number(&mut self) -> LoxResult<()> {
        if let TokenKind::Number(num) = self.previous().kind {
            self.emit_constant(Value::Number(num));
            return Ok(());
        }

        unreachable!("internal parser error: entered number() incorrectly")
    }

    fn grouping(&mut self) -> LoxResult<()> {
        self.expression()?;
        self.consume(
            &TokenKind::RightParen,
            ErrorKind::ExpectedToken("Expected ')' after expression"),
        );
        Ok(())
    }

    fn unary(&mut self) -> LoxResult<()> {
        // TODO(tazjin): Avoid clone
        let kind = self.previous().kind.clone();

        // Compile the operand
        self.parse_precedence(Precedence::Unary)?;

        // Emit operator instruction
        match kind {
            TokenKind::Bang => self.emit_op(OpCode::OpNot),
            TokenKind::Minus => self.emit_op(OpCode::OpNegate),
            _ => unreachable!("only called for unary operator tokens"),
        }

        Ok(())
    }

    fn binary(&mut self) -> LoxResult<()> {
        // Remember the operator
        let operator = self.previous().kind.clone();

        // Compile the right operand
        let rule: ParseRule<T> = rule_for(&operator);
        self.parse_precedence(rule.precedence.next())?;

        // Emit operator instruction
        match operator {
            TokenKind::Minus => self.emit_op(OpCode::OpSubtract),
            TokenKind::Plus => self.emit_op(OpCode::OpAdd),
            TokenKind::Star => self.emit_op(OpCode::OpMultiply),
            TokenKind::Slash => self.emit_op(OpCode::OpDivide),

            TokenKind::BangEqual => {
                self.emit_op(OpCode::OpEqual);
                self.emit_op(OpCode::OpNot);
            }

            TokenKind::EqualEqual => self.emit_op(OpCode::OpEqual),
            TokenKind::Greater => self.emit_op(OpCode::OpGreater),

            TokenKind::GreaterEqual => {
                self.emit_op(OpCode::OpLess);
                self.emit_op(OpCode::OpNot);
            }

            TokenKind::Less => self.emit_op(OpCode::OpLess),
            TokenKind::LessEqual => {
                self.emit_op(OpCode::OpGreater);
                self.emit_op(OpCode::OpNot);
            }

            _ => unreachable!("only called for binary operator tokens"),
        }

        Ok(())
    }

    fn literal(&mut self) -> LoxResult<()> {
        match self.previous().kind {
            TokenKind::Nil => self.emit_op(OpCode::OpNil),
            TokenKind::True => self.emit_op(OpCode::OpTrue),
            TokenKind::False => self.emit_op(OpCode::OpFalse),
            _ => unreachable!("only called for literal value tokens"),
        }

        Ok(())
    }

    fn string(&mut self) -> LoxResult<()> {
        let val = match &self.previous().kind {
            TokenKind::String(s) => s.clone(),
            _ => unreachable!("only called for strings"),
        };

        let id = self.strings.intern(val);
        self.emit_constant(Value::String(id.into()));

        Ok(())
    }

    fn parse_precedence(&mut self, precedence: Precedence) -> LoxResult<()> {
        self.advance();
        let rule: ParseRule<T> = rule_for(&self.previous().kind);
        let prefix_fn = match rule.prefix {
            None => unimplemented!("expected expression or something, unclear"),
            Some(func) => func,
        };

        prefix_fn(self)?;

        while precedence <= rule_for::<T>(&self.current().kind).precedence {
            self.advance();
            match rule_for::<T>(&self.previous().kind).infix {
                Some(func) => {
                    func(self)?;
                }
                None => {
                    unreachable!("invalid compiler state: error in parse rules")
                }
            }
        }

        Ok(())
    }

    fn consume(&mut self, expected: &TokenKind, err: ErrorKind) {
        if self.current().kind == *expected {
            self.advance();
            return;
        }

        self.error_at(self.current().line, err);
    }

    fn current_chunk(&mut self) -> &mut Chunk {
        &mut self.chunk
    }

    fn end_compiler(&mut self) -> LoxResult<()> {
        self.emit_op(OpCode::OpReturn);

        #[cfg(feature = "disassemble")]
        {
            chunk::disassemble_chunk(&self.chunk);
            println!("== compilation finished ==");
        }

        Ok(())
    }

    fn emit_op(&mut self, op: OpCode) {
        let line = self.previous().line;
        self.current_chunk().add_op(op, line);
    }

    fn emit_constant(&mut self, val: Value) {
        let idx = self.chunk.add_constant(val);
        self.emit_op(OpCode::OpConstant(idx));
    }

    fn previous(&self) -> &Token {
        self.previous
            .as_ref()
            .expect("invalid internal compiler state: missing previous token")
    }

    fn current(&self) -> &Token {
        self.current
            .as_ref()
            .expect("invalid internal compiler state: missing current token")
    }

    fn error_at(&mut self, line: usize, kind: ErrorKind) {
        if self.panic {
            return;
        }

        self.panic = true;
        self.errors.push(Error { kind, line })
    }

    fn match_token(&mut self, token: &TokenKind) -> bool {
        if !self.check(token) {
            return false;
        }

        self.advance();
        true
    }

    fn check(&self, token: &TokenKind) -> bool {
        return self.current().kind == *token;
    }
}

pub fn compile(code: &str) -> Result<(Interner, Chunk), Vec<Error>> {
    let chars = code.chars().collect::<Vec<char>>();
    let tokens = scanner::scan(&chars).map_err(|errors| {
        errors.into_iter().map(Into::into).collect::<Vec<Error>>()
    })?;

    let mut compiler = Compiler {
        tokens: tokens.into_iter().peekable(),
        chunk: Default::default(),
        panic: false,
        errors: vec![],
        strings: Interner::with_capacity(1024),
        current: None,
        previous: None,
    };

    compiler.compile()?;

    if compiler.errors.is_empty() {
        Ok((compiler.strings, compiler.chunk))
    } else {
        Err(compiler.errors)
    }
}