diff options
author | Vincent Ambo <mail@tazj.in> | 2021-01-17T18·13+0300 |
---|---|---|
committer | tazjin <mail@tazj.in> | 2021-01-17T21·17+0000 |
commit | b1d0e22b1f5fe907ba3d48931e5a38b9a75b0dcf (patch) | |
tree | aaed1a8bf4cd3bde2f3fd63980ce3d98928155c5 /users/tazjin/rlox/src/treewalk/parser.rs | |
parent | c26915d0120e8577cd684eb9c4f2694e1727cb4a (diff) |
chore(tazjin/rlox): Move other modules under treewalk:: r/2126
It's unclear if the second part of the book can reuse anything from the first part (I'm guessing probably the scanner, but I'll move that back if it turns out to be the case). Change-Id: I9411355929e31ac6e953599e51665406b1f48d55 Reviewed-on: https://cl.tvl.fyi/c/depot/+/2415 Reviewed-by: tazjin <mail@tazj.in> Tested-by: BuildkiteCI
Diffstat (limited to 'users/tazjin/rlox/src/treewalk/parser.rs')
-rw-r--r-- | users/tazjin/rlox/src/treewalk/parser.rs | 702 |
1 files changed, 702 insertions, 0 deletions
diff --git a/users/tazjin/rlox/src/treewalk/parser.rs b/users/tazjin/rlox/src/treewalk/parser.rs new file mode 100644 index 000000000000..8117c29726c9 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/parser.rs @@ -0,0 +1,702 @@ +// This implements the grammar of Lox as described starting in the +// Crafting Interpreters chapter "Representing Code". Note that the +// upstream Java implementation works around Java being bad at value +// classes by writing a code generator for Java. +// +// My Rust implementation skips this step because it's unnecessary, we +// have real types. +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::scanner::{Token, TokenKind}; +use std::rc::Rc; + +// AST + +#[derive(Debug)] +pub struct Assign { + pub name: Token, + pub value: Box<Expr>, + pub depth: Option<usize>, +} + +#[derive(Debug)] +pub struct Binary { + pub left: Box<Expr>, + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Logical { + pub left: Box<Expr>, + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Grouping(pub Box<Expr>); + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + Boolean(bool), + Number(f64), + String(String), + Nil, +} + +#[derive(Debug)] +pub struct Unary { + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Call { + pub callee: Box<Expr>, + pub paren: Token, + pub args: Vec<Expr>, +} + +// Not to be confused with `Var`, which is for assignment. +#[derive(Debug)] +pub struct Variable { + pub name: Token, + pub depth: Option<usize>, +} + +#[derive(Debug)] +pub enum Expr { + Assign(Assign), + Binary(Binary), + Grouping(Grouping), + Literal(Literal), + Unary(Unary), + Call(Call), + Variable(Variable), + Logical(Logical), +} + +// Variable assignment. Not to be confused with `Variable`, which is +// for access. +#[derive(Debug)] +pub struct Var { + pub name: Token, + pub initialiser: Option<Expr>, +} + +#[derive(Debug)] +pub struct Return { + pub value: Expr, +} + +#[derive(Debug)] +pub struct If { + pub condition: Expr, + pub then_branch: Box<Statement>, + pub else_branch: Option<Box<Statement>>, +} + +#[derive(Debug)] +pub struct While { + pub condition: Expr, + pub body: Box<Statement>, +} + +pub type Block = Vec<Statement>; + +#[derive(Debug)] +pub struct Function { + pub name: Token, + pub params: Vec<Token>, + pub body: Block, +} + +#[derive(Debug)] +pub enum Statement { + Expr(Expr), + Print(Expr), + Var(Var), + Block(Block), + If(If), + While(While), + Function(Rc<Function>), + Return(Return), +} + +// Parser + +/* +program → declaration* EOF ; + +declaration → funDecl + | varDecl + | statement ; + +funDecl → "fun" function ; +function → IDENTIFIER "(" parameters? ")" block ; +parameters → IDENTIFIER ( "," IDENTIFIER )* ; + + +statement → exprStmt + | forStmt + | ifStmt + | printStmt + | returnStmt + | whileStmt + | block ; + +forStmt → "for" "(" ( varDecl | exprStmt | ";" ) + expression? ";" + expression? ")" statement ; + +returnStmt → "return" expression? ";" ; + +whileStmt → "while" "(" expression ")" statement ; + +exprStmt → expression ";" ; + +ifStmt → "if" "(" expression ")" statement + ( "else" statement )? ; + +printStmt → "print" expression ";" ; + +expression → assignment ; +assignment → IDENTIFIER "=" assignment + | logic_or ; +logic_or → logic_and ( "or" logic_and )* ; +logic_and → equality ( "and" equality )* ; +equality → comparison ( ( "!=" | "==" ) comparison )* ; +comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ; +term → factor ( ( "-" | "+" ) factor )* ; +factor → unary ( ( "/" | "*" ) unary )* ; +unary → ( "!" | "-" ) unary | call ; +call → primary ( "(" arguments? ")" )* ; +arguments → expression ( "," expression )* ; +primary → NUMBER | STRING | "true" | "false" | "nil" + | "(" expression ")" ; +*/ + +struct Parser { + tokens: Vec<Token>, + current: usize, +} + +type ExprResult = Result<Expr, Error>; +type StmtResult = Result<Statement, Error>; + +impl Parser { + // recursive-descent parser functions + + fn declaration(&mut self) -> StmtResult { + if self.match_token(&TokenKind::Fun) { + return self.function(); + } + + if self.match_token(&TokenKind::Var) { + return self.var_declaration(); + } + + self.statement() + } + + fn function(&mut self) -> StmtResult { + let name = self.identifier("Expected function name.")?; + + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expect '(' after function name."), + )?; + + let mut params = vec![]; + + if !self.check_token(&TokenKind::RightParen) { + loop { + if params.len() >= 255 { + return Err(Error { + line: self.peek().line, + kind: ErrorKind::InternalError("255 parameter limit exceeded.".into()), + }); + } + + params.push(self.identifier("Expected parameter name.")?); + + if !self.match_token(&TokenKind::Comma) { + break; + } + } + } + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expect ')' after parameters."), + )?; + + self.consume( + &TokenKind::LeftBrace, + ErrorKind::ExpectedToken("Expect '{' before function body."), + )?; + + Ok(Statement::Function(Rc::new(Function { + name, + params, + body: self.block_statement()?, + }))) + } + + fn var_declaration(&mut self) -> StmtResult { + // Since `TokenKind::Identifier` carries data, we can't use + // `consume`. + let mut var = Var { + name: self.identifier("Expected variable name.")?, + initialiser: None, + }; + + if self.match_token(&TokenKind::Equal) { + var.initialiser = Some(self.expression()?); + } + + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Var(var)) + } + + fn statement(&mut self) -> StmtResult { + if self.match_token(&TokenKind::Print) { + self.print_statement() + } else if self.match_token(&TokenKind::LeftBrace) { + Ok(Statement::Block(self.block_statement()?)) + } else if self.match_token(&TokenKind::If) { + self.if_statement() + } else if self.match_token(&TokenKind::While) { + self.while_statement() + } else if self.match_token(&TokenKind::For) { + self.for_statement() + } else if self.match_token(&TokenKind::Return) { + self.return_statement() + } else { + self.expr_statement() + } + } + + fn print_statement(&mut self) -> StmtResult { + let expr = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Print(expr)) + } + + fn block_statement(&mut self) -> Result<Block, Error> { + let mut block: Block = vec![]; + + while !self.check_token(&TokenKind::RightBrace) && !self.is_at_end() { + block.push(self.declaration()?); + } + + self.consume(&TokenKind::RightBrace, ErrorKind::ExpectedClosingBrace)?; + + Ok(block) + } + + fn if_statement(&mut self) -> StmtResult { + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'if'"), + )?; + let condition = self.expression()?; + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after condition"), + )?; + + let then_branch = Box::new(self.statement()?); + + let mut stmt = If { + condition, + then_branch, + else_branch: Option::None, + }; + + if self.match_token(&TokenKind::Else) { + stmt.else_branch = Some(Box::new(self.statement()?)); + } + + Ok(Statement::If(stmt)) + } + + fn while_statement(&mut self) -> StmtResult { + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'while'"), + )?; + + let condition = self.expression()?; + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after 'while'"), + )?; + + Ok(Statement::While(While { + condition, + body: Box::new(self.statement()?), + })) + } + + fn for_statement(&mut self) -> StmtResult { + // Parsing of clauses ... + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'for'"), + )?; + + let initialiser = if self.match_token(&TokenKind::Semicolon) { + None + } else if self.match_token(&TokenKind::Var) { + Some(self.var_declaration()?) + } else { + Some(self.expr_statement()?) + }; + + let condition = if self.check_token(&TokenKind::Semicolon) { + // unspecified condition => infinite loop + Expr::Literal(Literal::Boolean(true)) + } else { + self.expression()? + }; + + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + + let increment = if self.check_token(&TokenKind::RightParen) { + None + } else { + Some(self.expression()?) + }; + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after for clauses"), + )?; + + let mut body = self.statement()?; + + // ... desugaring to while + + if let Some(inc) = increment { + body = Statement::Block(vec![body, Statement::Expr(inc)]); + } + + body = Statement::While(While { + condition, + body: Box::new(body), + }); + + if let Some(init) = initialiser { + body = Statement::Block(vec![init, body]); + } + + Ok(body) + } + + fn return_statement(&mut self) -> StmtResult { + let value = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Return(Return { value })) + } + + fn expr_statement(&mut self) -> StmtResult { + let expr = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Expr(expr)) + } + + fn expression(&mut self) -> ExprResult { + self.assignment() + } + + fn assignment(&mut self) -> ExprResult { + let expr = self.logic_or()?; + + if self.match_token(&TokenKind::Equal) { + let equals = self.previous().clone(); + let value = self.assignment()?; + + if let Expr::Variable(Variable { name, .. }) = expr { + return Ok(Expr::Assign(Assign { + name, + value: Box::new(value), + depth: None, + })); + } + + return Err(Error { + line: equals.line, + kind: ErrorKind::InvalidAssignmentTarget(format!("{:?}", equals)), + }); + } + + Ok(expr) + } + + fn logic_or(&mut self) -> ExprResult { + let mut expr = self.logic_and()?; + + while self.match_token(&TokenKind::Or) { + expr = Expr::Logical(Logical { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(self.logic_and()?), + }) + } + + Ok(expr) + } + + fn logic_and(&mut self) -> ExprResult { + let mut expr = self.equality()?; + + while self.match_token(&TokenKind::And) { + expr = Expr::Logical(Logical { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(self.equality()?), + }) + } + + Ok(expr) + } + + fn equality(&mut self) -> ExprResult { + self.binary_operator( + &[TokenKind::BangEqual, TokenKind::EqualEqual], + Self::comparison, + ) + } + + fn comparison(&mut self) -> ExprResult { + self.binary_operator( + &[ + TokenKind::Greater, + TokenKind::GreaterEqual, + TokenKind::Less, + TokenKind::LessEqual, + ], + Self::term, + ) + } + + fn term(&mut self) -> ExprResult { + self.binary_operator(&[TokenKind::Minus, TokenKind::Plus], Self::factor) + } + + fn factor(&mut self) -> ExprResult { + self.binary_operator(&[TokenKind::Slash, TokenKind::Star], Self::unary) + } + + fn unary(&mut self) -> ExprResult { + if self.match_token(&TokenKind::Bang) || self.match_token(&TokenKind::Minus) { + return Ok(Expr::Unary(Unary { + operator: self.previous().clone(), + right: Box::new(self.unary()?), + })); + } + + return self.call(); + } + + fn call(&mut self) -> ExprResult { + let mut expr = self.primary()?; + + loop { + if self.match_token(&TokenKind::LeftParen) { + expr = self.finish_call(expr)?; + } else { + break; + } + } + + Ok(expr) + } + + fn finish_call(&mut self, callee: Expr) -> ExprResult { + let mut args = vec![]; + + if !self.check_token(&TokenKind::RightParen) { + loop { + // TODO(tazjin): Check for max args count + args.push(self.expression()?); + if !self.match_token(&TokenKind::Comma) { + break; + } + } + } + + let paren = self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expect ')' after arguments."), + )?; + + Ok(Expr::Call(Call { + args, + callee: Box::new(callee), + paren, + })) + } + + fn primary(&mut self) -> ExprResult { + let next = self.advance(); + let literal = match next.kind { + TokenKind::True => Literal::Boolean(true), + TokenKind::False => Literal::Boolean(false), + TokenKind::Nil => Literal::Nil, + TokenKind::Number(num) => Literal::Number(num), + TokenKind::String(string) => Literal::String(string), + + TokenKind::LeftParen => { + let expr = self.expression()?; + self.consume(&TokenKind::RightParen, ErrorKind::UnmatchedParens)?; + return Ok(Expr::Grouping(Grouping(Box::new(expr)))); + } + + TokenKind::Identifier(_) => { + return Ok(Expr::Variable(Variable { + name: next, + depth: None, + })) + } + + unexpected => { + eprintln!("encountered {:?}", unexpected); + return Err(Error { + line: next.line, + kind: ErrorKind::ExpectedExpression(next.lexeme), + }); + } + }; + + Ok(Expr::Literal(literal)) + } + + // internal helpers + + fn identifier(&mut self, err: &'static str) -> Result<Token, Error> { + if let TokenKind::Identifier(_) = self.peek().kind { + Ok(self.advance()) + } else { + Err(Error { + line: self.peek().line, + kind: ErrorKind::ExpectedToken(err), + }) + } + } + + /// Check if the next token is in `oneof`, and advance if it is. + fn match_token(&mut self, token: &TokenKind) -> bool { + if self.check_token(token) { + self.advance(); + return true; + } + + false + } + + /// Return the next token and advance parser state. + fn advance(&mut self) -> Token { + if !self.is_at_end() { + self.current += 1; + } + + return self.previous().clone(); + } + + fn is_at_end(&self) -> bool { + self.check_token(&TokenKind::Eof) + } + + /// Is the next token `token`? + fn check_token(&self, token: &TokenKind) -> bool { + self.peek().kind == *token + } + + fn peek(&self) -> &Token { + &self.tokens[self.current] + } + + fn previous(&self) -> &Token { + &self.tokens[self.current - 1] + } + + fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<Token, Error> { + if self.check_token(kind) { + return Ok(self.advance()); + } + + Err(Error { + line: self.peek().line, + kind: err, + }) + } + + fn synchronise(&mut self) { + self.advance(); + + while !self.is_at_end() { + if self.previous().kind == TokenKind::Semicolon { + return; + } + + match self.peek().kind { + TokenKind::Class + | TokenKind::Fun + | TokenKind::Var + | TokenKind::For + | TokenKind::If + | TokenKind::While + | TokenKind::Print + | TokenKind::Return => return, + + _ => { + self.advance(); + } + } + } + } + + fn binary_operator( + &mut self, + oneof: &[TokenKind], + each: fn(&mut Parser) -> ExprResult, + ) -> ExprResult { + let mut expr = each(self)?; + + while oneof.iter().any(|t| self.match_token(t)) { + expr = Expr::Binary(Binary { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(each(self)?), + }) + } + + return Ok(expr); + } +} + +pub fn parse(tokens: Vec<Token>) -> Result<Block, Vec<Error>> { + let mut parser = Parser { tokens, current: 0 }; + let mut program: Block = vec![]; + let mut errors: Vec<Error> = vec![]; + + while !parser.is_at_end() { + match parser.declaration() { + Err(err) => { + errors.push(err); + parser.synchronise(); + } + Ok(decl) => { + program.push(decl); + } + } + } + + if errors.is_empty() { + Ok(program) + } else { + Err(errors) + } +} |