From 42405bfa249c3e6ec5e2b7d8fd9836b55269e68f Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Sun, 6 Dec 2020 18:03:09 +0100 Subject: feat(tazjin/rlox): Synchronise parser state after errors This lets the parser collect multiple errors instead of returning after the first one, with some optimistic synchronisation after encountering something that looks wonky. Change-Id: Ie9d0ce8de9dcc7a3d1e7aa2abe15f74cab0ab96b Reviewed-on: https://cl.tvl.fyi/c/depot/+/2236 Reviewed-by: tazjin Tested-by: BuildkiteCI --- users/tazjin/rlox/src/errors.rs | 1 + users/tazjin/rlox/src/interpreter.rs | 2 +- users/tazjin/rlox/src/parser.rs | 74 +++++++++++++++++++++++++++++++----- 3 files changed, 67 insertions(+), 10 deletions(-) diff --git a/users/tazjin/rlox/src/errors.rs b/users/tazjin/rlox/src/errors.rs index 6bd922bc6c..9ea303829e 100644 --- a/users/tazjin/rlox/src/errors.rs +++ b/users/tazjin/rlox/src/errors.rs @@ -3,6 +3,7 @@ pub enum ErrorKind { UnexpectedChar(char), UnterminatedString, UnmatchedParens, + ExpectedExpression(String), } #[derive(Debug)] diff --git a/users/tazjin/rlox/src/interpreter.rs b/users/tazjin/rlox/src/interpreter.rs index 8a4d5cfef0..7c5a18dd9a 100644 --- a/users/tazjin/rlox/src/interpreter.rs +++ b/users/tazjin/rlox/src/interpreter.rs @@ -11,7 +11,7 @@ pub fn run(code: &str) { print_tokens(&tokens); match parser::parse(tokens) { Ok(expr) => println!("Expression:\n{:?}", expr), - Err(error) => report_errors(vec![error]), + Err(errors) => report_errors(errors), } } Err(errors) => report_errors(errors), diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs index cf96981e5b..0d5f72fe4d 100644 --- a/users/tazjin/rlox/src/parser.rs +++ b/users/tazjin/rlox/src/parser.rs @@ -100,7 +100,7 @@ impl<'a> Parser<'a> { fn unary(&mut self) -> ExprResult<'a> { if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) { return Ok(Expr::Unary(Unary { - operator: self.previous(), + operator: self.previous().clone(), right: Box::new(self.unary()?), 
})); } @@ -123,8 +123,13 @@ impl<'a> Parser<'a> { return Ok(Expr::Grouping(Grouping(Box::new(expr)))); } - // This branch indicates a parser bug, not invalid input. - unexpected => panic!("Parser encountered unexpected token '{:?}'", unexpected), + unexpected => { + eprintln!("encountered {:?}", unexpected); + return Err(Error { + line: next.line, + kind: ErrorKind::ExpectedExpression(next.lexeme.into_iter().collect()), + }); + } }; Ok(Expr::Literal(literal)) @@ -150,7 +155,7 @@ impl<'a> Parser<'a> { self.current += 1; } - return self.previous(); + return self.previous().clone(); } fn is_at_end(&self) -> bool { @@ -166,8 +171,8 @@ impl<'a> Parser<'a> { &self.tokens[self.current] } - fn previous(&self) -> Token<'a> { - self.tokens[self.current - 1].clone() + fn previous(&self) -> &Token<'a> { + &self.tokens[self.current - 1] } fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<(), Error> { @@ -182,6 +187,31 @@ impl<'a> Parser<'a> { }) } + fn synchronise(&mut self) { + self.advance(); + + while !self.is_at_end() { + if self.previous().kind == TokenKind::Semicolon { + return; + } + + match self.peek().kind { + TokenKind::Class + | TokenKind::Fun + | TokenKind::Var + | TokenKind::For + | TokenKind::If + | TokenKind::While + | TokenKind::Print + | TokenKind::Return => return, + + _ => { + self.advance(); + } + } + } + } + fn binary_operator( &mut self, oneof: &[TokenKind], @@ -192,7 +222,7 @@ impl<'a> Parser<'a> { while self.match_token(oneof) { expr = Expr::Binary(Binary { left: Box::new(expr), - operator: self.previous(), + operator: self.previous().clone(), right: Box::new(each(self)?), }) } @@ -201,8 +231,34 @@ impl<'a> Parser<'a> { } } -pub fn parse<'a>(tokens: Vec<Token<'a>>) -> ExprResult<'a> { +pub fn parse<'a>(tokens: Vec<Token<'a>>) -> Result<Expr<'a>, Vec<Error>> { let mut parser = Parser { tokens, current: 0 }; + let mut errors: Vec<Error> = vec![]; + + while !parser.is_at_end() { + match parser.expression() { + Err(err) => { + errors.push(err); + parser.synchronise(); + } + Ok(expr) 
=> { + if !parser.is_at_end() { + // TODO(tazjin): This isn't a functional language + // - multiple statements should be allowed, at + // some point. + let current = &parser.tokens[parser.current]; + errors.push(Error { + line: current.line, + kind: ErrorKind::UnexpectedChar(current.lexeme[0]), + }); + } + + if errors.is_empty() { + return Ok(expr); + } + } + } + } - parser.expression() + return Err(errors); } -- cgit 1.4.1