about summary refs log tree commit diff
path: root/users/tazjin/rlox
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2020-12-06T17·03+0100
committer tazjin <mail@tazj.in> 2020-12-06T17·34+0000
commit 42405bfa249c3e6ec5e2b7d8fd9836b55269e68f (patch)
tree f27859483b4679b91011c71c44818da83b0c0f8b /users/tazjin/rlox
parent 1835b2be990f51f4111c847aa8ad3c8477191eba (diff)
feat(tazjin/rlox): Synchronise parser state after errors r/1991
This lets the parser collect multiple errors instead of returning
after the first one, with some optimistic synchronisation after
encountering something that looks wonky.

Change-Id: Ie9d0ce8de9dcc7a3d1e7aa2abe15f74cab0ab96b
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2236
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/tazjin/rlox')
-rw-r--r-- users/tazjin/rlox/src/errors.rs 1
-rw-r--r-- users/tazjin/rlox/src/interpreter.rs 2
-rw-r--r-- users/tazjin/rlox/src/parser.rs 74
3 files changed, 67 insertions, 10 deletions
diff --git a/users/tazjin/rlox/src/errors.rs b/users/tazjin/rlox/src/errors.rs
index 6bd922bc6c..9ea303829e 100644
--- a/users/tazjin/rlox/src/errors.rs
+++ b/users/tazjin/rlox/src/errors.rs
@@ -3,6 +3,7 @@ pub enum ErrorKind {
     UnexpectedChar(char),
     UnterminatedString,
     UnmatchedParens,
+    ExpectedExpression(String),
 }
 
 #[derive(Debug)]
diff --git a/users/tazjin/rlox/src/interpreter.rs b/users/tazjin/rlox/src/interpreter.rs
index 8a4d5cfef0..7c5a18dd9a 100644
--- a/users/tazjin/rlox/src/interpreter.rs
+++ b/users/tazjin/rlox/src/interpreter.rs
@@ -11,7 +11,7 @@ pub fn run(code: &str) {
             print_tokens(&tokens);
             match parser::parse(tokens) {
                 Ok(expr) => println!("Expression:\n{:?}", expr),
-                Err(error) => report_errors(vec![error]),
+                Err(errors) => report_errors(errors),
             }
         }
         Err(errors) => report_errors(errors),
diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs
index cf96981e5b..0d5f72fe4d 100644
--- a/users/tazjin/rlox/src/parser.rs
+++ b/users/tazjin/rlox/src/parser.rs
@@ -100,7 +100,7 @@ impl<'a> Parser<'a> {
     fn unary(&mut self) -> ExprResult<'a> {
         if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
             return Ok(Expr::Unary(Unary {
-                operator: self.previous(),
+                operator: self.previous().clone(),
                 right: Box::new(self.unary()?),
             }));
         }
@@ -123,8 +123,13 @@ impl<'a> Parser<'a> {
                 return Ok(Expr::Grouping(Grouping(Box::new(expr))));
             }
 
-            // This branch indicates a parser bug, not invalid input.
-            unexpected => panic!("Parser encountered unexpected token '{:?}'", unexpected),
+            unexpected => {
+                eprintln!("encountered {:?}", unexpected);
+                return Err(Error {
+                    line: next.line,
+                    kind: ErrorKind::ExpectedExpression(next.lexeme.into_iter().collect()),
+                });
+            }
         };
 
         Ok(Expr::Literal(literal))
@@ -150,7 +155,7 @@ impl<'a> Parser<'a> {
             self.current += 1;
         }
 
-        return self.previous();
+        return self.previous().clone();
     }
 
     fn is_at_end(&self) -> bool {
@@ -166,8 +171,8 @@ impl<'a> Parser<'a> {
         &self.tokens[self.current]
     }
 
-    fn previous(&self) -> Token<'a> {
-        self.tokens[self.current - 1].clone()
+    fn previous(&self) -> &Token<'a> {
+        &self.tokens[self.current - 1]
     }
 
     fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<(), Error> {
@@ -182,6 +187,31 @@ impl<'a> Parser<'a> {
         })
     }
 
+    fn synchronise(&mut self) {
+        self.advance();
+
+        while !self.is_at_end() {
+            if self.previous().kind == TokenKind::Semicolon {
+                return;
+            }
+
+            match self.peek().kind {
+                TokenKind::Class
+                | TokenKind::Fun
+                | TokenKind::Var
+                | TokenKind::For
+                | TokenKind::If
+                | TokenKind::While
+                | TokenKind::Print
+                | TokenKind::Return => return,
+
+                _ => {
+                    self.advance();
+                }
+            }
+        }
+    }
+
     fn binary_operator(
         &mut self,
         oneof: &[TokenKind],
@@ -192,7 +222,7 @@ impl<'a> Parser<'a> {
         while self.match_token(oneof) {
             expr = Expr::Binary(Binary {
                 left: Box::new(expr),
-                operator: self.previous(),
+                operator: self.previous().clone(),
                 right: Box::new(each(self)?),
             })
         }
@@ -201,8 +231,34 @@ impl<'a> Parser<'a> {
     }
 }
 
-pub fn parse<'a>(tokens: Vec<Token<'a>>) -> ExprResult<'a> {
+pub fn parse<'a>(tokens: Vec<Token<'a>>) -> Result<Expr<'a>, Vec<Error>> {
     let mut parser = Parser { tokens, current: 0 };
+    let mut errors: Vec<Error> = vec![];
+
+    while !parser.is_at_end() {
+        match parser.expression() {
+            Err(err) => {
+                errors.push(err);
+                parser.synchronise();
+            }
+            Ok(expr) => {
+                if !parser.is_at_end() {
+                    // TODO(tazjin): This isn't a functional language
+                    // - multiple statements should be allowed, at
+                    // some point.
+                    let current = &parser.tokens[parser.current];
+                    errors.push(Error {
+                        line: current.line,
+                        kind: ErrorKind::UnexpectedChar(current.lexeme[0]),
+                    });
+                }
+
+                if errors.is_empty() {
+                    return Ok(expr);
+                }
+            }
+        }
+    }
 
-    parser.expression()
+    return Err(errors);
 }