about summary refs log tree commit diff
diff options
context:
space:
mode:
authorVincent Ambo <mail@tazj.in>2020-12-06T13·21+0100
committertazjin <mail@tazj.in>2020-12-06T14·30+0000
commit5fcff11eae107b76177a581dc604ac0408213b02 (patch)
treea17e08bfd4a55017c9a578b1cddf623080b97c4c
parenta8371b01df1b1d16f682e6685cc62fcba4bc54d2 (diff)
feat(tazjin/rlox): Implement parsing up to unary expressions r/1987
... with the exception of parenthesised expressions, because error
threading is not implemented yet.

Change-Id: I8d455d85e647548d5b71cbfd3d078f4970dab7fb
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2232
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
-rw-r--r--users/tazjin/rlox/src/parser.rs114
-rw-r--r--users/tazjin/rlox/src/scanner.rs10
2 files changed, 109 insertions, 15 deletions
diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs
index faac880098..bc253971eb 100644
--- a/users/tazjin/rlox/src/parser.rs
+++ b/users/tazjin/rlox/src/parser.rs
@@ -1,6 +1,6 @@
 // This implements the grammar of Lox as described starting in the
 // Crafting Interpreters chapter "Representing Code". Note that the
-// upstream Java implementation works about Java being bad at value
+// upstream Java implementation works around Java being bad at value
 // classes by writing a code generator for Java.
 //
 // My Rust implementation skips this step because it's unnecessary, we
@@ -12,21 +12,33 @@ use crate::scanner::{Token, TokenKind};
 #[derive(Debug)]
 struct Binary<'a> {
     left: Box<Expr<'a>>,
-    right: Box<Expr<'a>>,
     operator: Token<'a>,
+    right: Box<Expr<'a>>,
 }
 
 #[derive(Debug)]
 struct Grouping<'a>(Box<Expr<'a>>);
 
 #[derive(Debug)]
-struct Literal(TokenKind);
+enum Literal {
+    Boolean(bool),
+    Number(f64),
+    String(String),
+    Nil,
+}
+
+#[derive(Debug)]
+struct Unary<'a> {
+    operator: Token<'a>,
+    right: Box<Expr<'a>>,
+}
 
 #[derive(Debug)]
 enum Expr<'a> {
     Binary(Binary<'a>),
     Grouping(Grouping<'a>),
     Literal(Literal),
+    Unary(Unary<'a>),
 }
 
 // Parser
@@ -56,15 +68,76 @@ impl<'a> Parser<'a> {
     }
 
     fn equality(&mut self) -> Expr<'a> {
-        let expr = self.comparison();
-        unimplemented!()
+        self.binary_operator(
+            &[TokenKind::BangEqual, TokenKind::EqualEqual],
+            Self::comparison,
+            Self::comparison,
+        )
     }
 
     fn comparison(&mut self) -> Expr<'a> {
-        unimplemented!()
+        self.binary_operator(
+            &[
+                TokenKind::Greater,
+                TokenKind::GreaterEqual,
+                TokenKind::Less,
+                TokenKind::LessEqual,
+            ],
+            Self::term,
+            Self::term,
+        )
+    }
+
+    fn term(&mut self) -> Expr<'a> {
+        self.binary_operator(
+            &[TokenKind::Minus, TokenKind::Plus],
+            Self::factor,
+            Self::factor,
+        )
+    }
+
+    fn factor(&mut self) -> Expr<'a> {
+        self.binary_operator(
+            &[TokenKind::Slash, TokenKind::Star],
+            Self::unary,
+            Self::unary,
+        )
+    }
+
+    fn unary(&mut self) -> Expr<'a> {
+        if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) {
+            return Expr::Unary(Unary {
+                operator: self.previous(),
+                right: Box::new(self.unary()),
+            });
+        }
+
+        return self.primary();
+    }
+
+    fn primary(&mut self) -> Expr<'a> {
+        let next = self.advance();
+        let literal = match next.kind {
+            TokenKind::True => Literal::Boolean(true),
+            TokenKind::False => Literal::Boolean(false),
+            TokenKind::Nil => Literal::Nil,
+            TokenKind::Number(num) => Literal::Number(num),
+            TokenKind::String(string) => Literal::String(string),
+
+            TokenKind::LeftParen => {
+                unimplemented!("need error handling to deal with unbalanced parens");
+            }
+
+            // This branch indicates a parser bug, not invalid input.
+            unexpected => panic!("Parser encountered unexpected token '{:?}'", unexpected),
+        };
+
+        Expr::Literal(literal)
     }
 
     // internal helpers
+
+    /// Check if the next token is in `oneof`, and advance if it is.
     fn match_token(&mut self, oneof: &[TokenKind]) -> bool {
         for token in oneof {
             if self.check_token(token) {
@@ -76,7 +149,8 @@ impl<'a> Parser<'a> {
         return false;
     }
 
-    fn advance(&mut self) -> &Token {
+    /// Return the next token and advance parser state.
+    fn advance(&mut self) -> Token<'a> {
         if !self.is_at_end() {
             self.current += 1;
         }
@@ -88,15 +162,35 @@ impl<'a> Parser<'a> {
         self.check_token(&TokenKind::Eof)
     }
 
+    /// Is the next token `token`?
     fn check_token(&self, token: &TokenKind) -> bool {
         self.peek().kind == *token
     }
 
-    fn peek(&self) -> &Token {
+    fn peek(&self) -> &Token<'a> {
         &self.tokens[self.current]
     }
 
-    fn previous(&self) -> &Token {
-        &self.tokens[self.current - 1]
+    fn previous(&self) -> Token<'a> {
+        self.tokens[self.current - 1].clone()
+    }
+
+    fn binary_operator(
+        &mut self,
+        oneof: &[TokenKind],
+        left: fn(&mut Parser<'a>) -> Expr<'a>,
+        right: fn(&mut Parser<'a>) -> Expr<'a>,
+    ) -> Expr<'a> {
+        let mut expr = left(self);
+
+        while self.match_token(oneof) {
+            expr = Expr::Binary(Binary {
+                left: Box::new(expr),
+                operator: self.previous(),
+                right: Box::new(right(self)),
+            })
+        }
+
+        return expr;
     }
 }
diff --git a/users/tazjin/rlox/src/scanner.rs b/users/tazjin/rlox/src/scanner.rs
index eeb247d9bd..fde4397531 100644
--- a/users/tazjin/rlox/src/scanner.rs
+++ b/users/tazjin/rlox/src/scanner.rs
@@ -1,6 +1,6 @@
 use crate::errors::{Error, ErrorKind};
 
-#[derive(Debug, PartialEq)]
+#[derive(Clone, Debug, PartialEq)]
 pub enum TokenKind {
     // Single-character tokens.
     LeftParen,
@@ -29,22 +29,22 @@ pub enum TokenKind {
     Identifier(String),
     String(String),
     Number(f64),
+    True,
+    False,
+    Nil,
 
     // Keywords.
     And,
     Class,
     Else,
-    False,
     Fun,
     For,
     If,
-    Nil,
     Or,
     Print,
     Return,
     Super,
     This,
-    True,
     Var,
     While,
 
@@ -52,7 +52,7 @@ pub enum TokenKind {
     Eof,
 }
 
-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub struct Token<'a> {
     pub kind: TokenKind,
     pub lexeme: &'a [char],