feat(tazjin/rlox): Bootstrap recursive-descent parser for Lox r/1966

... mostly some AST boilerplate and a first top-level rule, plus boilerplate similar to that set up in the Scanner. Change-Id: I605d1de23c47a3b3702ab4f62cd3371bc3988c7d Reviewed-on: https://cl.tvl.fyi/c/depot/+/2194 Reviewed-by: tazjin <mail@tazj.in> Tested-by: BuildkiteCI
author: Vincent Ambo <mail@tazj.in> 2020-11-28T18·53+0100
committer: tazjin <mail@tazj.in> 2020-11-30T16·42+0000
commit: 349583d5a96dc182eda1776a94fcb4b80f4da873 (patch)
tree: 3414de3fb83ec3455f6f396df377a00edc2652dd /users/tazjin/rlox/src/parser.rs
parent: 754edb46160609c2a6387d20a136ec6766e24139 (diff)
1 files changed, 102 insertions, 0 deletions
diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs
new file mode 100644
index 000000000000..faac88009828
--- /dev/null
+++ b/users/tazjin/rlox/src/parser.rs
@@ -0,0 +1,102 @@
+// This implements the grammar of Lox as described starting in the
+// Crafting Interpreters chapter "Representing Code". Note that the
+// upstream Java implementation works around Java being bad at value
+// classes by writing a code generator for Java.
+//
+// My Rust implementation skips this step because it's unnecessary: we
+// have real types.
+use crate::scanner::{Token, TokenKind};
+
+// AST
+
+/// AST node for an expression with two operands and an operator token.
+///
+/// Both operands are boxed because `Expr` is recursive: `Expr::Binary`
+/// contains a `Binary`, which in turn contains further `Expr` values.
+#[derive(Debug)]
+struct Binary<'a> {
+    // Left-hand operand.
+    left: Box<Expr<'a>>,
+    // Right-hand operand.
+    right: Box<Expr<'a>>,
+    // Scanner token for the operator sitting between the two operands.
+    operator: Token<'a>,
+}
+
+/// AST node wrapping a single boxed inner expression.
+///
+/// Presumably this represents the parenthesised `"(" expression ")"`
+/// alternative of the `primary` rule in the grammar further down in
+/// this file; nothing constructs it yet.
+#[derive(Debug)]
+struct Grouping<'a>(Box<Expr<'a>>);
+
+/// AST node for a literal, stored as the scanner's `TokenKind`.
+///
+/// NOTE(review): this assumes the relevant `TokenKind` variants carry
+/// the literal's value (number, string, ...) - confirm against the
+/// scanner module.
+#[derive(Debug)]
+struct Literal(TokenKind);
+
+/// Any expression node of the AST.
+///
+/// Each variant wraps the struct of the same name that holds the
+/// node's data.
+#[derive(Debug)]
+enum Expr<'a> {
+    // Two operands joined by an operator token.
+    Binary(Binary<'a>),
+    // A single wrapped inner expression.
+    Grouping(Grouping<'a>),
+    // A literal, represented by its `TokenKind`.
+    Literal(Literal),
+}
+
+// Parser
+
+/*
+expression     → equality ;
+equality       → comparison ( ( "!=" | "==" ) comparison )* ;
+comparison     → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
+term           → factor ( ( "-" | "+" ) factor )* ;
+factor         → unary ( ( "/" | "*" ) unary )* ;
+unary          → ( "!" | "-" ) unary
+               | primary ;
+primary        → NUMBER | STRING | "true" | "false" | "nil"
+               | "(" expression ")" ;
+*/
+
+/// Parser state: the token list being parsed plus a cursor into it.
+struct Parser<'a> {
+    // Tokens to parse. `is_at_end` detects the end of input by looking
+    // for a token of kind `TokenKind::Eof`.
+    tokens: Vec<Token<'a>>,
+    // Index into `tokens` of the token that `peek` returns.
+    current: usize,
+}
+
+impl<'a> Parser<'a> {
+    // recursive-descent parser functions
+
+    /// Parses the `expression` rule, which delegates directly to
+    /// `equality`.
+    fn expression(&mut self) -> Expr<'a> {
+        self.equality()
+    }
+
+    /// Parses the `equality` rule.
+    ///
+    /// # Panics
+    ///
+    /// Always panics for now: the rule is not implemented yet, and
+    /// neither is `comparison`, which it calls first.
+    fn equality(&mut self) -> Expr<'a> {
+        // TODO: fold `( "!=" | "==" ) comparison` pairs onto this operand.
+        let _left = self.comparison();
+        unimplemented!()
+    }
+
+    /// Parses the `comparison` rule.
+    ///
+    /// # Panics
+    ///
+    /// Always panics for now: the rule is not implemented yet.
+    fn comparison(&mut self) -> Expr<'a> {
+        unimplemented!()
+    }
+
+    // internal helpers
+
+    /// Consumes the current token if its kind equals any entry of
+    /// `oneof`.
+    ///
+    /// Returns `true` when a token matched (the cursor has then been
+    /// advanced via `advance`), and `false` when nothing matched, in
+    /// which case the cursor is left untouched.
+    fn match_token(&mut self, oneof: &[TokenKind]) -> bool {
+        if oneof.iter().any(|kind| self.check_token(kind)) {
+            self.advance();
+            true
+        } else {
+            false
+        }
+    }
+
+    /// Moves the cursor one token forward, unless the current token is
+    /// `Eof`, and returns the token just before the cursor.
+    ///
+    /// The returned token keeps the source lifetime `'a`, so it can be
+    /// cloned into AST nodes such as `Binary<'a>`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the cursor is still at index 0 after the optional
+    /// step, i.e. when the very first token is already `Eof`.
+    fn advance(&mut self) -> &Token<'a> {
+        if !self.is_at_end() {
+            self.current += 1;
+        }
+
+        self.previous()
+    }
+
+    /// Returns whether the current token has kind `TokenKind::Eof`.
+    fn is_at_end(&self) -> bool {
+        self.check_token(&TokenKind::Eof)
+    }
+
+    /// Returns whether the current token's kind equals `token`; the
+    /// cursor is not moved.
+    fn check_token(&self, token: &TokenKind) -> bool {
+        self.peek().kind == *token
+    }
+
+    /// Returns the token under the cursor without consuming it.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `current` is not a valid index into `tokens`.
+    fn peek(&self) -> &Token<'a> {
+        &self.tokens[self.current]
+    }
+
+    /// Returns the token immediately before the cursor.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `current` is 0, because there is no earlier token.
+    fn previous(&self) -> &Token<'a> {
+        &self.tokens[self.current - 1]
+    }
+}
author	Vincent Ambo <mail@tazj.in>	2020-11-28T18·53+0100
committer	tazjin <mail@tazj.in>	2020-11-30T16·42+0000
commit	349583d5a96dc182eda1776a94fcb4b80f4da873 (patch)
tree	3414de3fb83ec3455f6f396df377a00edc2652dd /users/tazjin/rlox/src/parser.rs
parent	754edb46160609c2a6387d20a136ec6766e24139 (diff)