about summary refs log tree commit diff
diff options
context:
space:
mode:
authorVincent Ambo <mail@tazj.in>2020-11-28T18·53+0100
committertazjin <mail@tazj.in>2020-11-30T16·42+0000
commit349583d5a96dc182eda1776a94fcb4b80f4da873 (patch)
tree3414de3fb83ec3455f6f396df377a00edc2652dd
parent754edb46160609c2a6387d20a136ec6766e24139 (diff)
feat(tazjin/rlox): Bootstrap recursive-descent parser for Lox r/1966
... mostly some AST boilerplate and a first top-level rule, plus
boilerplate similar to that set up in the Scanner.

Change-Id: I605d1de23c47a3b3702ab4f62cd3371bc3988c7d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2194
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
-rw-r--r--users/tazjin/rlox/src/main.rs1
-rw-r--r--users/tazjin/rlox/src/parser.rs102
-rw-r--r--users/tazjin/rlox/src/scanner.rs9
3 files changed, 107 insertions, 5 deletions
diff --git a/users/tazjin/rlox/src/main.rs b/users/tazjin/rlox/src/main.rs
index 2e0d692dba..5caac3f1ae 100644
--- a/users/tazjin/rlox/src/main.rs
+++ b/users/tazjin/rlox/src/main.rs
@@ -6,6 +6,7 @@ use std::process;
 
 mod errors;
 mod interpreter;
+mod parser;
 mod scanner;
 
 fn main() {
diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs
new file mode 100644
index 0000000000..faac880098
--- /dev/null
+++ b/users/tazjin/rlox/src/parser.rs
@@ -0,0 +1,102 @@
+// This implements the grammar of Lox as described starting in the
+// Crafting Interpreters chapter "Representing Code". Note that the
+// upstream Java implementation works around Java being bad at value
+// classes by writing a code generator for Java.
+//
+// My Rust implementation skips this step because it's unnecessary: we
+// have real types.
+use crate::scanner::{Token, TokenKind};
+
+// AST
+
+#[derive(Debug)]
+struct Binary<'a> {
+    left: Box<Expr<'a>>,
+    right: Box<Expr<'a>>,
+    operator: Token<'a>,
+}
+
+#[derive(Debug)]
+struct Grouping<'a>(Box<Expr<'a>>);
+
+#[derive(Debug)]
+struct Literal(TokenKind);
+
+#[derive(Debug)]
+enum Expr<'a> {
+    Binary(Binary<'a>),
+    Grouping(Grouping<'a>),
+    Literal(Literal),
+}
+
+// Parser
+
+/*
+expression     → equality ;
+equality       → comparison ( ( "!=" | "==" ) comparison )* ;
+comparison     → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
+term           → factor ( ( "-" | "+" ) factor )* ;
+factor         → unary ( ( "/" | "*" ) unary )* ;
+unary          → ( "!" | "-" ) unary
+               | primary ;
+primary        → NUMBER | STRING | "true" | "false" | "nil"
+               | "(" expression ")" ;
+*/
+
+struct Parser<'a> {
+    tokens: Vec<Token<'a>>,
+    current: usize,
+}
+
+impl<'a> Parser<'a> {
+    // recursive-descent parser functions
+
+    fn expression(&mut self) -> Expr<'a> {
+        self.equality()
+    }
+
+    fn equality(&mut self) -> Expr<'a> {
+        let expr = self.comparison();
+        unimplemented!()
+    }
+
+    fn comparison(&mut self) -> Expr<'a> {
+        unimplemented!()
+    }
+
+    // internal helpers
+    fn match_token(&mut self, oneof: &[TokenKind]) -> bool {
+        for token in oneof {
+            if self.check_token(token) {
+                self.advance();
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    fn advance(&mut self) -> &Token {
+        if !self.is_at_end() {
+            self.current += 1;
+        }
+
+        return self.previous();
+    }
+
+    fn is_at_end(&self) -> bool {
+        self.check_token(&TokenKind::Eof)
+    }
+
+    fn check_token(&self, token: &TokenKind) -> bool {
+        self.peek().kind == *token
+    }
+
+    fn peek(&self) -> &Token {
+        &self.tokens[self.current]
+    }
+
+    fn previous(&self) -> &Token {
+        &self.tokens[self.current - 1]
+    }
+}
diff --git a/users/tazjin/rlox/src/scanner.rs b/users/tazjin/rlox/src/scanner.rs
index 2eed89bd6b..eeb247d9bd 100644
--- a/users/tazjin/rlox/src/scanner.rs
+++ b/users/tazjin/rlox/src/scanner.rs
@@ -1,6 +1,6 @@
 use crate::errors::{Error, ErrorKind};
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum TokenKind {
     // Single-character tokens.
     LeftParen,
@@ -54,10 +54,9 @@ pub enum TokenKind {
 
 #[derive(Debug)]
 pub struct Token<'a> {
-    kind: TokenKind,
-    lexeme: &'a [char],
-    // literal: Object, // TODO(tazjin): Uhh?
-    line: usize,
+    pub kind: TokenKind,
+    pub lexeme: &'a [char],
+    pub line: usize,
 }
 
 struct Scanner<'a> {