diff options
author | Vincent Ambo <mail@tazj.in> | 2020-11-28T18·53+0100 |
---|---|---|
committer | tazjin <mail@tazj.in> | 2020-11-30T16·42+0000 |
commit | 349583d5a96dc182eda1776a94fcb4b80f4da873 (patch) | |
tree | 3414de3fb83ec3455f6f396df377a00edc2652dd | |
parent | 754edb46160609c2a6387d20a136ec6766e24139 (diff) |
feat(tazjin/rlox): Bootstrap recursive-descent parser for Lox r/1966
... mostly some AST boilerplate and a first top-level rule, plus boilerplate similar to that set up in the Scanner.

Change-Id: I605d1de23c47a3b3702ab4f62cd3371bc3988c7d
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2194
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
-rw-r--r-- | users/tazjin/rlox/src/main.rs | 1 | ||||
-rw-r--r-- | users/tazjin/rlox/src/parser.rs | 102 | ||||
-rw-r--r-- | users/tazjin/rlox/src/scanner.rs | 9 |
3 files changed, 107 insertions, 5 deletions
```diff
diff --git a/users/tazjin/rlox/src/main.rs b/users/tazjin/rlox/src/main.rs
index 2e0d692dba08..5caac3f1ae03 100644
--- a/users/tazjin/rlox/src/main.rs
+++ b/users/tazjin/rlox/src/main.rs
@@ -6,6 +6,7 @@ use std::process;
 
 mod errors;
 mod interpreter;
+mod parser;
 mod scanner;
 
 fn main() {
diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs
new file mode 100644
index 000000000000..faac88009828
--- /dev/null
+++ b/users/tazjin/rlox/src/parser.rs
@@ -0,0 +1,102 @@
+// This implements the grammar of Lox as described starting in the
+// Crafting Interpreters chapter "Representing Code". Note that the
+// upstream Java implementation works about Java being bad at value
+// classes by writing a code generator for Java.
+//
+// My Rust implementation skips this step because it's unnecessary, we
+// have real types.
+use crate::scanner::{Token, TokenKind};
+
+// AST
+
+#[derive(Debug)]
+struct Binary<'a> {
+    left: Box<Expr<'a>>,
+    right: Box<Expr<'a>>,
+    operator: Token<'a>,
+}
+
+#[derive(Debug)]
+struct Grouping<'a>(Box<Expr<'a>>);
+
+#[derive(Debug)]
+struct Literal(TokenKind);
+
+#[derive(Debug)]
+enum Expr<'a> {
+    Binary(Binary<'a>),
+    Grouping(Grouping<'a>),
+    Literal(Literal),
+}
+
+// Parser
+
+/*
+expression     → equality ;
+equality       → comparison ( ( "!=" | "==" ) comparison )* ;
+comparison     → term ( ( ">" | ">=" | "<" | "<=" ) term )* ;
+term           → factor ( ( "-" | "+" ) factor )* ;
+factor         → unary ( ( "/" | "*" ) unary )* ;
+unary          → ( "!" | "-" ) unary
+               | primary ;
+primary        → NUMBER | STRING | "true" | "false" | "nil"
+               | "(" expression ")" ;
+*/
+
+struct Parser<'a> {
+    tokens: Vec<Token<'a>>,
+    current: usize,
+}
+
+impl<'a> Parser<'a> {
+    // recursive-descent parser functions
+
+    fn expression(&mut self) -> Expr<'a> {
+        self.equality()
+    }
+
+    fn equality(&mut self) -> Expr<'a> {
+        let expr = self.comparison();
+        unimplemented!()
+    }
+
+    fn comparison(&mut self) -> Expr<'a> {
+        unimplemented!()
+    }
+
+    // internal helpers
+    fn match_token(&mut self, oneof: &[TokenKind]) -> bool {
+        for token in oneof {
+            if self.check_token(token) {
+                self.advance();
+                return true;
+            }
+        }
+
+        return false;
+    }
+
+    fn advance(&mut self) -> &Token {
+        if !self.is_at_end() {
+            self.current += 1;
+        }
+
+        return self.previous();
+    }
+
+    fn is_at_end(&self) -> bool {
+        self.check_token(&TokenKind::Eof)
+    }
+
+    fn check_token(&self, token: &TokenKind) -> bool {
+        self.peek().kind == *token
+    }
+
+    fn peek(&self) -> &Token {
+        &self.tokens[self.current]
+    }
+
+    fn previous(&self) -> &Token {
+        &self.tokens[self.current - 1]
+    }
+}
diff --git a/users/tazjin/rlox/src/scanner.rs b/users/tazjin/rlox/src/scanner.rs
index 2eed89bd6beb..eeb247d9bd53 100644
--- a/users/tazjin/rlox/src/scanner.rs
+++ b/users/tazjin/rlox/src/scanner.rs
@@ -1,6 +1,6 @@
 use crate::errors::{Error, ErrorKind};
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum TokenKind {
     // Single-character tokens.
     LeftParen,
@@ -54,10 +54,9 @@ pub enum TokenKind {
 
 #[derive(Debug)]
 pub struct Token<'a> {
-    kind: TokenKind,
-    lexeme: &'a [char],
-    // literal: Object, // TODO(tazjin): Uhh?
-    line: usize,
+    pub kind: TokenKind,
+    pub lexeme: &'a [char],
+    pub line: usize,
 }
 
 struct Scanner<'a> {
```