From 349583d5a96dc182eda1776a94fcb4b80f4da873 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Sat, 28 Nov 2020 19:53:51 +0100 Subject: feat(tazjin/rlox): Bootstrap recursive-descent parser for Lox ... mostly some AST boilerplate and a first top-level rule, plus boilerplate similar to that set up in the Scanner. Change-Id: I605d1de23c47a3b3702ab4f62cd3371bc3988c7d Reviewed-on: https://cl.tvl.fyi/c/depot/+/2194 Reviewed-by: tazjin Tested-by: BuildkiteCI --- users/tazjin/rlox/src/main.rs | 1 + users/tazjin/rlox/src/parser.rs | 102 +++++++++++++++++++++++++++++++++++++++ users/tazjin/rlox/src/scanner.rs | 9 ++-- 3 files changed, 107 insertions(+), 5 deletions(-) create mode 100644 users/tazjin/rlox/src/parser.rs diff --git a/users/tazjin/rlox/src/main.rs b/users/tazjin/rlox/src/main.rs index 2e0d692dba..5caac3f1ae 100644 --- a/users/tazjin/rlox/src/main.rs +++ b/users/tazjin/rlox/src/main.rs @@ -6,6 +6,7 @@ use std::process; mod errors; mod interpreter; +mod parser; mod scanner; fn main() { diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs new file mode 100644 index 0000000000..faac880098 --- /dev/null +++ b/users/tazjin/rlox/src/parser.rs @@ -0,0 +1,102 @@ +// This implements the grammar of Lox as described starting in the +// Crafting Interpreters chapter "Representing Code". Note that the +// upstream Java implementation works around Java being bad at value +// classes by writing a code generator for Java. +// +// My Rust implementation skips this step because it's unnecessary, we +// have real types.
+use crate::scanner::{Token, TokenKind}; + +// AST + +#[derive(Debug)] +struct Binary<'a> { + left: Box<Expr<'a>>, + right: Box<Expr<'a>>, + operator: Token<'a>, +} + +#[derive(Debug)] +struct Grouping<'a>(Box<Expr<'a>>); + +#[derive(Debug)] +struct Literal(TokenKind); + +#[derive(Debug)] +enum Expr<'a> { + Binary(Binary<'a>), + Grouping(Grouping<'a>), + Literal(Literal), +} + +// Parser + +/* +expression → equality ; +equality → comparison ( ( "!=" | "==" ) comparison )* ; +comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ; +term → factor ( ( "-" | "+" ) factor )* ; +factor → unary ( ( "/" | "*" ) unary )* ; +unary → ( "!" | "-" ) unary + | primary ; +primary → NUMBER | STRING | "true" | "false" | "nil" + | "(" expression ")" ; +*/ + +struct Parser<'a> { + tokens: Vec<Token<'a>>, + current: usize, +} + +impl<'a> Parser<'a> { + // recursive-descent parser functions + + fn expression(&mut self) -> Expr<'a> { + self.equality() + } + + fn equality(&mut self) -> Expr<'a> { + let expr = self.comparison(); + unimplemented!() + } + + fn comparison(&mut self) -> Expr<'a> { + unimplemented!() + } + + // internal helpers + fn match_token(&mut self, oneof: &[TokenKind]) -> bool { + for token in oneof { + if self.check_token(token) { + self.advance(); + return true; + } + } + + return false; + } + + fn advance(&mut self) -> &Token { + if !self.is_at_end() { + self.current += 1; + } + + return self.previous(); + } + + fn is_at_end(&self) -> bool { + self.check_token(&TokenKind::Eof) + } + + fn check_token(&self, token: &TokenKind) -> bool { + self.peek().kind == *token + } + + fn peek(&self) -> &Token { + &self.tokens[self.current] + } + + fn previous(&self) -> &Token { + &self.tokens[self.current - 1] + } +} diff --git a/users/tazjin/rlox/src/scanner.rs b/users/tazjin/rlox/src/scanner.rs index 2eed89bd6b..eeb247d9bd 100644 --- a/users/tazjin/rlox/src/scanner.rs +++ b/users/tazjin/rlox/src/scanner.rs @@ -1,6 +1,6 @@ use crate::errors::{Error, ErrorKind}; -#[derive(Debug)] +#[derive(Debug, PartialEq)] pub
enum TokenKind { // Single-character tokens. LeftParen, @@ -54,10 +54,9 @@ pub enum TokenKind { #[derive(Debug)] pub struct Token<'a> { - kind: TokenKind, - lexeme: &'a [char], - // literal: Object, // TODO(tazjin): Uhh? - line: usize, + pub kind: TokenKind, + pub lexeme: &'a [char], + pub line: usize, } struct Scanner<'a> { -- cgit 1.4.1