From 5fcff11eae107b76177a581dc604ac0408213b02 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Sun, 6 Dec 2020 14:21:06 +0100 Subject: feat(tazjin/rlox): Implement parsing up to unary expressions ... with the exception of parenthesised expressions, because error threading is not implemented yet. Change-Id: I8d455d85e647548d5b71cbfd3d078f4970dab7fb Reviewed-on: https://cl.tvl.fyi/c/depot/+/2232 Reviewed-by: tazjin Tested-by: BuildkiteCI --- users/tazjin/rlox/src/parser.rs | 114 +++++++++++++++++++++++++++++++++++---- users/tazjin/rlox/src/scanner.rs | 10 ++-- 2 files changed, 109 insertions(+), 15 deletions(-) diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs index faac880098..bc253971eb 100644 --- a/users/tazjin/rlox/src/parser.rs +++ b/users/tazjin/rlox/src/parser.rs @@ -1,6 +1,6 @@ // This implements the grammar of Lox as described starting in the // Crafting Interpreters chapter "Representing Code". Note that the -// upstream Java implementation works about Java being bad at value +// upstream Java implementation works around Java being bad at value // classes by writing a code generator for Java. // // My Rust implementation skips this step because it's unnecessary, we @@ -12,21 +12,33 @@ use crate::scanner::{Token, TokenKind}; #[derive(Debug)] struct Binary<'a> { left: Box>, - right: Box>, operator: Token<'a>, + right: Box>, } #[derive(Debug)] struct Grouping<'a>(Box>); #[derive(Debug)] -struct Literal(TokenKind); +enum Literal { + Boolean(bool), + Number(f64), + String(String), + Nil, +} + +#[derive(Debug)] +struct Unary<'a> { + operator: Token<'a>, + right: Box>, +} #[derive(Debug)] enum Expr<'a> { Binary(Binary<'a>), Grouping(Grouping<'a>), Literal(Literal), + Unary(Unary<'a>), } // Parser @@ -56,15 +68,76 @@ impl<'a> Parser<'a> { } fn equality(&mut self) -> Expr<'a> { - let expr = self.comparison(); - unimplemented!() + self.binary_operator( + &[TokenKind::BangEqual, TokenKind::EqualEqual], + Self::comparison, + Self::comparison, + ) } fn comparison(&mut self) -> Expr<'a> { - unimplemented!() + self.binary_operator( + &[ + TokenKind::Greater, + TokenKind::GreaterEqual, + TokenKind::Less, + TokenKind::LessEqual, + ], + Self::term, + Self::term, + ) + } + + fn term(&mut self) -> Expr<'a> { + self.binary_operator( + &[TokenKind::Minus, TokenKind::Plus], + Self::factor, + Self::factor, + ) + } + + fn factor(&mut self) -> Expr<'a> { + self.binary_operator( + &[TokenKind::Slash, TokenKind::Star], + Self::unary, + Self::unary, + ) + } + + fn unary(&mut self) -> Expr<'a> { + if self.match_token(&[TokenKind::Bang, TokenKind::Minus]) { + return Expr::Unary(Unary { + operator: self.previous(), + right: Box::new(self.unary()), + }); + } + + return self.primary(); + } + + fn primary(&mut self) -> Expr<'a> { + let next = self.advance(); + let literal = match next.kind { + TokenKind::True => Literal::Boolean(true), + TokenKind::False => Literal::Boolean(false), + TokenKind::Nil => Literal::Nil, + TokenKind::Number(num) => Literal::Number(num), + TokenKind::String(string) => Literal::String(string), + + TokenKind::LeftParen => { + unimplemented!("need error handling to deal with unbalanced parens"); + } + + // This branch indicates a parser bug, not invalid input. + unexpected => panic!("Parser encountered unexpected token '{:?}'", unexpected), + }; + + Expr::Literal(literal) } // internal helpers + + /// Check if the next token is in `oneof`, and advance if it is. fn match_token(&mut self, oneof: &[TokenKind]) -> bool { for token in oneof { if self.check_token(token) { @@ -76,7 +149,8 @@ impl<'a> Parser<'a> { return false; } - fn advance(&mut self) -> &Token { + /// Return the next token and advance parser state. + fn advance(&mut self) -> Token<'a> { if !self.is_at_end() { self.current += 1; } @@ -88,15 +162,35 @@ impl<'a> Parser<'a> { self.check_token(&TokenKind::Eof) } + /// Is the next token `token`? fn check_token(&self, token: &TokenKind) -> bool { self.peek().kind == *token } - fn peek(&self) -> &Token { + fn peek(&self) -> &Token<'a> { &self.tokens[self.current] } - fn previous(&self) -> &Token { - &self.tokens[self.current - 1] + fn previous(&self) -> Token<'a> { + self.tokens[self.current - 1].clone() + } + + fn binary_operator( + &mut self, + oneof: &[TokenKind], + left: fn(&mut Parser<'a>) -> Expr<'a>, + right: fn(&mut Parser<'a>) -> Expr<'a>, + ) -> Expr<'a> { + let mut expr = left(self); + + while self.match_token(oneof) { + expr = Expr::Binary(Binary { + left: Box::new(expr), + operator: self.previous(), + right: Box::new(right(self)), + }) + } + + return expr; } } diff --git a/users/tazjin/rlox/src/scanner.rs b/users/tazjin/rlox/src/scanner.rs index eeb247d9bd..fde4397531 100644 --- a/users/tazjin/rlox/src/scanner.rs +++ b/users/tazjin/rlox/src/scanner.rs @@ -1,6 +1,6 @@ use crate::errors::{Error, ErrorKind}; -#[derive(Debug, PartialEq)] +#[derive(Clone, Debug, PartialEq)] pub enum TokenKind { // Single-character tokens. LeftParen, @@ -29,22 +29,22 @@ pub enum TokenKind { Identifier(String), String(String), Number(f64), + True, + False, + Nil, // Keywords. And, Class, Else, - False, Fun, For, If, - Nil, Or, Print, Return, Super, This, - True, Var, While, @@ -52,7 +52,7 @@ pub enum TokenKind { Eof, } -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct Token<'a> { pub kind: TokenKind, pub lexeme: &'a [char], -- cgit 1.4.1