From b1d0e22b1f5fe907ba3d48931e5a38b9a75b0dcf Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Sun, 17 Jan 2021 21:13:57 +0300 Subject: chore(tazjin/rlox): Move other modules under treewalk:: It's unclear if the second part of the book can reuse anything from the first part (I'm guessing probably the scanner, but I'll move that back if it turns out to be the case). Change-Id: I9411355929e31ac6e953599e51665406b1f48d55 Reviewed-on: https://cl.tvl.fyi/c/depot/+/2415 Reviewed-by: tazjin Tested-by: BuildkiteCI --- users/tazjin/rlox/src/errors.rs | 38 -- users/tazjin/rlox/src/main.rs | 9 - users/tazjin/rlox/src/parser.rs | 702 --------------------- users/tazjin/rlox/src/scanner.rs | 283 --------- users/tazjin/rlox/src/treewalk/errors.rs | 38 ++ users/tazjin/rlox/src/treewalk/interpreter.rs | 8 +- .../rlox/src/treewalk/interpreter/builtins.rs | 4 +- users/tazjin/rlox/src/treewalk/mod.rs | 11 +- users/tazjin/rlox/src/treewalk/parser.rs | 702 +++++++++++++++++++++ users/tazjin/rlox/src/treewalk/resolver.rs | 6 +- users/tazjin/rlox/src/treewalk/scanner.rs | 283 +++++++++ 11 files changed, 1042 insertions(+), 1042 deletions(-) delete mode 100644 users/tazjin/rlox/src/errors.rs delete mode 100644 users/tazjin/rlox/src/parser.rs delete mode 100644 users/tazjin/rlox/src/scanner.rs create mode 100644 users/tazjin/rlox/src/treewalk/errors.rs create mode 100644 users/tazjin/rlox/src/treewalk/parser.rs create mode 100644 users/tazjin/rlox/src/treewalk/scanner.rs (limited to 'users/tazjin') diff --git a/users/tazjin/rlox/src/errors.rs b/users/tazjin/rlox/src/errors.rs deleted file mode 100644 index 3d5c28f9f3bb..000000000000 --- a/users/tazjin/rlox/src/errors.rs +++ /dev/null @@ -1,38 +0,0 @@ -use crate::treewalk::interpreter::Value; - -#[derive(Debug)] -pub enum ErrorKind { - UnexpectedChar(char), - UnterminatedString, - UnmatchedParens, - ExpectedExpression(String), - ExpectedSemicolon, - ExpectedClosingBrace, - ExpectedToken(&'static str), - TypeError(String), - UndefinedVariable(String), - InternalError(String), - InvalidAssignmentTarget(String), - RuntimeError(String), - StaticError(String), - - // This variant is not an error, rather it is used for - // short-circuiting out of a function body that hits a `return` - // statement. - // - // It's implemented this way because in the original book the - // author uses exceptions for control flow, and this is the - // closest equivalent that I had available without diverging too - // much. - FunctionReturn(Value), -} - -#[derive(Debug)] -pub struct Error { - pub line: usize, - pub kind: ErrorKind, -} - -pub fn report(err: &Error) { - eprintln!("[line {}] Error: {:?}", err.line, err.kind); -} diff --git a/users/tazjin/rlox/src/main.rs b/users/tazjin/rlox/src/main.rs index 13a5748187ad..e3bc2783150c 100644 --- a/users/tazjin/rlox/src/main.rs +++ b/users/tazjin/rlox/src/main.rs @@ -5,9 +5,6 @@ use std::io::Write; use std::process; mod bytecode; -mod errors; -mod parser; -mod scanner; mod treewalk; fn main() { @@ -16,9 +13,3 @@ fn main() { _ => bytecode::main(), } } - -fn report_errors(errors: Vec) { - for error in errors { - errors::report(&error); - } -} diff --git a/users/tazjin/rlox/src/parser.rs b/users/tazjin/rlox/src/parser.rs deleted file mode 100644 index e28404aa9fce..000000000000 --- a/users/tazjin/rlox/src/parser.rs +++ /dev/null @@ -1,702 +0,0 @@ -// This implements the grammar of Lox as described starting in the -// Crafting Interpreters chapter "Representing Code". Note that the -// upstream Java implementation works around Java being bad at value -// classes by writing a code generator for Java. -// -// My Rust implementation skips this step because it's unnecessary, we -// have real types. -use crate::errors::{Error, ErrorKind}; -use crate::scanner::{Token, TokenKind}; -use std::rc::Rc; - -// AST - -#[derive(Debug)] -pub struct Assign { - pub name: Token, - pub value: Box, - pub depth: Option, -} - -#[derive(Debug)] -pub struct Binary { - pub left: Box, - pub operator: Token, - pub right: Box, -} - -#[derive(Debug)] -pub struct Logical { - pub left: Box, - pub operator: Token, - pub right: Box, -} - -#[derive(Debug)] -pub struct Grouping(pub Box); - -#[derive(Debug, Clone, PartialEq)] -pub enum Literal { - Boolean(bool), - Number(f64), - String(String), - Nil, -} - -#[derive(Debug)] -pub struct Unary { - pub operator: Token, - pub right: Box, -} - -#[derive(Debug)] -pub struct Call { - pub callee: Box, - pub paren: Token, - pub args: Vec, -} - -// Not to be confused with `Var`, which is for assignment. -#[derive(Debug)] -pub struct Variable { - pub name: Token, - pub depth: Option, -} - -#[derive(Debug)] -pub enum Expr { - Assign(Assign), - Binary(Binary), - Grouping(Grouping), - Literal(Literal), - Unary(Unary), - Call(Call), - Variable(Variable), - Logical(Logical), -} - -// Variable assignment. Not to be confused with `Variable`, which is -// for access. -#[derive(Debug)] -pub struct Var { - pub name: Token, - pub initialiser: Option, -} - -#[derive(Debug)] -pub struct Return { - pub value: Expr, -} - -#[derive(Debug)] -pub struct If { - pub condition: Expr, - pub then_branch: Box, - pub else_branch: Option>, -} - -#[derive(Debug)] -pub struct While { - pub condition: Expr, - pub body: Box, -} - -pub type Block = Vec; - -#[derive(Debug)] -pub struct Function { - pub name: Token, - pub params: Vec, - pub body: Block, -} - -#[derive(Debug)] -pub enum Statement { - Expr(Expr), - Print(Expr), - Var(Var), - Block(Block), - If(If), - While(While), - Function(Rc), - Return(Return), -} - -// Parser - -/* -program → declaration* EOF ; - -declaration → funDecl - | varDecl - | statement ; - -funDecl → "fun" function ; -function → IDENTIFIER "(" parameters? ")" block ; -parameters → IDENTIFIER ( "," IDENTIFIER )* ; - - -statement → exprStmt - | forStmt - | ifStmt - | printStmt - | returnStmt - | whileStmt - | block ; - -forStmt → "for" "(" ( varDecl | exprStmt | ";" ) - expression? ";" - expression? ")" statement ; - -returnStmt → "return" expression? ";" ; - -whileStmt → "while" "(" expression ")" statement ; - -exprStmt → expression ";" ; - -ifStmt → "if" "(" expression ")" statement - ( "else" statement )? ; - -printStmt → "print" expression ";" ; - -expression → assignment ; -assignment → IDENTIFIER "=" assignment - | logic_or ; -logic_or → logic_and ( "or" logic_and )* ; -logic_and → equality ( "and" equality )* ; -equality → comparison ( ( "!=" | "==" ) comparison )* ; -comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ; -term → factor ( ( "-" | "+" ) factor )* ; -factor → unary ( ( "/" | "*" ) unary )* ; -unary → ( "!" | "-" ) unary | call ; -call → primary ( "(" arguments? ")" )* ; -arguments → expression ( "," expression )* ; -primary → NUMBER | STRING | "true" | "false" | "nil" - | "(" expression ")" ; -*/ - -struct Parser { - tokens: Vec, - current: usize, -} - -type ExprResult = Result; -type StmtResult = Result; - -impl Parser { - // recursive-descent parser functions - - fn declaration(&mut self) -> StmtResult { - if self.match_token(&TokenKind::Fun) { - return self.function(); - } - - if self.match_token(&TokenKind::Var) { - return self.var_declaration(); - } - - self.statement() - } - - fn function(&mut self) -> StmtResult { - let name = self.identifier("Expected function name.")?; - - self.consume( - &TokenKind::LeftParen, - ErrorKind::ExpectedToken("Expect '(' after function name."), - )?; - - let mut params = vec![]; - - if !self.check_token(&TokenKind::RightParen) { - loop { - if params.len() >= 255 { - return Err(Error { - line: self.peek().line, - kind: ErrorKind::InternalError("255 parameter limit exceeded.".into()), - }); - } - - params.push(self.identifier("Expected parameter name.")?); - - if !self.match_token(&TokenKind::Comma) { - break; - } - } - } - - self.consume( - &TokenKind::RightParen, - ErrorKind::ExpectedToken("Expect ')' after parameters."), - )?; - - self.consume( - &TokenKind::LeftBrace, - ErrorKind::ExpectedToken("Expect '{' before function body."), - )?; - - Ok(Statement::Function(Rc::new(Function { - name, - params, - body: self.block_statement()?, - }))) - } - - fn var_declaration(&mut self) -> StmtResult { - // Since `TokenKind::Identifier` carries data, we can't use - // `consume`. - let mut var = Var { - name: self.identifier("Expected variable name.")?, - initialiser: None, - }; - - if self.match_token(&TokenKind::Equal) { - var.initialiser = Some(self.expression()?); - } - - self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; - Ok(Statement::Var(var)) - } - - fn statement(&mut self) -> StmtResult { - if self.match_token(&TokenKind::Print) { - self.print_statement() - } else if self.match_token(&TokenKind::LeftBrace) { - Ok(Statement::Block(self.block_statement()?)) - } else if self.match_token(&TokenKind::If) { - self.if_statement() - } else if self.match_token(&TokenKind::While) { - self.while_statement() - } else if self.match_token(&TokenKind::For) { - self.for_statement() - } else if self.match_token(&TokenKind::Return) { - self.return_statement() - } else { - self.expr_statement() - } - } - - fn print_statement(&mut self) -> StmtResult { - let expr = self.expression()?; - self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; - Ok(Statement::Print(expr)) - } - - fn block_statement(&mut self) -> Result { - let mut block: Block = vec![]; - - while !self.check_token(&TokenKind::RightBrace) && !self.is_at_end() { - block.push(self.declaration()?); - } - - self.consume(&TokenKind::RightBrace, ErrorKind::ExpectedClosingBrace)?; - - Ok(block) - } - - fn if_statement(&mut self) -> StmtResult { - self.consume( - &TokenKind::LeftParen, - ErrorKind::ExpectedToken("Expected '(' after 'if'"), - )?; - let condition = self.expression()?; - self.consume( - &TokenKind::RightParen, - ErrorKind::ExpectedToken("Expected ')' after condition"), - )?; - - let then_branch = Box::new(self.statement()?); - - let mut stmt = If { - condition, - then_branch, - else_branch: Option::None, - }; - - if self.match_token(&TokenKind::Else) { - stmt.else_branch = Some(Box::new(self.statement()?)); - } - - Ok(Statement::If(stmt)) - } - - fn while_statement(&mut self) -> StmtResult { - self.consume( - &TokenKind::LeftParen, - ErrorKind::ExpectedToken("Expected '(' after 'while'"), - )?; - - let condition = self.expression()?; - - self.consume( - &TokenKind::RightParen, - ErrorKind::ExpectedToken("Expected ')' after 'while'"), - )?; - - Ok(Statement::While(While { - condition, - body: Box::new(self.statement()?), - })) - } - - fn for_statement(&mut self) -> StmtResult { - // Parsing of clauses ... - self.consume( - &TokenKind::LeftParen, - ErrorKind::ExpectedToken("Expected '(' after 'for'"), - )?; - - let initialiser = if self.match_token(&TokenKind::Semicolon) { - None - } else if self.match_token(&TokenKind::Var) { - Some(self.var_declaration()?) - } else { - Some(self.expr_statement()?) - }; - - let condition = if self.check_token(&TokenKind::Semicolon) { - // unspecified condition => infinite loop - Expr::Literal(Literal::Boolean(true)) - } else { - self.expression()? - }; - - self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; - - let increment = if self.check_token(&TokenKind::RightParen) { - None - } else { - Some(self.expression()?) - }; - - self.consume( - &TokenKind::RightParen, - ErrorKind::ExpectedToken("Expected ')' after for clauses"), - )?; - - let mut body = self.statement()?; - - // ... desugaring to while - - if let Some(inc) = increment { - body = Statement::Block(vec![body, Statement::Expr(inc)]); - } - - body = Statement::While(While { - condition, - body: Box::new(body), - }); - - if let Some(init) = initialiser { - body = Statement::Block(vec![init, body]); - } - - Ok(body) - } - - fn return_statement(&mut self) -> StmtResult { - let value = self.expression()?; - self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; - Ok(Statement::Return(Return { value })) - } - - fn expr_statement(&mut self) -> StmtResult { - let expr = self.expression()?; - self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; - Ok(Statement::Expr(expr)) - } - - fn expression(&mut self) -> ExprResult { - self.assignment() - } - - fn assignment(&mut self) -> ExprResult { - let expr = self.logic_or()?; - - if self.match_token(&TokenKind::Equal) { - let equals = self.previous().clone(); - let value = self.assignment()?; - - if let Expr::Variable(Variable { name, .. }) = expr { - return Ok(Expr::Assign(Assign { - name, - value: Box::new(value), - depth: None, - })); - } - - return Err(Error { - line: equals.line, - kind: ErrorKind::InvalidAssignmentTarget(format!("{:?}", equals)), - }); - } - - Ok(expr) - } - - fn logic_or(&mut self) -> ExprResult { - let mut expr = self.logic_and()?; - - while self.match_token(&TokenKind::Or) { - expr = Expr::Logical(Logical { - left: Box::new(expr), - operator: self.previous().clone(), - right: Box::new(self.logic_and()?), - }) - } - - Ok(expr) - } - - fn logic_and(&mut self) -> ExprResult { - let mut expr = self.equality()?; - - while self.match_token(&TokenKind::And) { - expr = Expr::Logical(Logical { - left: Box::new(expr), - operator: self.previous().clone(), - right: Box::new(self.equality()?), - }) - } - - Ok(expr) - } - - fn equality(&mut self) -> ExprResult { - self.binary_operator( - &[TokenKind::BangEqual, TokenKind::EqualEqual], - Self::comparison, - ) - } - - fn comparison(&mut self) -> ExprResult { - self.binary_operator( - &[ - TokenKind::Greater, - TokenKind::GreaterEqual, - TokenKind::Less, - TokenKind::LessEqual, - ], - Self::term, - ) - } - - fn term(&mut self) -> ExprResult { - self.binary_operator(&[TokenKind::Minus, TokenKind::Plus], Self::factor) - } - - fn factor(&mut self) -> ExprResult { - self.binary_operator(&[TokenKind::Slash, TokenKind::Star], Self::unary) - } - - fn unary(&mut self) -> ExprResult { - if self.match_token(&TokenKind::Bang) || self.match_token(&TokenKind::Minus) { - return Ok(Expr::Unary(Unary { - operator: self.previous().clone(), - right: Box::new(self.unary()?), - })); - } - - return self.call(); - } - - fn call(&mut self) -> ExprResult { - let mut expr = self.primary()?; - - loop { - if self.match_token(&TokenKind::LeftParen) { - expr = self.finish_call(expr)?; - } else { - break; - } - } - - Ok(expr) - } - - fn finish_call(&mut self, callee: Expr) -> ExprResult { - let mut args = vec![]; - - if !self.check_token(&TokenKind::RightParen) { - loop { - // TODO(tazjin): Check for max args count - args.push(self.expression()?); - if !self.match_token(&TokenKind::Comma) { - break; - } - } - } - - let paren = self.consume( - &TokenKind::RightParen, - ErrorKind::ExpectedToken("Expect ')' after arguments."), - )?; - - Ok(Expr::Call(Call { - args, - callee: Box::new(callee), - paren, - })) - } - - fn primary(&mut self) -> ExprResult { - let next = self.advance(); - let literal = match next.kind { - TokenKind::True => Literal::Boolean(true), - TokenKind::False => Literal::Boolean(false), - TokenKind::Nil => Literal::Nil, - TokenKind::Number(num) => Literal::Number(num), - TokenKind::String(string) => Literal::String(string), - - TokenKind::LeftParen => { - let expr = self.expression()?; - self.consume(&TokenKind::RightParen, ErrorKind::UnmatchedParens)?; - return Ok(Expr::Grouping(Grouping(Box::new(expr)))); - } - - TokenKind::Identifier(_) => { - return Ok(Expr::Variable(Variable { - name: next, - depth: None, - })) - } - - unexpected => { - eprintln!("encountered {:?}", unexpected); - return Err(Error { - line: next.line, - kind: ErrorKind::ExpectedExpression(next.lexeme), - }); - } - }; - - Ok(Expr::Literal(literal)) - } - - // internal helpers - - fn identifier(&mut self, err: &'static str) -> Result { - if let TokenKind::Identifier(_) = self.peek().kind { - Ok(self.advance()) - } else { - Err(Error { - line: self.peek().line, - kind: ErrorKind::ExpectedToken(err), - }) - } - } - - /// Check if the next token is in `oneof`, and advance if it is. - fn match_token(&mut self, token: &TokenKind) -> bool { - if self.check_token(token) { - self.advance(); - return true; - } - - false - } - - /// Return the next token and advance parser state. - fn advance(&mut self) -> Token { - if !self.is_at_end() { - self.current += 1; - } - - return self.previous().clone(); - } - - fn is_at_end(&self) -> bool { - self.check_token(&TokenKind::Eof) - } - - /// Is the next token `token`? - fn check_token(&self, token: &TokenKind) -> bool { - self.peek().kind == *token - } - - fn peek(&self) -> &Token { - &self.tokens[self.current] - } - - fn previous(&self) -> &Token { - &self.tokens[self.current - 1] - } - - fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result { - if self.check_token(kind) { - return Ok(self.advance()); - } - - Err(Error { - line: self.peek().line, - kind: err, - }) - } - - fn synchronise(&mut self) { - self.advance(); - - while !self.is_at_end() { - if self.previous().kind == TokenKind::Semicolon { - return; - } - - match self.peek().kind { - TokenKind::Class - | TokenKind::Fun - | TokenKind::Var - | TokenKind::For - | TokenKind::If - | TokenKind::While - | TokenKind::Print - | TokenKind::Return => return, - - _ => { - self.advance(); - } - } - } - } - - fn binary_operator( - &mut self, - oneof: &[TokenKind], - each: fn(&mut Parser) -> ExprResult, - ) -> ExprResult { - let mut expr = each(self)?; - - while oneof.iter().any(|t| self.match_token(t)) { - expr = Expr::Binary(Binary { - left: Box::new(expr), - operator: self.previous().clone(), - right: Box::new(each(self)?), - }) - } - - return Ok(expr); - } -} - -pub fn parse(tokens: Vec) -> Result> { - let mut parser = Parser { tokens, current: 0 }; - let mut program: Block = vec![]; - let mut errors: Vec = vec![]; - - while !parser.is_at_end() { - match parser.declaration() { - Err(err) => { - errors.push(err); - parser.synchronise(); - } - Ok(decl) => { - program.push(decl); - } - } - } - - if errors.is_empty() { - Ok(program) - } else { - Err(errors) - } -} diff --git a/users/tazjin/rlox/src/scanner.rs b/users/tazjin/rlox/src/scanner.rs deleted file mode 100644 index 36c2d6a966ae..000000000000 --- a/users/tazjin/rlox/src/scanner.rs +++ /dev/null @@ -1,283 +0,0 @@ -use crate::errors::{Error, ErrorKind}; - -#[derive(Clone, Debug, PartialEq)] -pub enum TokenKind { - // Single-character tokens. - LeftParen, - RightParen, - LeftBrace, - RightBrace, - Comma, - Dot, - Minus, - Plus, - Semicolon, - Slash, - Star, - - // One or two character tokens. - Bang, - BangEqual, - Equal, - EqualEqual, - Greater, - GreaterEqual, - Less, - LessEqual, - - // Literals. - Identifier(String), - String(String), - Number(f64), - True, - False, - Nil, - - // Keywords. - And, - Class, - Else, - Fun, - For, - If, - Or, - Print, - Return, - Super, - This, - Var, - While, - - // Special things - Eof, -} - -#[derive(Clone, Debug)] -pub struct Token { - pub kind: TokenKind, - pub lexeme: String, - pub line: usize, -} - -struct Scanner<'a> { - source: &'a [char], - tokens: Vec, - errors: Vec, - start: usize, // offset of first character in current lexeme - current: usize, // current offset into source - line: usize, // current line in source -} - -impl<'a> Scanner<'a> { - fn is_at_end(&self) -> bool { - return self.current >= self.source.len(); - } - - fn advance(&mut self) -> char { - self.current += 1; - self.source[self.current - 1] - } - - fn add_token(&mut self, kind: TokenKind) { - let lexeme = &self.source[self.start..self.current]; - self.tokens.push(Token { - kind, - lexeme: lexeme.into_iter().collect(), - line: self.line, - }) - } - - fn scan_token(&mut self) { - match self.advance() { - // simple single-character tokens - '(' => self.add_token(TokenKind::LeftParen), - ')' => self.add_token(TokenKind::RightParen), - '{' => self.add_token(TokenKind::LeftBrace), - '}' => self.add_token(TokenKind::RightBrace), - ',' => self.add_token(TokenKind::Comma), - '.' => self.add_token(TokenKind::Dot), - '-' => self.add_token(TokenKind::Minus), - '+' => self.add_token(TokenKind::Plus), - ';' => self.add_token(TokenKind::Semicolon), - '*' => self.add_token(TokenKind::Star), - - // possible multi-character tokens - '!' => self.add_if_next('=', TokenKind::BangEqual, TokenKind::Bang), - '=' => self.add_if_next('=', TokenKind::EqualEqual, TokenKind::Equal), - '<' => self.add_if_next('=', TokenKind::LessEqual, TokenKind::Less), - '>' => self.add_if_next('=', TokenKind::GreaterEqual, TokenKind::Greater), - - '/' => { - // support comments until EOL by discarding characters - if self.match_next('/') { - while self.peek() != '\n' && !self.is_at_end() { - self.advance(); - } - } else { - self.add_token(TokenKind::Slash); - } - } - - // ignore whitespace - ws if ws.is_whitespace() => { - if ws == '\n' { - self.line += 1 - } - } - - '"' => self.scan_string(), - - digit if digit.is_digit(10) => self.scan_number(), - - chr if chr.is_alphabetic() || chr == '_' => self.scan_identifier(), - - unexpected => self.errors.push(Error { - line: self.line, - kind: ErrorKind::UnexpectedChar(unexpected), - }), - }; - } - - fn match_next(&mut self, expected: char) -> bool { - if self.is_at_end() || self.source[self.current] != expected { - false - } else { - self.current += 1; - true - } - } - - fn add_if_next(&mut self, expected: char, then: TokenKind, or: TokenKind) { - if self.match_next(expected) { - self.add_token(then); - } else { - self.add_token(or); - } - } - - fn peek(&self) -> char { - if self.is_at_end() { - return '\0'; - } else { - return self.source[self.current]; - } - } - - fn peek_next(&self) -> char { - if self.current + 1 >= self.source.len() { - return '\0'; - } else { - return self.source[self.current + 1]; - } - } - - fn scan_string(&mut self) { - while self.peek() != '"' && !self.is_at_end() { - if self.peek() == '\n' { - self.line += 1; - } - - self.advance(); - } - - if self.is_at_end() { - self.errors.push(Error { - line: self.line, - kind: ErrorKind::UnterminatedString, - }); - return; - } - - // closing '"' - self.advance(); - - // add token without surrounding quotes - let string: String = self.source[(self.start + 1)..(self.current - 1)] - .iter() - .collect(); - self.add_token(TokenKind::String(string)); - } - - fn scan_number(&mut self) { - while self.peek().is_digit(10) { - self.advance(); - } - - // Look for a fractional part - if self.peek() == '.' && self.peek_next().is_digit(10) { - // consume '.' - self.advance(); - - while self.peek().is_digit(10) { - self.advance(); - } - } - - let num: f64 = self.source[self.start..self.current] - .iter() - .collect::() - .parse() - .expect("float parsing should always work"); - - self.add_token(TokenKind::Number(num)); - } - - fn scan_identifier(&mut self) { - while self.peek().is_alphanumeric() || self.peek() == '_' { - self.advance(); - } - - let ident: String = self.source[self.start..self.current].iter().collect(); - - // Determine whether this is an identifier, or a keyword: - let token_kind = match ident.as_str() { - "and" => TokenKind::And, - "class" => TokenKind::Class, - "else" => TokenKind::Else, - "false" => TokenKind::False, - "for" => TokenKind::For, - "fun" => TokenKind::Fun, - "if" => TokenKind::If, - "nil" => TokenKind::Nil, - "or" => TokenKind::Or, - "print" => TokenKind::Print, - "return" => TokenKind::Return, - "super" => TokenKind::Super, - "this" => TokenKind::This, - "true" => TokenKind::True, - "var" => TokenKind::Var, - "while" => TokenKind::While, - _ => TokenKind::Identifier(ident), - }; - - self.add_token(token_kind); - } - - fn scan_tokens(&mut self) { - while !self.is_at_end() { - self.start = self.current; - self.scan_token(); - } - - self.add_token(TokenKind::Eof); - } -} - -pub fn scan<'a>(input: &'a [char]) -> Result, Vec> { - let mut scanner = Scanner { - source: &input, - tokens: vec![], - errors: vec![], - start: 0, - current: 0, - line: 0, - }; - - scanner.scan_tokens(); - - if !scanner.errors.is_empty() { - return Err(scanner.errors); - } - - return Ok(scanner.tokens); -} diff --git a/users/tazjin/rlox/src/treewalk/errors.rs b/users/tazjin/rlox/src/treewalk/errors.rs new file mode 100644 index 000000000000..3d5c28f9f3bb --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/errors.rs @@ -0,0 +1,38 @@ +use crate::treewalk::interpreter::Value; + +#[derive(Debug)] +pub enum ErrorKind { + UnexpectedChar(char), + UnterminatedString, + UnmatchedParens, + ExpectedExpression(String), + ExpectedSemicolon, + ExpectedClosingBrace, + ExpectedToken(&'static str), + TypeError(String), + UndefinedVariable(String), + InternalError(String), + InvalidAssignmentTarget(String), + RuntimeError(String), + StaticError(String), + + // This variant is not an error, rather it is used for + // short-circuiting out of a function body that hits a `return` + // statement. + // + // It's implemented this way because in the original book the + // author uses exceptions for control flow, and this is the + // closest equivalent that I had available without diverging too + // much. + FunctionReturn(Value), +} + +#[derive(Debug)] +pub struct Error { + pub line: usize, + pub kind: ErrorKind, +} + +pub fn report(err: &Error) { + eprintln!("[line {}] Error: {:?}", err.line, err.kind); +} diff --git a/users/tazjin/rlox/src/treewalk/interpreter.rs b/users/tazjin/rlox/src/treewalk/interpreter.rs index 5b47dc0248f9..a096716d9155 100644 --- a/users/tazjin/rlox/src/treewalk/interpreter.rs +++ b/users/tazjin/rlox/src/treewalk/interpreter.rs @@ -1,7 +1,7 @@ -use crate::errors::{Error, ErrorKind}; -use crate::parser::{self, Block, Expr, Literal, Statement}; -use crate::scanner::{self, TokenKind}; -use crate::treewalk::resolver; +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::parser::{self, Block, Expr, Literal, Statement}; +use crate::treewalk::scanner::{self, TokenKind}; +use crate::treewalk::treewalk::resolver; use std::collections::HashMap; use std::rc::Rc; use std::sync::RwLock; diff --git a/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs b/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs index 0ec6ae08c354..709e53c7c0eb 100644 --- a/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs +++ b/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs @@ -1,8 +1,8 @@ use std::fmt; use std::time::{SystemTime, UNIX_EPOCH}; -use crate::errors::Error; -use crate::parser::Literal; +use crate::treewalk::errors::Error; +use crate::treewalk::parser::Literal; use crate::treewalk::interpreter::Value; pub trait Builtin: fmt::Debug { diff --git a/users/tazjin/rlox/src/treewalk/mod.rs b/users/tazjin/rlox/src/treewalk/mod.rs index 95136b2fa141..ae1049a12eff 100644 --- a/users/tazjin/rlox/src/treewalk/mod.rs +++ b/users/tazjin/rlox/src/treewalk/mod.rs @@ -1,7 +1,10 @@ use crate::*; -pub mod interpreter; +mod errors; +mod parser; mod resolver; +mod scanner; +pub mod interpreter; pub fn main() { let mut args = env::args(); @@ -50,3 +53,9 @@ fn run(lox: &mut treewalk::interpreter::Interpreter, code: &str) { report_errors(errors); } } + +fn report_errors(errors: Vec) { + for error in errors { + errors::report(&error); + } +} diff --git a/users/tazjin/rlox/src/treewalk/parser.rs b/users/tazjin/rlox/src/treewalk/parser.rs new file mode 100644 index 000000000000..8117c29726c9 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/parser.rs @@ -0,0 +1,702 @@ +// This implements the grammar of Lox as described starting in the +// Crafting Interpreters chapter "Representing Code". Note that the +// upstream Java implementation works around Java being bad at value +// classes by writing a code generator for Java. +// +// My Rust implementation skips this step because it's unnecessary, we +// have real types. +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::scanner::{Token, TokenKind}; +use std::rc::Rc; + +// AST + +#[derive(Debug)] +pub struct Assign { + pub name: Token, + pub value: Box, + pub depth: Option, +} + +#[derive(Debug)] +pub struct Binary { + pub left: Box, + pub operator: Token, + pub right: Box, +} + +#[derive(Debug)] +pub struct Logical { + pub left: Box, + pub operator: Token, + pub right: Box, +} + +#[derive(Debug)] +pub struct Grouping(pub Box); + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + Boolean(bool), + Number(f64), + String(String), + Nil, +} + +#[derive(Debug)] +pub struct Unary { + pub operator: Token, + pub right: Box, +} + +#[derive(Debug)] +pub struct Call { + pub callee: Box, + pub paren: Token, + pub args: Vec, +} + +// Not to be confused with `Var`, which is for assignment. +#[derive(Debug)] +pub struct Variable { + pub name: Token, + pub depth: Option, +} + +#[derive(Debug)] +pub enum Expr { + Assign(Assign), + Binary(Binary), + Grouping(Grouping), + Literal(Literal), + Unary(Unary), + Call(Call), + Variable(Variable), + Logical(Logical), +} + +// Variable assignment. Not to be confused with `Variable`, which is +// for access. +#[derive(Debug)] +pub struct Var { + pub name: Token, + pub initialiser: Option, +} + +#[derive(Debug)] +pub struct Return { + pub value: Expr, +} + +#[derive(Debug)] +pub struct If { + pub condition: Expr, + pub then_branch: Box, + pub else_branch: Option>, +} + +#[derive(Debug)] +pub struct While { + pub condition: Expr, + pub body: Box, +} + +pub type Block = Vec; + +#[derive(Debug)] +pub struct Function { + pub name: Token, + pub params: Vec, + pub body: Block, +} + +#[derive(Debug)] +pub enum Statement { + Expr(Expr), + Print(Expr), + Var(Var), + Block(Block), + If(If), + While(While), + Function(Rc), + Return(Return), +} + +// Parser + +/* +program → declaration* EOF ; + +declaration → funDecl + | varDecl + | statement ; + +funDecl → "fun" function ; +function → IDENTIFIER "(" parameters? ")" block ; +parameters → IDENTIFIER ( "," IDENTIFIER )* ; + + +statement → exprStmt + | forStmt + | ifStmt + | printStmt + | returnStmt + | whileStmt + | block ; + +forStmt → "for" "(" ( varDecl | exprStmt | ";" ) + expression? ";" + expression? ")" statement ; + +returnStmt → "return" expression? ";" ; + +whileStmt → "while" "(" expression ")" statement ; + +exprStmt → expression ";" ; + +ifStmt → "if" "(" expression ")" statement + ( "else" statement )? ; + +printStmt → "print" expression ";" ; + +expression → assignment ; +assignment → IDENTIFIER "=" assignment + | logic_or ; +logic_or → logic_and ( "or" logic_and )* ; +logic_and → equality ( "and" equality )* ; +equality → comparison ( ( "!=" | "==" ) comparison )* ; +comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ; +term → factor ( ( "-" | "+" ) factor )* ; +factor → unary ( ( "/" | "*" ) unary )* ; +unary → ( "!" | "-" ) unary | call ; +call → primary ( "(" arguments? ")" )* ; +arguments → expression ( "," expression )* ; +primary → NUMBER | STRING | "true" | "false" | "nil" + | "(" expression ")" ; +*/ + +struct Parser { + tokens: Vec, + current: usize, +} + +type ExprResult = Result; +type StmtResult = Result; + +impl Parser { + // recursive-descent parser functions + + fn declaration(&mut self) -> StmtResult { + if self.match_token(&TokenKind::Fun) { + return self.function(); + } + + if self.match_token(&TokenKind::Var) { + return self.var_declaration(); + } + + self.statement() + } + + fn function(&mut self) -> StmtResult { + let name = self.identifier("Expected function name.")?; + + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expect '(' after function name."), + )?; + + let mut params = vec![]; + + if !self.check_token(&TokenKind::RightParen) { + loop { + if params.len() >= 255 { + return Err(Error { + line: self.peek().line, + kind: ErrorKind::InternalError("255 parameter limit exceeded.".into()), + }); + } + + params.push(self.identifier("Expected parameter name.")?); + + if !self.match_token(&TokenKind::Comma) { + break; + } + } + } + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expect ')' after parameters."), + )?; + + self.consume( + &TokenKind::LeftBrace, + ErrorKind::ExpectedToken("Expect '{' before function body."), + )?; + + Ok(Statement::Function(Rc::new(Function { + name, + params, + body: self.block_statement()?, + }))) + } + + fn var_declaration(&mut self) -> StmtResult { + // Since `TokenKind::Identifier` carries data, we can't use + // `consume`. + let mut var = Var { + name: self.identifier("Expected variable name.")?, + initialiser: None, + }; + + if self.match_token(&TokenKind::Equal) { + var.initialiser = Some(self.expression()?); + } + + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Var(var)) + } + + fn statement(&mut self) -> StmtResult { + if self.match_token(&TokenKind::Print) { + self.print_statement() + } else if self.match_token(&TokenKind::LeftBrace) { + Ok(Statement::Block(self.block_statement()?)) + } else if self.match_token(&TokenKind::If) { + self.if_statement() + } else if self.match_token(&TokenKind::While) { + self.while_statement() + } else if self.match_token(&TokenKind::For) { + self.for_statement() + } else if self.match_token(&TokenKind::Return) { + self.return_statement() + } else { + self.expr_statement() + } + } + + fn print_statement(&mut self) -> StmtResult { + let expr = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Print(expr)) + } + + fn block_statement(&mut self) -> Result { + let mut block: Block = vec![]; + + while !self.check_token(&TokenKind::RightBrace) && !self.is_at_end() { + block.push(self.declaration()?); + } + + self.consume(&TokenKind::RightBrace, ErrorKind::ExpectedClosingBrace)?; + + Ok(block) + } + + fn if_statement(&mut self) -> StmtResult { + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'if'"), + )?; + let condition = self.expression()?; + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after condition"), + )?; + + let then_branch = Box::new(self.statement()?); + + let mut stmt = If { + condition, + then_branch, + else_branch: Option::None, + }; + + if self.match_token(&TokenKind::Else) { + stmt.else_branch = Some(Box::new(self.statement()?)); + } + + Ok(Statement::If(stmt)) + } + + fn while_statement(&mut self) -> StmtResult { + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'while'"), + )?; + + let condition = self.expression()?; + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after 'while'"), + )?; + + Ok(Statement::While(While { + condition, + body: Box::new(self.statement()?), + })) + } + + fn for_statement(&mut self) -> StmtResult { + // Parsing of clauses ... + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'for'"), + )?; + + let initialiser = if self.match_token(&TokenKind::Semicolon) { + None + } else if self.match_token(&TokenKind::Var) { + Some(self.var_declaration()?) + } else { + Some(self.expr_statement()?) + }; + + let condition = if self.check_token(&TokenKind::Semicolon) { + // unspecified condition => infinite loop + Expr::Literal(Literal::Boolean(true)) + } else { + self.expression()? + }; + + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + + let increment = if self.check_token(&TokenKind::RightParen) { + None + } else { + Some(self.expression()?) + }; + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after for clauses"), + )?; + + let mut body = self.statement()?; + + // ... desugaring to while + + if let Some(inc) = increment { + body = Statement::Block(vec![body, Statement::Expr(inc)]); + } + + body = Statement::While(While { + condition, + body: Box::new(body), + }); + + if let Some(init) = initialiser { + body = Statement::Block(vec![init, body]); + } + + Ok(body) + } + + fn return_statement(&mut self) -> StmtResult { + let value = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Return(Return { value })) + } + + fn expr_statement(&mut self) -> StmtResult { + let expr = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Expr(expr)) + } + + fn expression(&mut self) -> ExprResult { + self.assignment() + } + + fn assignment(&mut self) -> ExprResult { + let expr = self.logic_or()?; + + if self.match_token(&TokenKind::Equal) { + let equals = self.previous().clone(); + let value = self.assignment()?; + + if let Expr::Variable(Variable { name, .. }) = expr { + return Ok(Expr::Assign(Assign { + name, + value: Box::new(value), + depth: None, + })); + } + + return Err(Error { + line: equals.line, + kind: ErrorKind::InvalidAssignmentTarget(format!("{:?}", equals)), + }); + } + + Ok(expr) + } + + fn logic_or(&mut self) -> ExprResult { + let mut expr = self.logic_and()?; + + while self.match_token(&TokenKind::Or) { + expr = Expr::Logical(Logical { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(self.logic_and()?), + }) + } + + Ok(expr) + } + + fn logic_and(&mut self) -> ExprResult { + let mut expr = self.equality()?; + + while self.match_token(&TokenKind::And) { + expr = Expr::Logical(Logical { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(self.equality()?), + }) + } + + Ok(expr) + } + + fn equality(&mut self) -> ExprResult { + self.binary_operator( + &[TokenKind::BangEqual, TokenKind::EqualEqual], + Self::comparison, + ) + } + + fn comparison(&mut self) -> ExprResult { + self.binary_operator( + &[ + TokenKind::Greater, + TokenKind::GreaterEqual, + TokenKind::Less, + TokenKind::LessEqual, + ], + Self::term, + ) + } + + fn term(&mut self) -> ExprResult { + self.binary_operator(&[TokenKind::Minus, TokenKind::Plus], Self::factor) + } + + fn factor(&mut self) -> ExprResult { + self.binary_operator(&[TokenKind::Slash, TokenKind::Star], Self::unary) + } + + fn unary(&mut self) -> ExprResult { + if self.match_token(&TokenKind::Bang) || self.match_token(&TokenKind::Minus) { + return Ok(Expr::Unary(Unary { + operator: self.previous().clone(), + right: Box::new(self.unary()?), + })); + } + + return self.call(); + } + + fn call(&mut self) -> ExprResult { + let mut expr = self.primary()?; + + loop { + if self.match_token(&TokenKind::LeftParen) { + expr = self.finish_call(expr)?; + } else { + break; + } + } + + Ok(expr) + } + + fn finish_call(&mut self, callee: Expr) -> ExprResult { + let mut args = vec![]; + + if !self.check_token(&TokenKind::RightParen) { + loop { + // TODO(tazjin): Check for max args count + args.push(self.expression()?); + if !self.match_token(&TokenKind::Comma) { + break; + } + } + } + + let paren = self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expect ')' after arguments."), + )?; + + Ok(Expr::Call(Call { + args, + callee: Box::new(callee), + paren, + })) + } + + fn primary(&mut self) -> ExprResult { + let next = self.advance(); + let literal = match next.kind { + TokenKind::True => Literal::Boolean(true), + TokenKind::False => Literal::Boolean(false), + TokenKind::Nil => Literal::Nil, + TokenKind::Number(num) => Literal::Number(num), + TokenKind::String(string) => Literal::String(string), + + TokenKind::LeftParen => { + let expr = self.expression()?; + self.consume(&TokenKind::RightParen, ErrorKind::UnmatchedParens)?; + return Ok(Expr::Grouping(Grouping(Box::new(expr)))); + } + + TokenKind::Identifier(_) => { + return Ok(Expr::Variable(Variable { + name: next, + depth: None, + })) + } + + unexpected => { + eprintln!("encountered {:?}", unexpected); + return Err(Error { + line: next.line, + kind: ErrorKind::ExpectedExpression(next.lexeme), + }); + } + }; + + Ok(Expr::Literal(literal)) + } + + // internal helpers + + fn identifier(&mut self, err: &'static str) -> Result { + if let TokenKind::Identifier(_) = self.peek().kind { + Ok(self.advance()) + } else { + Err(Error { + line: self.peek().line, + kind: ErrorKind::ExpectedToken(err), + }) + } + } + + /// Check if the next token is in `oneof`, and advance if it is. + fn match_token(&mut self, token: &TokenKind) -> bool { + if self.check_token(token) { + self.advance(); + return true; + } + + false + } + + /// Return the next token and advance parser state. + fn advance(&mut self) -> Token { + if !self.is_at_end() { + self.current += 1; + } + + return self.previous().clone(); + } + + fn is_at_end(&self) -> bool { + self.check_token(&TokenKind::Eof) + } + + /// Is the next token `token`? + fn check_token(&self, token: &TokenKind) -> bool { + self.peek().kind == *token + } + + fn peek(&self) -> &Token { + &self.tokens[self.current] + } + + fn previous(&self) -> &Token { + &self.tokens[self.current - 1] + } + + fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result { + if self.check_token(kind) { + return Ok(self.advance()); + } + + Err(Error { + line: self.peek().line, + kind: err, + }) + } + + fn synchronise(&mut self) { + self.advance(); + + while !self.is_at_end() { + if self.previous().kind == TokenKind::Semicolon { + return; + } + + match self.peek().kind { + TokenKind::Class + | TokenKind::Fun + | TokenKind::Var + | TokenKind::For + | TokenKind::If + | TokenKind::While + | TokenKind::Print + | TokenKind::Return => return, + + _ => { + self.advance(); + } + } + } + } + + fn binary_operator( + &mut self, + oneof: &[TokenKind], + each: fn(&mut Parser) -> ExprResult, + ) -> ExprResult { + let mut expr = each(self)?; + + while oneof.iter().any(|t| self.match_token(t)) { + expr = Expr::Binary(Binary { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(each(self)?), + }) + } + + return Ok(expr); + } +} + +pub fn parse(tokens: Vec) -> Result> { + let mut parser = Parser { tokens, current: 0 }; + let mut program: Block = vec![]; + let mut errors: Vec = vec![]; + + while !parser.is_at_end() { + match parser.declaration() { + Err(err) => { + errors.push(err); + parser.synchronise(); + } + Ok(decl) => { + program.push(decl); + } + } + } + + if errors.is_empty() { + Ok(program) + } else { + Err(errors) + } +} diff --git a/users/tazjin/rlox/src/treewalk/resolver.rs b/users/tazjin/rlox/src/treewalk/resolver.rs index 5e15d386c73f..3d12973aa089 100644 --- a/users/tazjin/rlox/src/treewalk/resolver.rs +++ b/users/tazjin/rlox/src/treewalk/resolver.rs @@ -6,9 +6,9 @@ use std::collections::HashMap; use std::rc::Rc; -use crate::errors::{Error, ErrorKind}; -use crate::parser::{self, Expr, Statement}; -use crate::scanner::Token; +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::parser::{self, Expr, Statement}; +use crate::treewalk::scanner::Token; #[derive(Default)] struct Resolver<'a> { diff --git a/users/tazjin/rlox/src/treewalk/scanner.rs b/users/tazjin/rlox/src/treewalk/scanner.rs new file mode 100644 index 000000000000..af9075484145 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/scanner.rs @@ -0,0 +1,283 @@ +use crate::treewalk::errors::{Error, ErrorKind}; + +#[derive(Clone, Debug, PartialEq)] +pub enum TokenKind { + // Single-character tokens. + LeftParen, + RightParen, + LeftBrace, + RightBrace, + Comma, + Dot, + Minus, + Plus, + Semicolon, + Slash, + Star, + + // One or two character tokens. + Bang, + BangEqual, + Equal, + EqualEqual, + Greater, + GreaterEqual, + Less, + LessEqual, + + // Literals. + Identifier(String), + String(String), + Number(f64), + True, + False, + Nil, + + // Keywords. + And, + Class, + Else, + Fun, + For, + If, + Or, + Print, + Return, + Super, + This, + Var, + While, + + // Special things + Eof, +} + +#[derive(Clone, Debug)] +pub struct Token { + pub kind: TokenKind, + pub lexeme: String, + pub line: usize, +} + +struct Scanner<'a> { + source: &'a [char], + tokens: Vec, + errors: Vec, + start: usize, // offset of first character in current lexeme + current: usize, // current offset into source + line: usize, // current line in source +} + +impl<'a> Scanner<'a> { + fn is_at_end(&self) -> bool { + return self.current >= self.source.len(); + } + + fn advance(&mut self) -> char { + self.current += 1; + self.source[self.current - 1] + } + + fn add_token(&mut self, kind: TokenKind) { + let lexeme = &self.source[self.start..self.current]; + self.tokens.push(Token { + kind, + lexeme: lexeme.into_iter().collect(), + line: self.line, + }) + } + + fn scan_token(&mut self) { + match self.advance() { + // simple single-character tokens + '(' => self.add_token(TokenKind::LeftParen), + ')' => self.add_token(TokenKind::RightParen), + '{' => self.add_token(TokenKind::LeftBrace), + '}' => self.add_token(TokenKind::RightBrace), + ',' => self.add_token(TokenKind::Comma), + '.' => self.add_token(TokenKind::Dot), + '-' => self.add_token(TokenKind::Minus), + '+' => self.add_token(TokenKind::Plus), + ';' => self.add_token(TokenKind::Semicolon), + '*' => self.add_token(TokenKind::Star), + + // possible multi-character tokens + '!' => self.add_if_next('=', TokenKind::BangEqual, TokenKind::Bang), + '=' => self.add_if_next('=', TokenKind::EqualEqual, TokenKind::Equal), + '<' => self.add_if_next('=', TokenKind::LessEqual, TokenKind::Less), + '>' => self.add_if_next('=', TokenKind::GreaterEqual, TokenKind::Greater), + + '/' => { + // support comments until EOL by discarding characters + if self.match_next('/') { + while self.peek() != '\n' && !self.is_at_end() { + self.advance(); + } + } else { + self.add_token(TokenKind::Slash); + } + } + + // ignore whitespace + ws if ws.is_whitespace() => { + if ws == '\n' { + self.line += 1 + } + } + + '"' => self.scan_string(), + + digit if digit.is_digit(10) => self.scan_number(), + + chr if chr.is_alphabetic() || chr == '_' => self.scan_identifier(), + + unexpected => self.errors.push(Error { + line: self.line, + kind: ErrorKind::UnexpectedChar(unexpected), + }), + }; + } + + fn match_next(&mut self, expected: char) -> bool { + if self.is_at_end() || self.source[self.current] != expected { + false + } else { + self.current += 1; + true + } + } + + fn add_if_next(&mut self, expected: char, then: TokenKind, or: TokenKind) { + if self.match_next(expected) { + self.add_token(then); + } else { + self.add_token(or); + } + } + + fn peek(&self) -> char { + if self.is_at_end() { + return '\0'; + } else { + return self.source[self.current]; + } + } + + fn peek_next(&self) -> char { + if self.current + 1 >= self.source.len() { + return '\0'; + } else { + return self.source[self.current + 1]; + } + } + + fn scan_string(&mut self) { + while self.peek() != '"' && !self.is_at_end() { + if self.peek() == '\n' { + self.line += 1; + } + + self.advance(); + } + + if self.is_at_end() { + self.errors.push(Error { + line: self.line, + kind: ErrorKind::UnterminatedString, + }); + return; + } + + // closing '"' + self.advance(); + + // add token without surrounding quotes + let string: String = self.source[(self.start + 1)..(self.current - 1)] + .iter() + .collect(); + self.add_token(TokenKind::String(string)); + } + + fn scan_number(&mut self) { + while self.peek().is_digit(10) { + self.advance(); + } + + // Look for a fractional part + if self.peek() == '.' && self.peek_next().is_digit(10) { + // consume '.' + self.advance(); + + while self.peek().is_digit(10) { + self.advance(); + } + } + + let num: f64 = self.source[self.start..self.current] + .iter() + .collect::() + .parse() + .expect("float parsing should always work"); + + self.add_token(TokenKind::Number(num)); + } + + fn scan_identifier(&mut self) { + while self.peek().is_alphanumeric() || self.peek() == '_' { + self.advance(); + } + + let ident: String = self.source[self.start..self.current].iter().collect(); + + // Determine whether this is an identifier, or a keyword: + let token_kind = match ident.as_str() { + "and" => TokenKind::And, + "class" => TokenKind::Class, + "else" => TokenKind::Else, + "false" => TokenKind::False, + "for" => TokenKind::For, + "fun" => TokenKind::Fun, + "if" => TokenKind::If, + "nil" => TokenKind::Nil, + "or" => TokenKind::Or, + "print" => TokenKind::Print, + "return" => TokenKind::Return, + "super" => TokenKind::Super, + "this" => TokenKind::This, + "true" => TokenKind::True, + "var" => TokenKind::Var, + "while" => TokenKind::While, + _ => TokenKind::Identifier(ident), + }; + + self.add_token(token_kind); + } + + fn scan_tokens(&mut self) { + while !self.is_at_end() { + self.start = self.current; + self.scan_token(); + } + + self.add_token(TokenKind::Eof); + } +} + +pub fn scan<'a>(input: &'a [char]) -> Result, Vec> { + let mut scanner = Scanner { + source: &input, + tokens: vec![], + errors: vec![], + start: 0, + current: 0, + line: 0, + }; + + scanner.scan_tokens(); + + if !scanner.errors.is_empty() { + return Err(scanner.errors); + } + + return Ok(scanner.tokens); +} -- cgit 1.4.1