use crate::treewalk::errors::{Error, ErrorKind}; #[derive(Clone, Debug, PartialEq)] pub enum TokenKind { // Single-character tokens. LeftParen, RightParen, LeftBrace, RightBrace, Comma, Dot, Minus, Plus, Semicolon, Slash, Star, // One or two character tokens. Bang, BangEqual, Equal, EqualEqual, Greater, GreaterEqual, Less, LessEqual, // Literals. Identifier(String), String(String), Number(f64), True, False, Nil, // Keywords. And, Class, Else, Fun, For, If, Or, Print, Return, Super, This, Var, While, // Special things Eof, } #[derive(Clone, Debug)] pub struct Token { pub kind: TokenKind, pub lexeme: String, pub line: usize, } struct Scanner<'a> { source: &'a [char], tokens: Vec<Token>, errors: Vec<Error>, start: usize, // offset of first character in current lexeme current: usize, // current offset into source line: usize, // current line in source } impl<'a> Scanner<'a> { fn is_at_end(&self) -> bool { return self.current >= self.source.len(); } fn advance(&mut self) -> char { self.current += 1; self.source[self.current - 1] } fn add_token(&mut self, kind: TokenKind) { let lexeme = &self.source[self.start..self.current]; self.tokens.push(Token { kind, lexeme: lexeme.into_iter().collect(), line: self.line, }) } fn scan_token(&mut self) { match self.advance() { // simple single-character tokens '(' => self.add_token(TokenKind::LeftParen), ')' => self.add_token(TokenKind::RightParen), '{' => self.add_token(TokenKind::LeftBrace), '}' => self.add_token(TokenKind::RightBrace), ',' => self.add_token(TokenKind::Comma), '.' => self.add_token(TokenKind::Dot), '-' => self.add_token(TokenKind::Minus), '+' => self.add_token(TokenKind::Plus), ';' => self.add_token(TokenKind::Semicolon), '*' => self.add_token(TokenKind::Star), // possible multi-character tokens '!' => self.add_if_next('=', TokenKind::BangEqual, TokenKind::Bang), '=' => self.add_if_next('=', TokenKind::EqualEqual, TokenKind::Equal), '<' => self.add_if_next('=', TokenKind::LessEqual, TokenKind::Less), '>' => self.add_if_next('=', TokenKind::GreaterEqual, TokenKind::Greater), '/' => { // support comments until EOL by discarding characters if self.match_next('/') { while self.peek() != '\n' && !self.is_at_end() { self.advance(); } } else { self.add_token(TokenKind::Slash); } } // ignore whitespace ws if ws.is_whitespace() => { if ws == '\n' { self.line += 1 } } '"' => self.scan_string(), digit if digit.is_digit(10) => self.scan_number(), chr if chr.is_alphabetic() || chr == '_' => self.scan_identifier(), unexpected => self.errors.push(Error { line: self.line, kind: ErrorKind::UnexpectedChar(unexpected), }), }; } fn match_next(&mut self, expected: char) -> bool { if self.is_at_end() || self.source[self.current] != expected { false } else { self.current += 1; true } } fn add_if_next(&mut self, expected: char, then: TokenKind, or: TokenKind) { if self.match_next(expected) { self.add_token(then); } else { self.add_token(or); } } fn peek(&self) -> char { if self.is_at_end() { return '\0'; } else { return self.source[self.current]; } } fn peek_next(&self) -> char { if self.current + 1 >= self.source.len() { return '\0'; } else { return self.source[self.current + 1]; } } fn scan_string(&mut self) { while self.peek() != '"' && !self.is_at_end() { if self.peek() == '\n' { self.line += 1; } self.advance(); } if self.is_at_end() { self.errors.push(Error { line: self.line, kind: ErrorKind::UnterminatedString, }); return; } // closing '"' self.advance(); // add token without surrounding quotes let string: String = self.source[(self.start + 1)..(self.current - 1)] .iter() .collect(); self.add_token(TokenKind::String(string)); } fn scan_number(&mut self) { while self.peek().is_digit(10) { self.advance(); } // Look for a fractional part if self.peek() == '.' && self.peek_next().is_digit(10) { // consume '.' self.advance(); while self.peek().is_digit(10) { self.advance(); } } let num: f64 = self.source[self.start..self.current] .iter() .collect::<String>() .parse() .expect("float parsing should always work"); self.add_token(TokenKind::Number(num)); } fn scan_identifier(&mut self) { while self.peek().is_alphanumeric() || self.peek() == '_' { self.advance(); } let ident: String = self.source[self.start..self.current].iter().collect(); // Determine whether this is an identifier, or a keyword: let token_kind = match ident.as_str() { "and" => TokenKind::And, "class" => TokenKind::Class, "else" => TokenKind::Else, "false" => TokenKind::False, "for" => TokenKind::For, "fun" => TokenKind::Fun, "if" => TokenKind::If, "nil" => TokenKind::Nil, "or" => TokenKind::Or, "print" => TokenKind::Print, "return" => TokenKind::Return, "super" => TokenKind::Super, "this" => TokenKind::This, "true" => TokenKind::True, "var" => TokenKind::Var, "while" => TokenKind::While, _ => TokenKind::Identifier(ident), }; self.add_token(token_kind); } fn scan_tokens(&mut self) { while !self.is_at_end() { self.start = self.current; self.scan_token(); } self.add_token(TokenKind::Eof); } } pub fn scan<'a>(input: &'a [char]) -> Result<Vec<Token>, Vec<Error>> { let mut scanner = Scanner { source: &input, tokens: vec![], errors: vec![], start: 0, current: 0, line: 0, }; scanner.scan_tokens(); if !scanner.errors.is_empty() { return Err(scanner.errors); } return Ok(scanner.tokens); }