From 5868d4bd49a7b80a395f1ecabedeb0b8f4ddffce Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Mon, 18 Jan 2021 20:27:14 +0300 Subject: refactor(tazjin/rlox): Prepare scanner for shared use In the book, the clox interpreter has its own scanner which uses a pull-based model for a single pass compiler. I can't be bothered to write another scanner, or amend this one into pull-mode to work with the treewalk interpreter, so instead I will just reuse it and pull from a vector of tokens. The tokens are shared between both interpreters and the scanner is not what I'm interested in here. Change-Id: Ib07e89127fce2b047f9b3e1ff7e9908d798b3b2b Reviewed-on: https://cl.tvl.fyi/c/depot/+/2420 Reviewed-by: tazjin Tested-by: BuildkiteCI --- users/tazjin/rlox/src/treewalk/scanner.rs | 283 ------------------------------ 1 file changed, 283 deletions(-) delete mode 100644 users/tazjin/rlox/src/treewalk/scanner.rs (limited to 'users/tazjin/rlox/src/treewalk/scanner.rs') diff --git a/users/tazjin/rlox/src/treewalk/scanner.rs b/users/tazjin/rlox/src/treewalk/scanner.rs deleted file mode 100644 index af9075484145..000000000000 --- a/users/tazjin/rlox/src/treewalk/scanner.rs +++ /dev/null @@ -1,283 +0,0 @@ -use crate::treewalk::errors::{Error, ErrorKind}; - -#[derive(Clone, Debug, PartialEq)] -pub enum TokenKind { - // Single-character tokens. - LeftParen, - RightParen, - LeftBrace, - RightBrace, - Comma, - Dot, - Minus, - Plus, - Semicolon, - Slash, - Star, - - // One or two character tokens. - Bang, - BangEqual, - Equal, - EqualEqual, - Greater, - GreaterEqual, - Less, - LessEqual, - - // Literals. - Identifier(String), - String(String), - Number(f64), - True, - False, - Nil, - - // Keywords. 
- And, - Class, - Else, - Fun, - For, - If, - Or, - Print, - Return, - Super, - This, - Var, - While, - - // Special things - Eof, -} - -#[derive(Clone, Debug)] -pub struct Token { - pub kind: TokenKind, - pub lexeme: String, - pub line: usize, -} - -struct Scanner<'a> { - source: &'a [char], - tokens: Vec<Token>, - errors: Vec<Error>, - start: usize, // offset of first character in current lexeme - current: usize, // current offset into source - line: usize, // current line in source -} - -impl<'a> Scanner<'a> { - fn is_at_end(&self) -> bool { - return self.current >= self.source.len(); - } - - fn advance(&mut self) -> char { - self.current += 1; - self.source[self.current - 1] - } - - fn add_token(&mut self, kind: TokenKind) { - let lexeme = &self.source[self.start..self.current]; - self.tokens.push(Token { - kind, - lexeme: lexeme.into_iter().collect(), - line: self.line, - }) - } - - fn scan_token(&mut self) { - match self.advance() { - // simple single-character tokens - '(' => self.add_token(TokenKind::LeftParen), - ')' => self.add_token(TokenKind::RightParen), - '{' => self.add_token(TokenKind::LeftBrace), - '}' => self.add_token(TokenKind::RightBrace), - ',' => self.add_token(TokenKind::Comma), - '.' => self.add_token(TokenKind::Dot), - '-' => self.add_token(TokenKind::Minus), - '+' => self.add_token(TokenKind::Plus), - ';' => self.add_token(TokenKind::Semicolon), - '*' => self.add_token(TokenKind::Star), - - // possible multi-character tokens - '!'
=> self.add_if_next('=', TokenKind::BangEqual, TokenKind::Bang), - '=' => self.add_if_next('=', TokenKind::EqualEqual, TokenKind::Equal), - '<' => self.add_if_next('=', TokenKind::LessEqual, TokenKind::Less), - '>' => self.add_if_next('=', TokenKind::GreaterEqual, TokenKind::Greater), - - '/' => { - // support comments until EOL by discarding characters - if self.match_next('/') { - while self.peek() != '\n' && !self.is_at_end() { - self.advance(); - } - } else { - self.add_token(TokenKind::Slash); - } - } - - // ignore whitespace - ws if ws.is_whitespace() => { - if ws == '\n' { - self.line += 1 - } - } - - '"' => self.scan_string(), - - digit if digit.is_digit(10) => self.scan_number(), - - chr if chr.is_alphabetic() || chr == '_' => self.scan_identifier(), - - unexpected => self.errors.push(Error { - line: self.line, - kind: ErrorKind::UnexpectedChar(unexpected), - }), - }; - } - - fn match_next(&mut self, expected: char) -> bool { - if self.is_at_end() || self.source[self.current] != expected { - false - } else { - self.current += 1; - true - } - } - - fn add_if_next(&mut self, expected: char, then: TokenKind, or: TokenKind) { - if self.match_next(expected) { - self.add_token(then); - } else { - self.add_token(or); - } - } - - fn peek(&self) -> char { - if self.is_at_end() { - return '\0'; - } else { - return self.source[self.current]; - } - } - - fn peek_next(&self) -> char { - if self.current + 1 >= self.source.len() { - return '\0'; - } else { - return self.source[self.current + 1]; - } - } - - fn scan_string(&mut self) { - while self.peek() != '"' && !self.is_at_end() { - if self.peek() == '\n' { - self.line += 1; - } - - self.advance(); - } - - if self.is_at_end() { - self.errors.push(Error { - line: self.line, - kind: ErrorKind::UnterminatedString, - }); - return; - } - - // closing '"' - self.advance(); - - // add token without surrounding quotes - let string: String = self.source[(self.start + 1)..(self.current - 1)] - .iter() - .collect(); - 
self.add_token(TokenKind::String(string)); - } - - fn scan_number(&mut self) { - while self.peek().is_digit(10) { - self.advance(); - } - - // Look for a fractional part - if self.peek() == '.' && self.peek_next().is_digit(10) { - // consume '.' - self.advance(); - - while self.peek().is_digit(10) { - self.advance(); - } - } - - let num: f64 = self.source[self.start..self.current] - .iter() - .collect::<String>() - .parse() - .expect("float parsing should always work"); - - self.add_token(TokenKind::Number(num)); - } - - fn scan_identifier(&mut self) { - while self.peek().is_alphanumeric() || self.peek() == '_' { - self.advance(); - } - - let ident: String = self.source[self.start..self.current].iter().collect(); - - // Determine whether this is an identifier, or a keyword: - let token_kind = match ident.as_str() { - "and" => TokenKind::And, - "class" => TokenKind::Class, - "else" => TokenKind::Else, - "false" => TokenKind::False, - "for" => TokenKind::For, - "fun" => TokenKind::Fun, - "if" => TokenKind::If, - "nil" => TokenKind::Nil, - "or" => TokenKind::Or, - "print" => TokenKind::Print, - "return" => TokenKind::Return, - "super" => TokenKind::Super, - "this" => TokenKind::This, - "true" => TokenKind::True, - "var" => TokenKind::Var, - "while" => TokenKind::While, - _ => TokenKind::Identifier(ident), - }; - - self.add_token(token_kind); - } - - fn scan_tokens(&mut self) { - while !self.is_at_end() { - self.start = self.current; - self.scan_token(); - } - - self.add_token(TokenKind::Eof); - } -} - -pub fn scan<'a>(input: &'a [char]) -> Result<Vec<Token>, Vec<Error>> { - let mut scanner = Scanner { - source: &input, - tokens: vec![], - errors: vec![], - start: 0, - current: 0, - line: 0, - }; - - scanner.scan_tokens(); - - if !scanner.errors.is_empty() { - return Err(scanner.errors); - } - - return Ok(scanner.tokens); -} -- cgit 1.4.1