about summary refs log tree commit diff
path: root/users/tazjin/rlox/src/treewalk/scanner.rs
diff options
context:
space:
mode:
author Vincent Ambo <mail@tazj.in> 2021-01-18T17·27+0300
committer tazjin <mail@tazj.in> 2021-01-19T09·57+0000
commit 5868d4bd49a7b80a395f1ecabedeb0b8f4ddffce (patch)
tree 7f9b786e665f7ea103b256d9cab92bbb982d4770 /users/tazjin/rlox/src/treewalk/scanner.rs
parent 2d136e03279e481021a23948fdf5556f25394cd3 (diff)
refactor(tazjin/rlox): Prepare scanner for shared use r/2132
In the book, the clox interpreter has its own scanner which uses a
pull-based model for a single pass compiler.

I can't be bothered to write another scanner, or amend this one into
pull-mode to work with the treewalk interpreter, so instead I will
just reuse it and pull from a vector of tokens.

The tokens are shared between both interpreters and the scanner is not
what I'm interested in here.

Change-Id: Ib07e89127fce2b047f9b3e1ff7e9908d798b3b2b
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2420
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
Diffstat (limited to 'users/tazjin/rlox/src/treewalk/scanner.rs')
-rw-r--r-- users/tazjin/rlox/src/treewalk/scanner.rs 283
1 files changed, 0 insertions, 283 deletions
diff --git a/users/tazjin/rlox/src/treewalk/scanner.rs b/users/tazjin/rlox/src/treewalk/scanner.rs
deleted file mode 100644
index af9075484145..000000000000
--- a/users/tazjin/rlox/src/treewalk/scanner.rs
+++ /dev/null
@@ -1,283 +0,0 @@
-use crate::treewalk::errors::{Error, ErrorKind};
-
-#[derive(Clone, Debug, PartialEq)]
-pub enum TokenKind {
-    // Single-character tokens.
-    LeftParen,
-    RightParen,
-    LeftBrace,
-    RightBrace,
-    Comma,
-    Dot,
-    Minus,
-    Plus,
-    Semicolon,
-    Slash,
-    Star,
-
-    // One or two character tokens.
-    Bang,
-    BangEqual,
-    Equal,
-    EqualEqual,
-    Greater,
-    GreaterEqual,
-    Less,
-    LessEqual,
-
-    // Literals.
-    Identifier(String),
-    String(String),
-    Number(f64),
-    True,
-    False,
-    Nil,
-
-    // Keywords.
-    And,
-    Class,
-    Else,
-    Fun,
-    For,
-    If,
-    Or,
-    Print,
-    Return,
-    Super,
-    This,
-    Var,
-    While,
-
-    // Special things
-    Eof,
-}
-
-#[derive(Clone, Debug)]
-pub struct Token {
-    pub kind: TokenKind,
-    pub lexeme: String,
-    pub line: usize,
-}
-
-struct Scanner<'a> {
-    source: &'a [char],
-    tokens: Vec<Token>,
-    errors: Vec<Error>,
-    start: usize,   // offset of first character in current lexeme
-    current: usize, // current offset into source
-    line: usize,    // current line in source
-}
-
-impl<'a> Scanner<'a> {
-    fn is_at_end(&self) -> bool {
-        return self.current >= self.source.len();
-    }
-
-    fn advance(&mut self) -> char {
-        self.current += 1;
-        self.source[self.current - 1]
-    }
-
-    fn add_token(&mut self, kind: TokenKind) {
-        let lexeme = &self.source[self.start..self.current];
-        self.tokens.push(Token {
-            kind,
-            lexeme: lexeme.into_iter().collect(),
-            line: self.line,
-        })
-    }
-
-    fn scan_token(&mut self) {
-        match self.advance() {
-            // simple single-character tokens
-            '(' => self.add_token(TokenKind::LeftParen),
-            ')' => self.add_token(TokenKind::RightParen),
-            '{' => self.add_token(TokenKind::LeftBrace),
-            '}' => self.add_token(TokenKind::RightBrace),
-            ',' => self.add_token(TokenKind::Comma),
-            '.' => self.add_token(TokenKind::Dot),
-            '-' => self.add_token(TokenKind::Minus),
-            '+' => self.add_token(TokenKind::Plus),
-            ';' => self.add_token(TokenKind::Semicolon),
-            '*' => self.add_token(TokenKind::Star),
-
-            // possible multi-character tokens
-            '!' => self.add_if_next('=', TokenKind::BangEqual, TokenKind::Bang),
-            '=' => self.add_if_next('=', TokenKind::EqualEqual, TokenKind::Equal),
-            '<' => self.add_if_next('=', TokenKind::LessEqual, TokenKind::Less),
-            '>' => self.add_if_next('=', TokenKind::GreaterEqual, TokenKind::Greater),
-
-            '/' => {
-                // support comments until EOL by discarding characters
-                if self.match_next('/') {
-                    while self.peek() != '\n' && !self.is_at_end() {
-                        self.advance();
-                    }
-                } else {
-                    self.add_token(TokenKind::Slash);
-                }
-            }
-
-            // ignore whitespace
-            ws if ws.is_whitespace() => {
-                if ws == '\n' {
-                    self.line += 1
-                }
-            }
-
-            '"' => self.scan_string(),
-
-            digit if digit.is_digit(10) => self.scan_number(),
-
-            chr if chr.is_alphabetic() || chr == '_' => self.scan_identifier(),
-
-            unexpected => self.errors.push(Error {
-                line: self.line,
-                kind: ErrorKind::UnexpectedChar(unexpected),
-            }),
-        };
-    }
-
-    fn match_next(&mut self, expected: char) -> bool {
-        if self.is_at_end() || self.source[self.current] != expected {
-            false
-        } else {
-            self.current += 1;
-            true
-        }
-    }
-
-    fn add_if_next(&mut self, expected: char, then: TokenKind, or: TokenKind) {
-        if self.match_next(expected) {
-            self.add_token(then);
-        } else {
-            self.add_token(or);
-        }
-    }
-
-    fn peek(&self) -> char {
-        if self.is_at_end() {
-            return '\0';
-        } else {
-            return self.source[self.current];
-        }
-    }
-
-    fn peek_next(&self) -> char {
-        if self.current + 1 >= self.source.len() {
-            return '\0';
-        } else {
-            return self.source[self.current + 1];
-        }
-    }
-
-    fn scan_string(&mut self) {
-        while self.peek() != '"' && !self.is_at_end() {
-            if self.peek() == '\n' {
-                self.line += 1;
-            }
-
-            self.advance();
-        }
-
-        if self.is_at_end() {
-            self.errors.push(Error {
-                line: self.line,
-                kind: ErrorKind::UnterminatedString,
-            });
-            return;
-        }
-
-        // closing '"'
-        self.advance();
-
-        // add token without surrounding quotes
-        let string: String = self.source[(self.start + 1)..(self.current - 1)]
-            .iter()
-            .collect();
-        self.add_token(TokenKind::String(string));
-    }
-
-    fn scan_number(&mut self) {
-        while self.peek().is_digit(10) {
-            self.advance();
-        }
-
-        // Look for a fractional part
-        if self.peek() == '.' && self.peek_next().is_digit(10) {
-            // consume '.'
-            self.advance();
-
-            while self.peek().is_digit(10) {
-                self.advance();
-            }
-        }
-
-        let num: f64 = self.source[self.start..self.current]
-            .iter()
-            .collect::<String>()
-            .parse()
-            .expect("float parsing should always work");
-
-        self.add_token(TokenKind::Number(num));
-    }
-
-    fn scan_identifier(&mut self) {
-        while self.peek().is_alphanumeric() || self.peek() == '_' {
-            self.advance();
-        }
-
-        let ident: String = self.source[self.start..self.current].iter().collect();
-
-        // Determine whether this is an identifier, or a keyword:
-        let token_kind = match ident.as_str() {
-            "and" => TokenKind::And,
-            "class" => TokenKind::Class,
-            "else" => TokenKind::Else,
-            "false" => TokenKind::False,
-            "for" => TokenKind::For,
-            "fun" => TokenKind::Fun,
-            "if" => TokenKind::If,
-            "nil" => TokenKind::Nil,
-            "or" => TokenKind::Or,
-            "print" => TokenKind::Print,
-            "return" => TokenKind::Return,
-            "super" => TokenKind::Super,
-            "this" => TokenKind::This,
-            "true" => TokenKind::True,
-            "var" => TokenKind::Var,
-            "while" => TokenKind::While,
-            _ => TokenKind::Identifier(ident),
-        };
-
-        self.add_token(token_kind);
-    }
-
-    fn scan_tokens(&mut self) {
-        while !self.is_at_end() {
-            self.start = self.current;
-            self.scan_token();
-        }
-
-        self.add_token(TokenKind::Eof);
-    }
-}
-
-pub fn scan<'a>(input: &'a [char]) -> Result<Vec<Token>, Vec<Error>> {
-    let mut scanner = Scanner {
-        source: &input,
-        tokens: vec![],
-        errors: vec![],
-        start: 0,
-        current: 0,
-        line: 0,
-    };
-
-    scanner.scan_tokens();
-
-    if !scanner.errors.is_empty() {
-        return Err(scanner.errors);
-    }
-
-    return Ok(scanner.tokens);
-}