From 3d1b116f7fdcdd7af77a4abc7dbedef4df8fd0a4 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Mon, 23 Nov 2020 02:00:02 +0100 Subject: feat(tazjin/rlox): Implement single-character scanning ... still not that interesting, but at this point slightly divergent from the book: The book embraces mutability for interpreter state, initially for tracking whether an error condition has occurred. I avoid this by instead defining an error type and collecting the error values, to be handled later on. Notes: So far nothing special, but this is just the beginning of the book. I like the style it is written in and it has pointed to some interesting resources, such as a 1965 paper titled "The Next 700 Languages". Change-Id: I030b38438fec9eb55372bf547af225138908230a Reviewed-on: https://cl.tvl.fyi/c/depot/+/2144 Reviewed-by: tazjin Tested-by: BuildkiteCI --- users/tazjin/rlox/src/errors.rs | 14 +++++ users/tazjin/rlox/src/main.rs | 2 + users/tazjin/rlox/src/scanner.rs | 123 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 139 insertions(+) create mode 100644 users/tazjin/rlox/src/errors.rs create mode 100644 users/tazjin/rlox/src/scanner.rs diff --git a/users/tazjin/rlox/src/errors.rs b/users/tazjin/rlox/src/errors.rs new file mode 100644 index 000000000000..46c739ef2f46 --- /dev/null +++ b/users/tazjin/rlox/src/errors.rs @@ -0,0 +1,14 @@ +#[derive(Debug)] +pub enum ErrorKind { + UnexpectedChar(char), +} + +#[derive(Debug)] +pub struct Error { + pub line: usize, + pub kind: ErrorKind, +} + +pub fn report(loc: &str, err: &Error) { + eprintln!("[line {}] Error {}: {:?}", err.line, loc, err.kind); +} diff --git a/users/tazjin/rlox/src/main.rs b/users/tazjin/rlox/src/main.rs index 83d220c81631..b14ed97d5eda 100644 --- a/users/tazjin/rlox/src/main.rs +++ b/users/tazjin/rlox/src/main.rs @@ -4,7 +4,9 @@ use std::io; use std::io::Write; use std::process; +mod errors; mod interpreter; +mod scanner; fn main() { let mut args = env::args(); diff --git 
a/users/tazjin/rlox/src/scanner.rs b/users/tazjin/rlox/src/scanner.rs new file mode 100644 index 000000000000..c1809010547a --- /dev/null +++ b/users/tazjin/rlox/src/scanner.rs @@ -0,0 +1,123 @@ +use crate::errors::{Error, ErrorKind}; + +#[derive(Debug)] +pub enum TokenKind { + // Single-character tokens. + LeftParen, + RightParen, + LeftBrace, + RightBrace, + Comma, + Dot, + Minus, + Plus, + Semicolon, + Slash, + Star, + + // One or two character tokens. + Bang, + BangEqual, + Equal, + EqualEqual, + Greater, + GreaterEqual, + Less, + LessEqual, + + // Literals. + Identifier, + String, + Number, + + // Keywords. + And, + Class, + Else, + False, + Fun, + For, + If, + Nil, + Or, + Print, + Return, + Super, + This, + True, + Var, + While, + + // Special things + Eof, +} + +#[derive(Debug)] +pub struct Token<'a> { + kind: TokenKind, + lexeme: &'a str, + // literal: Object, // TODO(tazjin): Uhh? + line: usize, +} + +struct Scanner<'a> { + source: &'a str, + tokens: Vec<Token<'a>>, + errors: Vec<Error>, + start: usize, // offset of first character in current lexeme + current: usize, // current offset into source + line: usize, // current line in source +} + +impl<'a> Scanner<'a> { + fn is_at_end(&self) -> bool { + return self.current >= self.source.len(); + } + + fn advance(&mut self) -> char { + self.current += 1; + + // TODO(tazjin): Due to utf8-safety, this is a bit annoying. + // Since string iteration is not the point here I'm just + // leaving this as is for now. + self.source.chars().nth(self.current - 1).unwrap() + } + + fn add_token(&mut self, kind: TokenKind) { + let lexeme = &self.source[self.start..self.current]; + self.tokens.push(Token { + kind, + lexeme, + line: self.line, + }) + } + + fn scan_token(&mut self) { + match self.advance() { + '(' => self.add_token(TokenKind::LeftParen), + ')' => self.add_token(TokenKind::RightParen), + '{' => self.add_token(TokenKind::LeftBrace), + '}' => self.add_token(TokenKind::RightBrace), + ',' => self.add_token(TokenKind::Comma), + '.' 
=> self.add_token(TokenKind::Dot), + '-' => self.add_token(TokenKind::Minus), + '+' => self.add_token(TokenKind::Plus), + ';' => self.add_token(TokenKind::Semicolon), + '*' => self.add_token(TokenKind::Star), + + unexpected => self.errors.push(Error { + line: self.line, + kind: ErrorKind::UnexpectedChar(unexpected), + }), + }; + } + + fn scan_tokens(mut self) -> Vec<Token<'a>> { + while !self.is_at_end() { + self.start = self.current; + self.scan_token(); + } + + return self.tokens; + } +} -- cgit 1.4.1