diff options
Diffstat (limited to 'users/tazjin/rlox')
34 files changed, 3574 insertions, 0 deletions
diff --git a/users/tazjin/rlox/.gitignore b/users/tazjin/rlox/.gitignore new file mode 100644 index 000000000000..29e65519ba35 --- /dev/null +++ b/users/tazjin/rlox/.gitignore @@ -0,0 +1,3 @@ +result +/target +**/*.rs.bk diff --git a/users/tazjin/rlox/Cargo.lock b/users/tazjin/rlox/Cargo.lock new file mode 100644 index 000000000000..d8107726e067 --- /dev/null +++ b/users/tazjin/rlox/Cargo.lock @@ -0,0 +1,6 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +[[package]] +name = "rlox" +version = "0.1.0" + diff --git a/users/tazjin/rlox/Cargo.toml b/users/tazjin/rlox/Cargo.toml new file mode 100644 index 000000000000..b66af6ba85d3 --- /dev/null +++ b/users/tazjin/rlox/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "rlox" +version = "0.1.0" +authors = ["Vincent Ambo <mail@tazj.in>"] +edition = "2018" + +[features] +# Enables debugging/disassembling in the bytecode interpreter. Off by +# default as it is quite spammy. +disassemble = [] diff --git a/users/tazjin/rlox/README.md b/users/tazjin/rlox/README.md new file mode 100644 index 000000000000..1d2692d09cc1 --- /dev/null +++ b/users/tazjin/rlox/README.md @@ -0,0 +1,7 @@ +This is an interpreter for the Lox language, based on the book "[Crafting +Interpreters](https://craftinginterpreters.com/)". + +The book's original code uses Java, but I don't want to use Java, so I've +decided to take on the extra complexity of porting it to Rust. + +Note: This implements the first of two Lox interpreters. diff --git a/users/tazjin/rlox/default.nix b/users/tazjin/rlox/default.nix new file mode 100644 index 000000000000..e50ac32be452 --- /dev/null +++ b/users/tazjin/rlox/default.nix @@ -0,0 +1,5 @@ +{ depot, ... }: + +depot.third_party.naersk.buildPackage { + src = ./.; +} diff --git a/users/tazjin/rlox/examples/builtins.lox b/users/tazjin/rlox/examples/builtins.lox new file mode 100644 index 000000000000..39af1d73c4c9 --- /dev/null +++ b/users/tazjin/rlox/examples/builtins.lox @@ -0,0 +1 @@ +print clock(); diff --git a/users/tazjin/rlox/examples/fib.lox b/users/tazjin/rlox/examples/fib.lox new file mode 100644 index 000000000000..1b91e9db94ac --- /dev/null +++ b/users/tazjin/rlox/examples/fib.lox @@ -0,0 +1,6 @@ +fun fib(n) { + if (n <= 1) return n; + return fib(n - 2) + fib(n - 1); +} + +print fib(30); diff --git a/users/tazjin/rlox/examples/func.lox b/users/tazjin/rlox/examples/func.lox new file mode 100644 index 000000000000..d197ad11383f --- /dev/null +++ b/users/tazjin/rlox/examples/func.lox @@ -0,0 +1,5 @@ +fun foo(name) { + print("hello " + name); +} + +foo("bar"); diff --git a/users/tazjin/rlox/examples/hello.lox b/users/tazjin/rlox/examples/hello.lox new file mode 100644 index 000000000000..31752d9e2f5e --- /dev/null +++ b/users/tazjin/rlox/examples/hello.lox @@ -0,0 +1,34 @@ +var a = 12; +var b = a * 2; + +{ + var b = a * 3; + a = 42; + print b; +} + +print a; +print b; + +if (5 > 4) + print "it's true"; +else + print "it's false"; + +if (false) + print "it's not true"; + +if (true and false) + print "won't happen"; + +if (true or false) + print "will happen"; + +var n = 5; +while (n > 0) { + print "counting down"; + n = n - 1; +} + +for(var i = 0; i < 10; i = i + 1) + print "bla"; diff --git a/users/tazjin/rlox/examples/if.lox b/users/tazjin/rlox/examples/if.lox new file mode 100644 index 000000000000..5f335c0e8b29 --- /dev/null +++ b/users/tazjin/rlox/examples/if.lox @@ -0,0 +1,7 @@ +if (false) { + print "yes"; +} else { + print "no"; +} + +print "afterwards"; diff --git a/users/tazjin/rlox/examples/scope.lox b/users/tazjin/rlox/examples/scope.lox new file mode 100644 index 000000000000..d563807943ba --- /dev/null +++ b/users/tazjin/rlox/examples/scope.lox @@ -0,0 +1,19 @@ +var a = "global a"; +var b = "global b"; +var c = "global c"; +{ + var a = "outer a"; + var b = "outer b"; + { + var a = "inner a"; + print a; + print b; + print c; + } + print a; + print b; + print c; +} +print a; +print b; +print c; diff --git a/users/tazjin/rlox/examples/scope2.lox b/users/tazjin/rlox/examples/scope2.lox new file mode 100644 index 000000000000..f826c8658803 --- /dev/null +++ b/users/tazjin/rlox/examples/scope2.lox @@ -0,0 +1,10 @@ +var a = "global"; +{ + fun showA() { + print a; + } + + showA(); + var a = "block"; + showA(); +} diff --git a/users/tazjin/rlox/examples/slow.lox b/users/tazjin/rlox/examples/slow.lox new file mode 100644 index 000000000000..dd6fb5e4bf4d --- /dev/null +++ b/users/tazjin/rlox/examples/slow.lox @@ -0,0 +1,9 @@ +fun fib(n) { + if (n < 2) return n; + return fib(n - 1) + fib(n - 2); +} + +var before = clock(); +print fib(40); +var after = clock(); +print after - before; diff --git a/users/tazjin/rlox/examples/var.lox b/users/tazjin/rlox/examples/var.lox new file mode 100644 index 000000000000..7af90b3f0bee --- /dev/null +++ b/users/tazjin/rlox/examples/var.lox @@ -0,0 +1,8 @@ +var a = 10; +var b = 5; + +{ + var b = 10; + var c = 2; + a * b * c; +} diff --git a/users/tazjin/rlox/rustfmt.toml b/users/tazjin/rlox/rustfmt.toml new file mode 100644 index 000000000000..df99c69198f5 --- /dev/null +++ b/users/tazjin/rlox/rustfmt.toml @@ -0,0 +1 @@ +max_width = 80 diff --git a/users/tazjin/rlox/src/bytecode/chunk.rs b/users/tazjin/rlox/src/bytecode/chunk.rs new file mode 100644 index 000000000000..fc5cd34fdf4f --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/chunk.rs @@ -0,0 +1,93 @@ +use std::ops::Index; + +use super::opcode::{CodeIdx, ConstantIdx, OpCode}; +use super::value; + +// In the book, this type is a hand-rolled dynamic array +// implementation in C. The main benefit of following that approach +// would be avoiding issues with OpCode variants not having equal +// sizes, but for the purpose of this I'm going to ignore that +// problem. +#[derive(Debug, Default)] +pub struct Chunk { + pub code: Vec<OpCode>, + lines: Vec<Span>, + constants: Vec<value::Value>, +} + +#[derive(Debug)] +struct Span { + /// Source code line + line: usize, + + /// Number of instructions derived from this line + count: usize, +} + +impl Chunk { + pub fn add_op(&mut self, data: OpCode, line: usize) -> CodeIdx { + let idx = self.code.len(); + self.code.push(data); + self.add_line(line); + CodeIdx(idx) + } + + pub fn add_constant(&mut self, data: value::Value) -> usize { + let idx = self.constants.len(); + self.constants.push(data); + idx + } + + pub fn constant(&self, idx: ConstantIdx) -> &value::Value { + self.constants.index(idx.0) + } + + fn add_line(&mut self, line: usize) { + match self.lines.last_mut() { + Some(span) if span.line == line => span.count += 1, + _ => self.lines.push(Span { line, count: 1 }), + } + } + + pub fn get_line(&self, offset: usize) -> usize { + let mut pos = 0; + for span in &self.lines { + pos += span.count; + if pos > offset { + return span.line; + } + } + + panic!("invalid chunk state: line missing for offset {}", offset); + } +} + +// Disassembler + +/// Print a single disassembled instruction at the specified offset. +/// Some instructions are printed "raw", others have special handling. +#[cfg(feature = "disassemble")] +pub fn disassemble_instruction(chunk: &Chunk, offset: usize) { + print!("{:04} ", offset); + + let line = chunk.get_line(offset); + if offset > 0 && line == chunk.get_line(offset - 1) { + print!(" | "); + } else { + print!("{:4} ", line); + } + + match chunk.code.index(offset) { + OpCode::OpConstant(idx) => { + println!("OpConstant({:?}) '{:?}'", idx, chunk.constant(*idx)) + } + op => println!("{:?}", op), + } +} + +#[cfg(feature = "disassemble")] +pub fn disassemble_chunk(chunk: &Chunk) { + for (idx, _) in chunk.code.iter().enumerate() { + disassemble_instruction(chunk, idx); + } +} diff --git a/users/tazjin/rlox/src/bytecode/compiler.rs b/users/tazjin/rlox/src/bytecode/compiler.rs new file mode 100644 index 000000000000..89584f19d720 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/compiler.rs @@ -0,0 +1,702 @@ +use super::chunk::Chunk; +use super::errors::{Error, ErrorKind, LoxResult}; +use super::interner::{InternedStr, Interner}; +use super::opcode::{CodeIdx, CodeOffset, ConstantIdx, OpCode, StackIdx}; +use super::value::Value; +use crate::scanner::{self, Token, TokenKind}; + +#[cfg(feature = "disassemble")] +use super::chunk; + +#[derive(Debug)] +enum Depth { + Unitialised, + At(usize), +} + +impl Depth { + fn above(&self, theirs: usize) -> bool { + match self { + Depth::Unitialised => false, + Depth::At(ours) => *ours > theirs, + } + } + + fn below(&self, theirs: usize) -> bool { + match self { + Depth::Unitialised => false, + Depth::At(ours) => *ours < theirs, + } + } +} + +#[derive(Debug)] +struct Local { + name: Token, + depth: Depth, +} + +#[derive(Debug, Default)] +struct Locals { + locals: Vec<Local>, + scope_depth: usize, +} + +struct Compiler<T: Iterator<Item = Token>> { + tokens: T, + chunk: Chunk, + panic: bool, + errors: Vec<Error>, + strings: Interner, + locals: Locals, + + current: Option<Token>, + previous: Option<Token>, +} + +#[derive(Debug, PartialEq, PartialOrd)] +enum Precedence { + None, + Assignment, // = + Or, // or + And, // and + Equality, // == != + Comparison, // < > <= >= + Term, // + - + Factor, // + // + // * / + Unary, // ! - + Call, // . () + Primary, +} + +type ParseFn<T> = fn(&mut Compiler<T>) -> LoxResult<()>; + +struct ParseRule<T: Iterator<Item = Token>> { + prefix: Option<ParseFn<T>>, + infix: Option<ParseFn<T>>, + precedence: Precedence, +} + +impl<T: Iterator<Item = Token>> ParseRule<T> { + fn new(prefix: Option<ParseFn<T>>, infix: Option<ParseFn<T>>, precedence: Precedence) -> Self { + ParseRule { + prefix, + infix, + precedence, + } + } +} + +impl Precedence { + // Return the next highest precedence, if there is one. + fn next(&self) -> Self { + match self { + Precedence::None => Precedence::Assignment, + Precedence::Assignment => Precedence::Or, + Precedence::Or => Precedence::And, + Precedence::And => Precedence::Equality, + Precedence::Equality => Precedence::Comparison, + Precedence::Comparison => Precedence::Term, + Precedence::Term => Precedence::Factor, + Precedence::Factor => Precedence::Unary, + Precedence::Unary => Precedence::Call, + Precedence::Call => Precedence::Primary, + Precedence::Primary => { + panic!("invalid parser state: no higher precedence than Primary") + } + } + } +} + +fn rule_for<T: Iterator<Item = Token>>(token: &TokenKind) -> ParseRule<T> { + match token { + TokenKind::LeftParen => ParseRule::new(Some(Compiler::grouping), None, Precedence::None), + + TokenKind::Minus => ParseRule::new( + Some(Compiler::unary), + Some(Compiler::binary), + Precedence::Term, + ), + + TokenKind::Plus => ParseRule::new(None, Some(Compiler::binary), Precedence::Term), + + TokenKind::Slash => ParseRule::new(None, Some(Compiler::binary), Precedence::Factor), + + TokenKind::Star => ParseRule::new(None, Some(Compiler::binary), Precedence::Factor), + + TokenKind::Number(_) => ParseRule::new(Some(Compiler::number), None, Precedence::None), + + TokenKind::True => ParseRule::new(Some(Compiler::literal), None, Precedence::None), + + TokenKind::False => ParseRule::new(Some(Compiler::literal), None, Precedence::None), + + TokenKind::Nil => ParseRule::new(Some(Compiler::literal), None, Precedence::None), + + TokenKind::Bang => ParseRule::new(Some(Compiler::unary), None, Precedence::None), + + TokenKind::BangEqual => ParseRule::new(None, Some(Compiler::binary), Precedence::Equality), + + TokenKind::EqualEqual => ParseRule::new(None, Some(Compiler::binary), Precedence::Equality), + + TokenKind::Greater => ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison), + + TokenKind::GreaterEqual => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison) + } + + TokenKind::Less => ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison), + + TokenKind::LessEqual => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison) + } + + TokenKind::Identifier(_) => { + ParseRule::new(Some(Compiler::variable), None, Precedence::None) + } + + TokenKind::String(_) => ParseRule::new(Some(Compiler::string), None, Precedence::None), + + _ => ParseRule::new(None, None, Precedence::None), + } +} + +macro_rules! consume { + ( $self:ident, $expected:pat, $err:expr ) => { + match $self.current().kind { + $expected => $self.advance(), + _ => $self.error_at($self.current().line, $err), + } + }; +} + +impl<T: Iterator<Item = Token>> Compiler<T> { + fn compile(&mut self) -> LoxResult<()> { + self.advance(); + + while !self.match_token(&TokenKind::Eof) { + self.declaration()?; + } + + self.end_compiler() + } + + fn advance(&mut self) { + self.previous = self.current.take(); + self.current = self.tokens.next(); + } + + fn expression(&mut self) -> LoxResult<()> { + self.parse_precedence(Precedence::Assignment) + } + + fn var_declaration(&mut self) -> LoxResult<()> { + let idx = self.parse_variable()?; + + if self.match_token(&TokenKind::Equal) { + self.expression()?; + } else { + self.emit_op(OpCode::OpNil); + } + + self.expect_semicolon("expect ';' after variable declaration")?; + self.define_variable(idx) + } + + fn define_variable(&mut self, var: Option<ConstantIdx>) -> LoxResult<()> { + if self.locals.scope_depth == 0 { + self.emit_op(OpCode::OpDefineGlobal(var.expect("should be global"))); + } else { + self.locals + .locals + .last_mut() + .expect("fatal: variable not yet added at definition") + .depth = Depth::At(self.locals.scope_depth); + } + + Ok(()) + } + + fn declaration(&mut self) -> LoxResult<()> { + if self.match_token(&TokenKind::Var) { + self.var_declaration()?; + } else { + self.statement()?; + } + + if self.panic { + self.synchronise(); + } + + Ok(()) + } + + fn statement(&mut self) -> LoxResult<()> { + if self.match_token(&TokenKind::Print) { + self.print_statement() + } else if self.match_token(&TokenKind::If) { + self.if_statement() + } else if self.match_token(&TokenKind::LeftBrace) { + self.begin_scope(); + self.block()?; + self.end_scope(); + Ok(()) + } else { + self.expression_statement() + } + } + + fn print_statement(&mut self) -> LoxResult<()> { + self.expression()?; + self.expect_semicolon("expect ';' after print statement")?; + self.emit_op(OpCode::OpPrint); + Ok(()) + } + + fn begin_scope(&mut self) { + self.locals.scope_depth += 1; + } + + fn end_scope(&mut self) { + debug_assert!(self.locals.scope_depth > 0, "tried to end global scope"); + self.locals.scope_depth -= 1; + + while self.locals.locals.len() > 0 + && self.locals.locals[self.locals.locals.len() - 1] + .depth + .above(self.locals.scope_depth) + { + self.emit_op(OpCode::OpPop); + self.locals.locals.remove(self.locals.locals.len() - 1); + } + } + + fn block(&mut self) -> LoxResult<()> { + while !self.check(&TokenKind::RightBrace) && !self.check(&TokenKind::Eof) { + self.declaration()?; + } + + consume!( + self, + TokenKind::RightBrace, + ErrorKind::ExpectedToken("Expected '}' after block.") + ); + Ok(()) + } + + fn expression_statement(&mut self) -> LoxResult<()> { + self.expression()?; + self.expect_semicolon("expect ';' after expression")?; + // TODO(tazjin): Why did I add this originally? + // self.emit_op(OpCode::OpPop); + Ok(()) + } + + fn if_statement(&mut self) -> LoxResult<()> { + consume!( + self, + TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'if'") + ); + + self.expression()?; + + consume!( + self, + TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after condition") + ); + + let then_jump = self.emit_op(OpCode::OpJumpPlaceholder(false)); + self.emit_op(OpCode::OpPop); + self.statement()?; + let else_jump = self.emit_op(OpCode::OpJumpPlaceholder(true)); + self.patch_jump(then_jump); + self.emit_op(OpCode::OpPop); + + if self.match_token(&TokenKind::Else) { + self.statement()?; + } + + self.patch_jump(else_jump); + + Ok(()) + } + + fn number(&mut self) -> LoxResult<()> { + if let TokenKind::Number(num) = self.previous().kind { + self.emit_constant(Value::Number(num), true); + return Ok(()); + } + + unreachable!("internal parser error: entered number() incorrectly") + } + + fn grouping(&mut self) -> LoxResult<()> { + self.expression()?; + consume!( + self, + TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after expression") + ); + Ok(()) + } + + fn unary(&mut self) -> LoxResult<()> { + // TODO(tazjin): Avoid clone + let kind = self.previous().kind.clone(); + + // Compile the operand + self.parse_precedence(Precedence::Unary)?; + + // Emit operator instruction + match kind { + TokenKind::Bang => self.emit_op(OpCode::OpNot), + TokenKind::Minus => self.emit_op(OpCode::OpNegate), + _ => unreachable!("only called for unary operator tokens"), + }; + + Ok(()) + } + + fn binary(&mut self) -> LoxResult<()> { + // Remember the operator + let operator = self.previous().kind.clone(); + + // Compile the right operand + let rule: ParseRule<T> = rule_for(&operator); + self.parse_precedence(rule.precedence.next())?; + + // Emit operator instruction + match operator { + TokenKind::Minus => self.emit_op(OpCode::OpSubtract), + TokenKind::Plus => self.emit_op(OpCode::OpAdd), + TokenKind::Star => self.emit_op(OpCode::OpMultiply), + TokenKind::Slash => self.emit_op(OpCode::OpDivide), + + TokenKind::BangEqual => { + self.emit_op(OpCode::OpEqual); + self.emit_op(OpCode::OpNot) + } + + TokenKind::EqualEqual => self.emit_op(OpCode::OpEqual), + TokenKind::Greater => self.emit_op(OpCode::OpGreater), + + TokenKind::GreaterEqual => { + self.emit_op(OpCode::OpLess); + self.emit_op(OpCode::OpNot) + } + + TokenKind::Less => self.emit_op(OpCode::OpLess), + TokenKind::LessEqual => { + self.emit_op(OpCode::OpGreater); + self.emit_op(OpCode::OpNot) + } + + _ => unreachable!("only called for binary operator tokens"), + }; + + Ok(()) + } + + fn literal(&mut self) -> LoxResult<()> { + match self.previous().kind { + TokenKind::Nil => self.emit_op(OpCode::OpNil), + TokenKind::True => self.emit_op(OpCode::OpTrue), + TokenKind::False => self.emit_op(OpCode::OpFalse), + _ => unreachable!("only called for literal value tokens"), + }; + + Ok(()) + } + + fn string(&mut self) -> LoxResult<()> { + let val = match &self.previous().kind { + TokenKind::String(s) => s.clone(), + _ => unreachable!("only called for strings"), + }; + + let id = self.strings.intern(val); + self.emit_constant(Value::String(id.into()), true); + + Ok(()) + } + + fn named_variable(&mut self, name: Token) -> LoxResult<()> { + let local_idx = self.resolve_local(&name); + + let ident = if local_idx.is_some() { + None + } else { + Some(self.identifier_constant(&name)?) + }; + + if self.match_token(&TokenKind::Equal) { + self.expression()?; + match local_idx { + Some(idx) => self.emit_op(OpCode::OpSetLocal(idx)), + None => self.emit_op(OpCode::OpSetGlobal(ident.unwrap())), + }; + } else { + match local_idx { + Some(idx) => self.emit_op(OpCode::OpGetLocal(idx)), + None => self.emit_op(OpCode::OpGetGlobal(ident.unwrap())), + }; + } + + Ok(()) + } + + fn variable(&mut self) -> LoxResult<()> { + let name = self.previous().clone(); + self.named_variable(name) + } + + fn parse_precedence(&mut self, precedence: Precedence) -> LoxResult<()> { + self.advance(); + let rule: ParseRule<T> = rule_for(&self.previous().kind); + let prefix_fn = match rule.prefix { + None => unimplemented!("expected expression or something, unclear"), + Some(func) => func, + }; + + prefix_fn(self)?; + + while precedence <= rule_for::<T>(&self.current().kind).precedence { + self.advance(); + match rule_for::<T>(&self.previous().kind).infix { + Some(func) => { + func(self)?; + } + None => { + unreachable!("invalid compiler state: error in parse rules") + } + } + } + + Ok(()) + } + + fn identifier_str(&mut self, token: &Token) -> LoxResult<InternedStr> { + let ident = match &token.kind { + TokenKind::Identifier(ident) => ident.to_string(), + _ => { + return Err(Error { + line: self.current().line, + kind: ErrorKind::ExpectedToken("Expected identifier"), + }) + } + }; + + Ok(self.strings.intern(ident)) + } + + fn identifier_constant(&mut self, name: &Token) -> LoxResult<ConstantIdx> { + let ident = self.identifier_str(name)?; + Ok(self.emit_constant(Value::String(ident.into()), false)) + } + + fn resolve_local(&self, name: &Token) -> Option<StackIdx> { + for (idx, local) in self.locals.locals.iter().enumerate().rev() { + if name.lexeme == local.name.lexeme { + if let Depth::Unitialised = local.depth { + // TODO(tazjin): *return* err + panic!("can't read variable in its own initialiser"); + } + return Some(StackIdx(idx)); + } + } + + None + } + + fn add_local(&mut self, name: Token) { + let local = Local { + name, + depth: Depth::Unitialised, + }; + + self.locals.locals.push(local); + } + + fn declare_variable(&mut self) -> LoxResult<()> { + if self.locals.scope_depth == 0 { + return Ok(()); + } + + let name = self.previous().clone(); + + for local in self.locals.locals.iter().rev() { + if local.depth.below(self.locals.scope_depth) { + break; + } + + if name.lexeme == local.name.lexeme { + return Err(Error { + kind: ErrorKind::VariableShadowed(name.lexeme.into()), + line: name.line, + }); + } + } + + self.add_local(name); + Ok(()) + } + + fn parse_variable(&mut self) -> LoxResult<Option<ConstantIdx>> { + consume!( + self, + TokenKind::Identifier(_), + ErrorKind::ExpectedToken("expected identifier") + ); + + self.declare_variable()?; + if self.locals.scope_depth > 0 { + return Ok(None); + } + + let name = self.previous().clone(); + let id = self.identifier_str(&name)?; + Ok(Some(self.emit_constant(Value::String(id.into()), false))) + } + + fn current_chunk(&mut self) -> &mut Chunk { + &mut self.chunk + } + + fn end_compiler(&mut self) -> LoxResult<()> { + self.emit_op(OpCode::OpReturn); + + #[cfg(feature = "disassemble")] + { + chunk::disassemble_chunk(&self.chunk); + println!("== compilation finished =="); + } + + Ok(()) + } + + fn emit_op(&mut self, op: OpCode) -> CodeIdx { + let line = self.previous().line; + self.current_chunk().add_op(op, line) + } + + fn emit_constant(&mut self, val: Value, with_op: bool) -> ConstantIdx { + let idx = ConstantIdx(self.chunk.add_constant(val)); + + if with_op { + self.emit_op(OpCode::OpConstant(idx)); + } + + idx + } + + fn patch_jump(&mut self, idx: CodeIdx) { + let offset = CodeOffset(self.chunk.code.len() - idx.0 - 1); + + if let OpCode::OpJumpPlaceholder(true) = self.chunk.code[idx.0] { + self.chunk.code[idx.0] = OpCode::OpJump(offset); + return; + } + + if let OpCode::OpJumpPlaceholder(false) = self.chunk.code[idx.0] { + self.chunk.code[idx.0] = OpCode::OpJumpIfFalse(offset); + return; + } + + panic!( + "attempted to patch unsupported op: {:?}", + self.chunk.code[idx.0] + ); + } + + fn previous(&self) -> &Token { + self.previous + .as_ref() + .expect("invalid internal compiler state: missing previous token") + } + + fn current(&self) -> &Token { + self.current + .as_ref() + .expect("invalid internal compiler state: missing current token") + } + + fn error_at(&mut self, line: usize, kind: ErrorKind) { + if self.panic { + return; + } + + self.panic = true; + self.errors.push(Error { kind, line }) + } + + fn match_token(&mut self, token: &TokenKind) -> bool { + if !self.check(token) { + return false; + } + + self.advance(); + true + } + + fn check(&self, token: &TokenKind) -> bool { + return self.current().kind == *token; + } + + fn synchronise(&mut self) { + self.panic = false; + + while self.current().kind != TokenKind::Eof { + if self.previous().kind == TokenKind::Semicolon { + return; + } + + match self.current().kind { + TokenKind::Class + | TokenKind::Fun + | TokenKind::Var + | TokenKind::For + | TokenKind::If + | TokenKind::While + | TokenKind::Print + | TokenKind::Return => return, + + _ => { + self.advance(); + } + } + } + } + + fn expect_semicolon(&mut self, msg: &'static str) -> LoxResult<()> { + consume!(self, TokenKind::Semicolon, ErrorKind::ExpectedToken(msg)); + Ok(()) + } +} + +pub fn compile(code: &str) -> Result<(Interner, Chunk), Vec<Error>> { + let chars = code.chars().collect::<Vec<char>>(); + let tokens = scanner::scan(&chars) + .map_err(|errors| errors.into_iter().map(Into::into).collect::<Vec<Error>>())?; + + let mut compiler = Compiler { + tokens: tokens.into_iter().peekable(), + chunk: Default::default(), + panic: false, + errors: vec![], + strings: Interner::with_capacity(1024), + locals: Default::default(), + current: None, + previous: None, + }; + + compiler.compile()?; + + if compiler.errors.is_empty() { + Ok((compiler.strings, compiler.chunk)) + } else { + Err(compiler.errors) + } +} diff --git a/users/tazjin/rlox/src/bytecode/errors.rs b/users/tazjin/rlox/src/bytecode/errors.rs new file mode 100644 index 000000000000..988031f763cf --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/errors.rs @@ -0,0 +1,51 @@ +use crate::scanner::ScannerError; + +use std::fmt; + +#[derive(Debug)] +pub enum ErrorKind { + UnexpectedChar(char), + UnterminatedString, + ExpectedToken(&'static str), + InternalError(&'static str), + TypeError(String), + VariableShadowed(String), +} + +#[derive(Debug)] +pub struct Error { + pub kind: ErrorKind, + pub line: usize, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[line NYI] Error: {:?}", self.kind) + } +} + +impl From<ScannerError> for Error { + fn from(err: ScannerError) -> Self { + match err { + ScannerError::UnexpectedChar { line, unexpected } => Error { + line, + kind: ErrorKind::UnexpectedChar(unexpected), + }, + + ScannerError::UnterminatedString { line } => Error { + line, + kind: ErrorKind::UnterminatedString, + }, + } + } +} + +// Convenience implementation as we're often dealing with vectors of +// errors (to report as many issues as possible before terminating) +impl From<Error> for Vec<Error> { + fn from(err: Error) -> Self { + vec![err] + } +} + +pub type LoxResult<T> = Result<T, Error>; diff --git a/users/tazjin/rlox/src/bytecode/interner/mod.rs b/users/tazjin/rlox/src/bytecode/interner/mod.rs new file mode 100644 index 000000000000..1da1a24b2c5f --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/interner/mod.rs @@ -0,0 +1,87 @@ +//! String-interning implementation for values that are likely to +//! benefit from fast comparisons and deduplication (e.g. instances of +//! variable names). +//! +//! This uses a trick from the typed-arena crate for guaranteeing +//! stable addresses by never resizing the existing String buffer, and +//! collecting full buffers in a vector. + +use std::collections::HashMap; + +#[cfg(test)] +mod tests; + +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub struct InternedStr { + id: usize, +} + +#[derive(Default)] +pub struct Interner { + map: HashMap<&'static str, InternedStr>, + vec: Vec<&'static str>, + buf: String, + full: Vec<String>, +} + +impl Interner { + pub fn with_capacity(cap: usize) -> Self { + Interner { + buf: String::with_capacity(cap), + ..Default::default() + } + } + + pub fn intern<S: AsRef<str>>(&mut self, name: S) -> InternedStr { + let name = name.as_ref(); + if let Some(&id) = self.map.get(name) { + return id; + } + + let name = self.alloc(name); + let id = InternedStr { + id: self.vec.len() as usize, + }; + + self.map.insert(name, id); + self.vec.push(name); + + debug_assert!(self.lookup(id) == name); + debug_assert!(self.intern(name) == id); + + id + } + + pub fn lookup<'a>(&'a self, id: InternedStr) -> &'a str { + self.vec[id.id] + } + + fn alloc<'a>(&'a mut self, name: &str) -> &'static str { + let cap = self.buf.capacity(); + if cap < self.buf.len() + name.len() { + let new_cap = (cap.max(name.len()) + 1).next_power_of_two(); + let new_buf = String::with_capacity(new_cap); + let old_buf = std::mem::replace(&mut self.buf, new_buf); + self.full.push(old_buf); + } + + let interned: &'a str = { + let start = self.buf.len(); + self.buf.push_str(name); + &self.buf[start..] + }; + + unsafe { + // This is sound for two reasons: + // + // 1. This function (Interner::alloc) is private, which + // prevents users from allocating a supposedly static + // reference. + // + // 2. Interner::lookup explicitly shortens the lifetime of + // references that are handed out to that of the + // reference to self. + return &*(interned as *const str); + } + } +} diff --git a/users/tazjin/rlox/src/bytecode/interner/tests.rs b/users/tazjin/rlox/src/bytecode/interner/tests.rs new file mode 100644 index 000000000000..b34bf6835389 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/interner/tests.rs @@ -0,0 +1,24 @@ +use super::*; + +#[test] +fn interns_strings() { + let mut interner = Interner::with_capacity(128); + let id = interner.intern("hello world"); + assert_eq!("hello world", interner.lookup(id)); +} + +#[test] +fn deduplicates_strings() { + let mut interner = Interner::with_capacity(128); + let id_1 = interner.intern("hello world"); + let id_2 = interner.intern("hello world"); + assert_eq!(id_1, id_2); +} + +#[test] +fn ids_survive_growing() { + let mut interner = Interner::with_capacity(16); + let id = interner.intern("hello"); + interner.intern("excessively large string that will cause eallocation"); + assert_eq!("hello", interner.lookup(id)); +} diff --git a/users/tazjin/rlox/src/bytecode/mod.rs b/users/tazjin/rlox/src/bytecode/mod.rs new file mode 100644 index 000000000000..117f17824ac6 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/mod.rs @@ -0,0 +1,30 @@ +//! Bytecode interpreter for Lox. +//! +//! https://craftinginterpreters.com/chunks-of-bytecode.html + +mod chunk; +mod compiler; +mod errors; +mod interner; +mod opcode; +mod value; +mod vm; + +#[cfg(test)] +mod tests; + +pub struct Interpreter {} + +impl crate::Lox for Interpreter { + type Error = errors::Error; + type Value = value::Value; + + fn create() -> Self { + Interpreter {} + } + + fn interpret(&mut self, code: String) -> Result<Self::Value, Vec<Self::Error>> { + let (strings, chunk) = compiler::compile(&code)?; + vm::interpret(strings, chunk).map_err(|e| vec![e]) + } +} diff --git a/users/tazjin/rlox/src/bytecode/opcode.rs b/users/tazjin/rlox/src/bytecode/opcode.rs new file mode 100644 index 000000000000..8a106f96917d --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/opcode.rs @@ -0,0 +1,56 @@ +#[derive(Clone, Copy, Debug)] +pub struct ConstantIdx(pub usize); + +#[derive(Clone, Copy, Debug)] +pub struct StackIdx(pub usize); + +#[derive(Clone, Copy, Debug)] +pub struct CodeIdx(pub usize); + +#[derive(Clone, Copy, Debug)] +pub struct CodeOffset(pub usize); + +#[derive(Debug)] +pub enum OpCode { + /// Push a constant onto the stack. + OpConstant(ConstantIdx), + + // Literal pushes + OpNil, + OpTrue, + OpFalse, + + /// Return from the current function. + OpReturn, + + // Boolean & comparison operators + OpNot, + OpEqual, + OpGreater, + OpLess, + + /// Unary negation + OpNegate, + + // Arithmetic operators + OpAdd, + OpSubtract, + OpMultiply, + OpDivide, + + // Built in operations + OpPrint, + OpPop, + + // Variable management + OpDefineGlobal(ConstantIdx), + OpGetGlobal(ConstantIdx), + OpSetGlobal(ConstantIdx), + OpGetLocal(StackIdx), + OpSetLocal(StackIdx), + + // Control flow + OpJumpPlaceholder(bool), + OpJump(CodeOffset), + OpJumpIfFalse(CodeOffset), +} diff --git a/users/tazjin/rlox/src/bytecode/tests.rs b/users/tazjin/rlox/src/bytecode/tests.rs new file mode 100644 index 000000000000..bc7d6cb878f8 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/tests.rs @@ -0,0 +1,152 @@ +use super::value::Value; +use super::*; + +use crate::Lox; + +fn expect(code: &str, value: Value) { + let result = Interpreter::create() + .interpret(code.into()) + .expect("evaluation failed"); + assert_eq!(result, value); +} + +fn expect_num(code: &str, value: f64) { + expect(code, Value::Number(value)) +} + +fn expect_bool(code: &str, value: bool) { + expect(code, Value::Bool(value)) +} + +fn expect_str(code: &str, value: &str) { + expect(code, Value::String(value.to_string().into())) +} + +#[test] +fn numbers() { + expect_num("1;", 1.0); + expect_num("13.37;", 13.37); +} + +#[test] +fn negative_numbers() { + // Note: This technically tests unary operators. + expect_num("-1;", -1.0); + expect_num("-13.37;", -13.37); +} + +#[test] +fn terms() { + expect_num("1 + 2;", 3.0); + expect_num("3 - 1;", 2.0); + expect_num("0.7 + 0.3;", 1.0); + expect_num("1 + -3;", -2.0); + expect_num("-1 - -1;", 0.0); + expect_num("10 - -10 + 10;", 30.0); +} + +#[test] +fn factors() { + expect_num("1 * 2;", 2.0); + expect_num("10 / 5;", 2.0); + expect_num("0.7 * 4 / 1.4;", 2.0); + expect_num("10 * -10 / 10;", -10.0); +} + +#[test] +fn arithmetic() { + expect_num("10 - 3 * 2;", 4.0); + expect_num("-4 * -4 + (14 - 5);", 25.0); + expect_num("(702 + 408) - ((239 - 734) / -5) + -4;", 1007.0); +} + +#[test] +fn trivial_literals() { + expect("true;", Value::Bool(true)); + expect("false;", Value::Bool(false)); + expect("nil;", Value::Nil); +} + +#[test] +fn negation() { + expect_bool("!true;", false); + expect_bool("!false;", true); + expect_bool("!nil;", true); + expect_bool("!13.5;", false); + expect_bool("!-42;", false); +} + +#[test] +fn equality() { + expect_bool("42 == 42;", true); + expect_bool("42 != 42;", false); + expect_bool("42 == 42.0;", true); + + expect_bool("true == true;", true); + expect_bool("true == false;", false); + expect_bool("true == !false;", true); + expect_bool("true != true;", false); + expect_bool("true != false;", true); + + expect_bool("42 == false;", false); + expect_bool("42 == true;", false); + expect_bool("!42 == !true;", true); +} + +#[test] +fn comparisons() { + expect_bool("42 > 23;", true); + expect_bool("42 < 23;", false); + expect_bool("42 <= 42;", true); + expect_bool("42 <= 23;", false); + expect_bool("42 >= 42;", true); + expect_bool("42 >= 23;", true); +} + +#[test] +fn strings() { + expect_str("\"hello\";", "hello"); + expect_str("\"hello\" + \" world\";", "hello world"); +} + +#[test] +fn global_variables() { + expect_num("var a = 5; a;", 5.0); + expect_num("var a = 5; var b = 2; a * b;", 10.0); + expect_str( + "var greeting = \"hello\"; var name = \"Zubnog\"; greeting + \" \" + name;", + "hello Zubnog", + ); +} + +#[test] +fn global_assignment() { + expect_str( + r#" + var breakfast = "beignets"; + var beverage = "cafe au lait"; + breakfast = "beignets with " + beverage; + breakfast; + "#, + "beignets with cafe au lait", + ); +} + +#[test] +fn local_variables() { + expect_num( + r#" + var a = 10; + var b = 5; + var result = 0; + { + var b = 10; + var c = 2; + result = a * b * c; + } + + result; + "#, + 200.0, + ); +} diff --git a/users/tazjin/rlox/src/bytecode/value.rs b/users/tazjin/rlox/src/bytecode/value.rs new file mode 100644 index 000000000000..4170efadf8fe --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/value.rs @@ -0,0 +1,37 @@ +use super::interner::InternedStr; + +#[derive(Clone, Debug, PartialEq)] +pub enum Value { + Nil, + Bool(bool), + Number(f64), + String(LoxString), +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum LoxString { + Heap(String), + Interned(InternedStr), +} + +impl From<String> for LoxString { + fn from(s: String) -> Self { + LoxString::Heap(s) + } +} + +impl From<InternedStr> for LoxString { + fn from(s: InternedStr) -> Self { + LoxString::Interned(s) + } +} + +impl Value { + pub fn is_falsey(&self) -> bool { + match self { + Value::Nil => true, + Value::Bool(false) => true, + _ => false, + } + } +} diff --git a/users/tazjin/rlox/src/bytecode/vm.rs b/users/tazjin/rlox/src/bytecode/vm.rs new file mode 100644 index 000000000000..30ffebc79cf7 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/vm.rs @@ -0,0 +1,272 @@ +use std::collections::HashMap; + +use super::chunk; +use super::errors::*; +use super::interner::Interner; +use super::opcode::OpCode; +use super::value::{LoxString, Value}; + +pub struct VM { + chunk: chunk::Chunk, + + // TODO(tazjin): Accessing array elements constantly is not ideal, + // lets see if something clever can be done with iterators. + ip: usize, + + stack: Vec<Value>, + strings: Interner, + + globals: HashMap<LoxString, Value>, + + // Operations that consume values from the stack without pushing + // anything leave their last value in this slot, which makes it + // possible to return values from interpreters that ran code which + // ended with a statement. + last_drop: Option<Value>, +} + +impl VM { + fn push(&mut self, value: Value) { + self.stack.push(value) + } + + fn pop(&mut self) -> Value { + self.stack.pop().expect("fatal error: stack empty!") + } +} + +macro_rules! with_type { + ( $self:ident, $val:ident, $type:pat, $body:expr ) => { + match $val { + $type => $body, + _ => { + return Err(Error { + line: $self.chunk.get_line($self.ip - 1), + kind: ErrorKind::TypeError(format!( + "Expected type {}, but found value: {:?}", + stringify!($type), + $val, + )), + }) + } + } + }; +} + +macro_rules! binary_op { + ( $vm:ident, $type:tt, $op:tt ) => { + binary_op!($vm, $type, $type, $op) + }; + + ( $vm:ident, $in_type:tt, $out_type:tt, $op:tt ) => {{ + let b = $vm.pop(); + let a = $vm.pop(); + + with_type!($vm, b, Value::$in_type(val_b), { + with_type!($vm, a, Value::$in_type(val_a), { + $vm.push(Value::$out_type(val_a $op val_b)) + }) + }) + }}; +} + +impl VM { + fn run(&mut self) -> LoxResult<Value> { + loop { + let op = &self.chunk.code[self.ip]; + + #[cfg(feature = "disassemble")] + chunk::disassemble_instruction(&self.chunk, self.ip); + + self.ip += 1; + + match op { + OpCode::OpReturn => { + if !self.stack.is_empty() { + let val = self.pop(); + return Ok(self.return_value(val)); + } else if self.last_drop.is_some() { + let val = self.last_drop.take().unwrap(); + return Ok(self.return_value(val)); + } else { + return Ok(Value::Nil); + } + } + + OpCode::OpConstant(idx) => { + let c = self.chunk.constant(*idx).clone(); + self.push(c); + } + + OpCode::OpNil => self.push(Value::Nil), + OpCode::OpTrue => self.push(Value::Bool(true)), + OpCode::OpFalse => self.push(Value::Bool(false)), + + OpCode::OpNot => { + let v = self.pop(); + self.push(Value::Bool(v.is_falsey())); + } + + OpCode::OpEqual => { + let b = self.pop(); + let a = self.pop(); + self.push(Value::Bool(a == b)); + } + + OpCode::OpLess => binary_op!(self, Number, Bool, <), + OpCode::OpGreater => binary_op!(self, Number, Bool, >), + + OpCode::OpNegate => { + let v = self.pop(); + with_type!(self, v, Value::Number(num), self.push(Value::Number(-num))); + } + + OpCode::OpSubtract => binary_op!(self, Number, -), + OpCode::OpMultiply => binary_op!(self, Number, *), + OpCode::OpDivide => binary_op!(self, Number, /), + + OpCode::OpAdd => { + let b = self.pop(); + let a = self.pop(); + + match (a, b) { + (Value::String(s_a), Value::String(s_b)) => { + let mut new_s = self.resolve_str(&s_a).to_string(); + new_s.push_str(self.resolve_str(&s_b)); + self.push(Value::String(new_s.into())); + } + + (Value::Number(n_a), Value::Number(n_b)) => { + self.push(Value::Number(n_a + n_b)) + } + + _ => { + return Err(Error { + line: self.chunk.get_line(self.ip - 1), + kind: ErrorKind::TypeError( + "'+' operator only works on strings and numbers".into(), + ), + }) + } + } + } + + OpCode::OpPrint => { + let val = self.pop(); + println!("{}", self.print_value(val)); + } + + OpCode::OpPop => { + self.last_drop = Some(self.pop()); + } + + OpCode::OpDefineGlobal(name_idx) => { + let name = self.chunk.constant(*name_idx); + with_type!(self, name, Value::String(name), { + let name = name.clone(); + let val = self.pop(); + self.globals.insert(name, val); + }); + } + + OpCode::OpGetGlobal(name_idx) => { + let name = self.chunk.constant(*name_idx); + with_type!(self, name, Value::String(name), { + let val = match self.globals.get(name) { + None => unimplemented!("variable not found error"), + Some(val) => val.clone(), + }; + self.push(val) + }); + } + + OpCode::OpSetGlobal(name_idx) => { + let name = self.chunk.constant(*name_idx).clone(); + let new_val = self.pop(); + with_type!(self, name, Value::String(name), { + match self.globals.get_mut(&name) { + None => unimplemented!("variable not found error"), + Some(val) => { + *val = new_val; + } + } + }); + } + + OpCode::OpGetLocal(local_idx) => { + let value = self.stack[local_idx.0].clone(); + self.push(value); + } + + OpCode::OpSetLocal(local_idx) => { + debug_assert!( + self.stack.len() > local_idx.0, + "stack is not currently large enough for local" + ); + self.stack[local_idx.0] = self.stack.last().unwrap().clone(); + } + + OpCode::OpJumpPlaceholder(_) => { + panic!("unpatched jump detected - this is a fatal compiler error!"); + } + + OpCode::OpJump(offset) => { + self.ip += offset.0; + } + + OpCode::OpJumpIfFalse(offset) => { + if self + .stack + .last() + .expect("condition should leave a value on the stack") + .is_falsey() + { + self.ip += offset.0; + } + } + } + + #[cfg(feature = "disassemble")] + println!("=> {:?}", self.stack); + } + } + + // For some types of values (e.g. interned strings), returns + // should no longer include any references into the interpreter. + fn return_value(&self, val: Value) -> Value { + match val { + Value::String(string @ LoxString::Interned(_)) => { + Value::String(self.resolve_str(&string).to_string().into()) + } + _ => val, + } + } + + fn resolve_str<'a>(&'a self, string: &'a LoxString) -> &'a str { + match string { + LoxString::Heap(s) => s.as_str(), + LoxString::Interned(id) => self.strings.lookup(*id), + } + } + + fn print_value(&self, val: Value) -> String { + match val { + Value::String(LoxString::Heap(s)) => s, + Value::String(LoxString::Interned(id)) => self.strings.lookup(id).into(), + _ => format!("{:?}", val), + } + } +} + +pub fn interpret(strings: Interner, chunk: chunk::Chunk) -> LoxResult<Value> { + let mut vm = VM { + chunk, + strings, + globals: HashMap::new(), + ip: 0, + stack: vec![], + last_drop: None, + }; + + vm.run() +} diff --git a/users/tazjin/rlox/src/main.rs b/users/tazjin/rlox/src/main.rs new file mode 100644 index 000000000000..ee61ae01a106 --- /dev/null +++ b/users/tazjin/rlox/src/main.rs @@ -0,0 +1,71 @@ +use std::io::Write; +use std::{env, fs, io, process}; + +mod bytecode; +mod scanner; +mod treewalk; + +/// Trait for making the different interpreters callable in the same +/// way. +pub trait Lox { + type Value: std::fmt::Debug; + type Error: std::fmt::Display; + + fn create() -> Self; + fn interpret(&mut self, source: String) -> Result<Self::Value, Vec<Self::Error>>; +} + +fn main() { + let mut args = env::args(); + if args.len() > 2 { + println!("Usage: rlox [script]"); + process::exit(1); + } + + match env::var("LOX_INTERPRETER").as_ref().map(String::as_str) { + Ok("treewalk") => pick::<treewalk::interpreter::Interpreter>(args.nth(1)), + _ => pick::<bytecode::Interpreter>(args.nth(1)), + } +} + +fn pick<I: Lox>(file_arg: Option<String>) { + if let Some(file) = file_arg { + run_file::<I>(&file); + } else { + run_prompt::<I>(); + } +} + +// Run Lox code from a file and print results to stdout +fn run_file<I: Lox>(file: &str) { + let contents = fs::read_to_string(file).expect("failed to read the input file"); + let mut lox = I::create(); + run(&mut lox, contents); +} + +// Evaluate Lox code interactively in a shitty REPL. +fn run_prompt<I: Lox>() { + let mut line = String::new(); + let mut lox = I::create(); + + loop { + print!("> "); + io::stdout().flush().unwrap(); + io::stdin() + .read_line(&mut line) + .expect("failed to read user input"); + run(&mut lox, std::mem::take(&mut line)); + line.clear(); + } +} + +fn run<I: Lox>(lox: &mut I, code: String) { + match lox.interpret(code) { + Ok(result) => println!("=> {:?}", result), + Err(errors) => { + for error in errors { + eprintln!("{}", error); + } + } + } +} diff --git a/users/tazjin/rlox/src/scanner.rs b/users/tazjin/rlox/src/scanner.rs new file mode 100644 index 000000000000..314b56d6d380 --- /dev/null +++ b/users/tazjin/rlox/src/scanner.rs @@ -0,0 +1,284 @@ +#[derive(Clone, Debug, PartialEq)] +pub enum TokenKind { + // Single-character tokens. + LeftParen, + RightParen, + LeftBrace, + RightBrace, + Comma, + Dot, + Minus, + Plus, + Semicolon, + Slash, + Star, + + // One or two character tokens. + Bang, + BangEqual, + Equal, + EqualEqual, + Greater, + GreaterEqual, + Less, + LessEqual, + + // Literals. + Identifier(String), + String(String), + Number(f64), + True, + False, + Nil, + + // Keywords. + And, + Class, + Else, + Fun, + For, + If, + Or, + Print, + Return, + Super, + This, + Var, + While, + + // Special things + Eof, +} + +#[derive(Clone, Debug)] +pub struct Token { + pub kind: TokenKind, + pub lexeme: String, + pub line: usize, +} + +pub enum ScannerError { + UnexpectedChar { line: usize, unexpected: char }, + UnterminatedString { line: usize }, +} + +struct Scanner<'a> { + source: &'a [char], + tokens: Vec<Token>, + errors: Vec<ScannerError>, + start: usize, // offset of first character in current lexeme + current: usize, // current offset into source + line: usize, // current line in source +} + +impl<'a> Scanner<'a> { + fn is_at_end(&self) -> bool { + return self.current >= self.source.len(); + } + + fn advance(&mut self) -> char { + self.current += 1; + self.source[self.current - 1] + } + + fn add_token(&mut self, kind: TokenKind) { + let lexeme = &self.source[self.start..self.current]; + self.tokens.push(Token { + kind, + lexeme: lexeme.into_iter().collect(), + line: self.line, + }) + } + + fn scan_token(&mut self) { + match self.advance() { + // simple single-character tokens + '(' => self.add_token(TokenKind::LeftParen), + ')' => self.add_token(TokenKind::RightParen), + '{' => self.add_token(TokenKind::LeftBrace), + '}' => self.add_token(TokenKind::RightBrace), + ',' => self.add_token(TokenKind::Comma), + '.' => self.add_token(TokenKind::Dot), + '-' => self.add_token(TokenKind::Minus), + '+' => self.add_token(TokenKind::Plus), + ';' => self.add_token(TokenKind::Semicolon), + '*' => self.add_token(TokenKind::Star), + + // possible multi-character tokens + '!' => self.add_if_next('=', TokenKind::BangEqual, TokenKind::Bang), + '=' => self.add_if_next('=', TokenKind::EqualEqual, TokenKind::Equal), + '<' => self.add_if_next('=', TokenKind::LessEqual, TokenKind::Less), + '>' => self.add_if_next('=', TokenKind::GreaterEqual, TokenKind::Greater), + + '/' => { + // support comments until EOL by discarding characters + if self.match_next('/') { + while self.peek() != '\n' && !self.is_at_end() { + self.advance(); + } + } else { + self.add_token(TokenKind::Slash); + } + } + + // ignore whitespace + ws if ws.is_whitespace() => { + if ws == '\n' { + self.line += 1 + } + } + + '"' => self.scan_string(), + + digit if digit.is_digit(10) => self.scan_number(), + + chr if chr.is_alphabetic() || chr == '_' => self.scan_identifier(), + + unexpected => self.errors.push(ScannerError::UnexpectedChar { + line: self.line, + unexpected, + }), + }; + } + + fn match_next(&mut self, expected: char) -> bool { + if self.is_at_end() || self.source[self.current] != expected { + false + } else { + self.current += 1; + true + } + } + + fn add_if_next(&mut self, expected: char, then: TokenKind, or: TokenKind) { + if self.match_next(expected) { + self.add_token(then); + } else { + self.add_token(or); + } + } + + fn peek(&self) -> char { + if self.is_at_end() { + return '\0'; + } else { + return self.source[self.current]; + } + } + + fn peek_next(&self) -> char { + if self.current + 1 >= self.source.len() { + return '\0'; + } else { + return self.source[self.current + 1]; + } + } + + fn scan_string(&mut self) { + while self.peek() != '"' && !self.is_at_end() { + if self.peek() == '\n' { + self.line += 1; + } + + self.advance(); + } + + if self.is_at_end() { + self.errors + .push(ScannerError::UnterminatedString { line: self.line }); + return; + } + + // closing '"' + self.advance(); + + // add token without surrounding quotes + let string: String = self.source[(self.start + 1)..(self.current - 1)] + .iter() + .collect(); + self.add_token(TokenKind::String(string)); + } + + fn scan_number(&mut self) { + while self.peek().is_digit(10) { + self.advance(); + } + + // Look for a fractional part + if self.peek() == '.' && self.peek_next().is_digit(10) { + // consume '.' + self.advance(); + + while self.peek().is_digit(10) { + self.advance(); + } + } + + let num: f64 = self.source[self.start..self.current] + .iter() + .collect::<String>() + .parse() + .expect("float parsing should always work"); + + self.add_token(TokenKind::Number(num)); + } + + fn scan_identifier(&mut self) { + while self.peek().is_alphanumeric() || self.peek() == '_' { + self.advance(); + } + + let ident: String = self.source[self.start..self.current].iter().collect(); + + // Determine whether this is an identifier, or a keyword: + let token_kind = match ident.as_str() { + "and" => TokenKind::And, + "class" => TokenKind::Class, + "else" => TokenKind::Else, + "false" => TokenKind::False, + "for" => TokenKind::For, + "fun" => TokenKind::Fun, + "if" => TokenKind::If, + "nil" => TokenKind::Nil, + "or" => TokenKind::Or, + "print" => TokenKind::Print, + "return" => TokenKind::Return, + "super" => TokenKind::Super, + "this" => TokenKind::This, + "true" => TokenKind::True, + "var" => TokenKind::Var, + "while" => TokenKind::While, + _ => TokenKind::Identifier(ident), + }; + + self.add_token(token_kind); + } + + fn scan_tokens(&mut self) { + while !self.is_at_end() { + self.start = self.current; + self.scan_token(); + } + + self.add_token(TokenKind::Eof); + } +} + +pub fn scan<'a>(input: &'a [char]) -> Result<Vec<Token>, Vec<ScannerError>> { + let mut scanner = Scanner { + source: &input, + tokens: vec![], + errors: vec![], + start: 0, + current: 0, + line: 0, + }; + + scanner.scan_tokens(); + + if !scanner.errors.is_empty() { + return Err(scanner.errors); + } + + return Ok(scanner.tokens); +} diff --git a/users/tazjin/rlox/src/treewalk/errors.rs b/users/tazjin/rlox/src/treewalk/errors.rs new file mode 100644 index 000000000000..391663d51b18 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/errors.rs @@ -0,0 +1,59 @@ +use crate::scanner::ScannerError; +use crate::treewalk::interpreter::Value; + +use std::fmt; + +#[derive(Debug)] +pub enum ErrorKind { + UnexpectedChar(char), + UnterminatedString, + UnmatchedParens, + ExpectedExpression(String), + ExpectedSemicolon, + ExpectedClosingBrace, + ExpectedToken(&'static str), + TypeError(String), + UndefinedVariable(String), + InternalError(String), + InvalidAssignmentTarget(String), + RuntimeError(String), + StaticError(String), + + // This variant is not an error, rather it is used for + // short-circuiting out of a function body that hits a `return` + // statement. + // + // It's implemented this way because in the original book the + // author uses exceptions for control flow, and this is the + // closest equivalent that I had available without diverging too + // much. + FunctionReturn(Value), +} + +#[derive(Debug)] +pub struct Error { + pub line: usize, + pub kind: ErrorKind, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[line {}] Error: {:?}", self.line, self.kind) + } +} + +impl From<ScannerError> for Error { + fn from(err: ScannerError) -> Self { + match err { + ScannerError::UnexpectedChar { line, unexpected } => Error { + line, + kind: ErrorKind::UnexpectedChar(unexpected), + }, + + ScannerError::UnterminatedString { line } => Error { + line, + kind: ErrorKind::UnterminatedString, + }, + } + } +} diff --git a/users/tazjin/rlox/src/treewalk/interpreter.rs b/users/tazjin/rlox/src/treewalk/interpreter.rs new file mode 100644 index 000000000000..3285775bbec6 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/interpreter.rs @@ -0,0 +1,498 @@ +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::parser::{self, Block, Expr, Literal, Statement}; +use crate::treewalk::resolver; +use crate::treewalk::scanner::{self, TokenKind}; +use crate::Lox; +use std::collections::HashMap; +use std::rc::Rc; +use std::sync::RwLock; + +// Implementation of built-in functions. +mod builtins; + +#[cfg(test)] +mod tests; + +// Tree-walk interpreter + +// Representation of all callables, including builtins & user-defined +// functions. +#[derive(Clone, Debug)] +pub enum Callable { + Builtin(&'static dyn builtins::Builtin), + Function { + func: Rc<parser::Function>, + closure: Rc<RwLock<Environment>>, + }, +} + +impl Callable { + fn arity(&self) -> usize { + match self { + Callable::Builtin(builtin) => builtin.arity(), + Callable::Function { func, .. } => func.params.len(), + } + } + + fn call(&self, lox: &mut Interpreter, args: Vec<Value>) -> Result<Value, Error> { + match self { + Callable::Builtin(builtin) => builtin.call(args), + + Callable::Function { func, closure } => { + let mut fn_env: Environment = Default::default(); + fn_env.enclosing = Some(closure.clone()); + + for (param, value) in func.params.iter().zip(args.into_iter()) { + fn_env.define(param, value)?; + } + + let result = + lox.interpret_block_with_env(Some(Rc::new(RwLock::new(fn_env))), &func.body); + + match result { + // extract returned values if applicable + Err(Error { + kind: ErrorKind::FunctionReturn(value), + .. + }) => Ok(value), + + // otherwise just return the result itself + _ => result, + } + } + } + } +} + +// Representation of an in-language value. +#[derive(Clone, Debug)] +pub enum Value { + Literal(Literal), + Callable(Callable), +} + +impl PartialEq for Value { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Value::Literal(lhs), Value::Literal(rhs)) => lhs == rhs, + // functions do not have equality + _ => false, + } + } +} + +impl From<Literal> for Value { + fn from(lit: Literal) -> Value { + Value::Literal(lit) + } +} + +impl Value { + fn expect_literal(self) -> Result<Literal, Error> { + match self { + Value::Literal(lit) => Ok(lit), + _ => unimplemented!(), // which error? which line? + } + } +} + +#[derive(Debug, Default)] +pub struct Environment { + enclosing: Option<Rc<RwLock<Environment>>>, + values: HashMap<String, Value>, +} + +impl Environment { + fn define(&mut self, name: &scanner::Token, value: Value) -> Result<(), Error> { + let ident = identifier_str(name)?; + self.values.insert(ident.into(), value); + Ok(()) + } + + fn get(&self, ident: &str, line: usize, depth: usize) -> Result<Value, Error> { + if depth > 0 { + match &self.enclosing { + None => { + return Err(Error { + line, + kind: ErrorKind::InternalError(format!( + "invalid depth {} for {}", + depth, ident + )), + }) + } + Some(parent) => { + let env = parent.read().expect("fatal: environment lock poisoned"); + return env.get(ident, line, depth - 1); + } + } + } + + self.values + .get(ident) + .map(Clone::clone) + .ok_or_else(|| Error { + line, + kind: ErrorKind::UndefinedVariable(ident.into()), + }) + } + + fn assign(&mut self, name: &scanner::Token, value: Value) -> Result<(), Error> { + let ident = identifier_str(name)?; + + match self.values.get_mut(ident) { + Some(target) => { + *target = value; + Ok(()) + } + None => { + if let Some(parent) = &self.enclosing { + return parent.write().unwrap().assign(name, value); + } + + Err(Error { + line: name.line, + kind: ErrorKind::UndefinedVariable(ident.into()), + }) + } + } + } +} + +fn identifier_str(name: &scanner::Token) -> Result<&str, Error> { + if let TokenKind::Identifier(ident) = &name.kind { + Ok(ident) + } else { + Err(Error { + line: name.line, + kind: ErrorKind::InternalError("unexpected identifier kind".into()), + }) + } +} + +#[derive(Debug)] +pub struct Interpreter { + env: Rc<RwLock<Environment>>, +} + +impl Lox for Interpreter { + type Value = Value; + type Error = Error; + + /// Create a new interpreter and configure the initial global + /// variable set. + fn create() -> Self { + let mut globals = HashMap::new(); + + globals.insert( + "clock".into(), + Value::Callable(Callable::Builtin(&builtins::Clock {})), + ); + + Interpreter { + env: Rc::new(RwLock::new(Environment { + enclosing: None, + values: globals, + })), + } + } + + fn interpret(&mut self, code: String) -> Result<Value, Vec<Error>> { + let chars: Vec<char> = code.chars().collect(); + + let mut program = scanner::scan(&chars) + .map_err(|errors| errors.into_iter().map(Into::into).collect()) + .and_then(|tokens| parser::parse(tokens))?; + + let globals = self + .env + .read() + .expect("static globals lock poisoned") + .values + .keys() + .map(Clone::clone) + .collect::<Vec<String>>(); + + resolver::resolve(&globals, &mut program).map_err(|e| vec![e])?; + self.interpret_block_with_env(None, &program) + .map_err(|e| vec![e]) + } +} + +impl Interpreter { + // Environment modification helpers + fn define_var(&mut self, name: &scanner::Token, value: Value) -> Result<(), Error> { + self.env + .write() + .expect("environment lock is poisoned") + .define(name, value) + } + + fn assign_var(&mut self, name: &scanner::Token, value: Value) -> Result<(), Error> { + self.env + .write() + .expect("environment lock is poisoned") + .assign(name, value) + } + + fn get_var(&mut self, var: &parser::Variable) -> Result<Value, Error> { + let ident = identifier_str(&var.name)?; + let depth = var.depth.ok_or_else(|| Error { + line: var.name.line, + kind: ErrorKind::UndefinedVariable(ident.into()), + })?; + + self.env + .read() + .expect("environment lock is poisoned") + .get(ident, var.name.line, depth) + } + + /// Interpret the block in the supplied environment. If no + /// environment is supplied, a new one is created using the + /// current one as its parent. + fn interpret_block_with_env( + &mut self, + env: Option<Rc<RwLock<Environment>>>, + block: &parser::Block, + ) -> Result<Value, Error> { + let env = match env { + Some(env) => env, + None => { + let env: Rc<RwLock<Environment>> = Default::default(); + set_enclosing_env(&env, self.env.clone()); + env + } + }; + + let previous = std::mem::replace(&mut self.env, env); + let result = self.interpret_block(block); + + // Swap it back, discarding the child env. + self.env = previous; + + return result; + } + + fn interpret_block(&mut self, program: &Block) -> Result<Value, Error> { + let mut value = Value::Literal(Literal::Nil); + + for stmt in program { + value = self.interpret_stmt(stmt)?; + } + + Ok(value) + } + + fn interpret_stmt(&mut self, stmt: &Statement) -> Result<Value, Error> { + let value = match stmt { + Statement::Expr(expr) => self.eval(expr)?, + Statement::Print(expr) => { + let result = self.eval(expr)?; + let output = format!("{:?}", result); + println!("{}", output); + Value::Literal(Literal::String(output)) + } + Statement::Var(var) => return self.interpret_var(var), + Statement::Block(block) => return self.interpret_block_with_env(None, block), + Statement::If(if_stmt) => return self.interpret_if(if_stmt), + Statement::While(while_stmt) => return self.interpret_while(while_stmt), + Statement::Function(func) => return self.interpret_function(func.clone()), + Statement::Return(ret) => { + return Err(Error { + line: 0, + kind: ErrorKind::FunctionReturn(self.eval(&ret.value)?), + }) + } + }; + + Ok(value) + } + + fn interpret_var(&mut self, var: &parser::Var) -> Result<Value, Error> { + let init = var.initialiser.as_ref().ok_or_else(|| Error { + line: var.name.line, + kind: ErrorKind::InternalError("missing variable initialiser".into()), + })?; + let value = self.eval(init)?; + self.define_var(&var.name, value.clone())?; + Ok(value) + } + + fn interpret_if(&mut self, if_stmt: &parser::If) -> Result<Value, Error> { + let condition = self.eval(&if_stmt.condition)?; + + if eval_truthy(&condition) { + self.interpret_stmt(&if_stmt.then_branch) + } else if let Some(else_branch) = &if_stmt.else_branch { + self.interpret_stmt(else_branch) + } else { + Ok(Value::Literal(Literal::Nil)) + } + } + + fn interpret_while(&mut self, stmt: &parser::While) -> Result<Value, Error> { + let mut value = Value::Literal(Literal::Nil); + while eval_truthy(&self.eval(&stmt.condition)?) { + value = self.interpret_stmt(&stmt.body)?; + } + + Ok(value) + } + + fn interpret_function(&mut self, func: Rc<parser::Function>) -> Result<Value, Error> { + let name = func.name.clone(); + let value = Value::Callable(Callable::Function { + func, + closure: self.env.clone(), + }); + self.define_var(&name, value.clone())?; + Ok(value) + } + + fn eval(&mut self, expr: &Expr) -> Result<Value, Error> { + match expr { + Expr::Assign(assign) => self.eval_assign(assign), + Expr::Literal(lit) => Ok(lit.clone().into()), + Expr::Grouping(grouping) => self.eval(&*grouping.0), + Expr::Unary(unary) => self.eval_unary(unary), + Expr::Binary(binary) => self.eval_binary(binary), + Expr::Variable(var) => self.get_var(var), + Expr::Logical(log) => self.eval_logical(log), + Expr::Call(call) => self.eval_call(call), + } + } + + fn eval_unary(&mut self, expr: &parser::Unary) -> Result<Value, Error> { + let right = self.eval(&*expr.right)?; + + match (&expr.operator.kind, right) { + (TokenKind::Minus, Value::Literal(Literal::Number(num))) => { + Ok(Literal::Number(-num).into()) + } + (TokenKind::Bang, right) => Ok(Literal::Boolean(!eval_truthy(&right)).into()), + + (op, right) => Err(Error { + line: expr.operator.line, + kind: ErrorKind::TypeError(format!( + "Operator '{:?}' can not be called with argument '{:?}'", + op, right + )), + }), + } + } + + fn eval_binary(&mut self, expr: &parser::Binary) -> Result<Value, Error> { + let left = self.eval(&*expr.left)?.expect_literal()?; + let right = self.eval(&*expr.right)?.expect_literal()?; + + let result = match (&expr.operator.kind, left, right) { + // Numeric + (TokenKind::Minus, Literal::Number(l), Literal::Number(r)) => Literal::Number(l - r), + (TokenKind::Slash, Literal::Number(l), Literal::Number(r)) => Literal::Number(l / r), + (TokenKind::Star, Literal::Number(l), Literal::Number(r)) => Literal::Number(l * r), + (TokenKind::Plus, Literal::Number(l), Literal::Number(r)) => Literal::Number(l + r), + + // Strings + (TokenKind::Plus, Literal::String(l), Literal::String(r)) => { + Literal::String(format!("{}{}", l, r)) + } + + // Comparators (on numbers only?) + (TokenKind::Greater, Literal::Number(l), Literal::Number(r)) => Literal::Boolean(l > r), + (TokenKind::GreaterEqual, Literal::Number(l), Literal::Number(r)) => { + Literal::Boolean(l >= r) + } + (TokenKind::Less, Literal::Number(l), Literal::Number(r)) => Literal::Boolean(l < r), + (TokenKind::LessEqual, Literal::Number(l), Literal::Number(r)) => { + Literal::Boolean(l <= r) + } + + // Equality + (TokenKind::Equal, l, r) => Literal::Boolean(l == r), + (TokenKind::BangEqual, l, r) => Literal::Boolean(l != r), + + (op, left, right) => { + return Err(Error { + line: expr.operator.line, + kind: ErrorKind::TypeError(format!( + "Operator '{:?}' can not be called with arguments '({:?}, {:?})'", + op, left, right + )), + }) + } + }; + + Ok(result.into()) + } + + fn eval_assign(&mut self, assign: &parser::Assign) -> Result<Value, Error> { + let value = self.eval(&assign.value)?; + self.assign_var(&assign.name, value.clone())?; + Ok(value) + } + + fn eval_logical(&mut self, logical: &parser::Logical) -> Result<Value, Error> { + let left = eval_truthy(&self.eval(&logical.left)?); + let right = eval_truthy(&self.eval(&logical.right)?); + + match &logical.operator.kind { + TokenKind::And => Ok(Literal::Boolean(left && right).into()), + TokenKind::Or => Ok(Literal::Boolean(left || right).into()), + kind => Err(Error { + line: logical.operator.line, + kind: ErrorKind::InternalError(format!("Invalid logical operator: {:?}", kind)), + }), + } + } + + fn eval_call(&mut self, call: &parser::Call) -> Result<Value, Error> { + let callable = match self.eval(&call.callee)? { + Value::Callable(c) => c, + Value::Literal(v) => { + return Err(Error { + line: call.paren.line, + kind: ErrorKind::RuntimeError(format!("not callable: {:?}", v)), + }) + } + }; + + let mut args = vec![]; + for arg in &call.args { + args.push(self.eval(arg)?); + } + + if callable.arity() != args.len() { + return Err(Error { + line: call.paren.line, + kind: ErrorKind::RuntimeError(format!( + "Expected {} arguments, but got {}", + callable.arity(), + args.len(), + )), + }); + } + + callable.call(self, args) + } +} + +// Interpreter functions not dependent on interpreter-state. + +fn eval_truthy(lit: &Value) -> bool { + if let Value::Literal(lit) = lit { + match lit { + Literal::Nil => false, + Literal::Boolean(b) => *b, + _ => true, + } + } else { + false + } +} + +fn set_enclosing_env(this: &RwLock<Environment>, parent: Rc<RwLock<Environment>>) { + this.write() + .expect("environment lock is poisoned") + .enclosing = Some(parent); +} diff --git a/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs b/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs new file mode 100644 index 000000000000..c502d2a1718a --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs @@ -0,0 +1,25 @@ +use std::fmt; +use std::time::{SystemTime, UNIX_EPOCH}; + +use crate::treewalk::errors::Error; +use crate::treewalk::interpreter::Value; +use crate::treewalk::parser::Literal; + +pub trait Builtin: fmt::Debug { + fn arity(&self) -> usize; + fn call(&self, args: Vec<Value>) -> Result<Value, Error>; +} + +// Builtin to return the current timestamp. +#[derive(Debug)] +pub struct Clock {} +impl Builtin for Clock { + fn arity(&self) -> usize { + 0 + } + + fn call(&self, _args: Vec<Value>) -> Result<Value, Error> { + let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); + Ok(Value::Literal(Literal::Number(now.as_secs() as f64))) + } +} diff --git a/users/tazjin/rlox/src/treewalk/interpreter/tests.rs b/users/tazjin/rlox/src/treewalk/interpreter/tests.rs new file mode 100644 index 000000000000..2fc6f4fee978 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/interpreter/tests.rs @@ -0,0 +1,97 @@ +use super::*; + +/// Evaluate a code snippet, returning a value. +fn parse_eval(code: &str) -> Value { + Interpreter::create() + .interpret(code.into()) + .expect("could not interpret code") +} + +#[test] +fn test_if() { + let result = parse_eval( + r#" +if (42 > 23) + "pass"; +else + "fail"; +"#, + ); + + assert_eq!(Value::Literal(Literal::String("pass".into())), result,); +} + +#[test] +fn test_scope() { + let result = parse_eval( + r#" +var result = ""; + +var a = "global a, "; +var b = "global b, "; +var c = "global c"; + +{ + var a = "outer a, "; + var b = "outer b, "; + + { + var a = "inner a, "; + result = a + b + c; + } +} +"#, + ); + + assert_eq!( + Value::Literal(Literal::String("inner a, outer b, global c".into())), + result, + ); +} + +#[test] +fn test_binary_operators() { + assert_eq!(Value::Literal(Literal::Number(42.0)), parse_eval("40 + 2;")); + + assert_eq!( + Value::Literal(Literal::String("foobar".into())), + parse_eval("\"foo\" + \"bar\";") + ); +} + +#[test] +fn test_functions() { + let result = parse_eval( + r#" +fun add(a, b, c) { + a + b + c; +} + +add(1, 2, 3); +"#, + ); + + assert_eq!(Value::Literal(Literal::Number(6.0)), result); +} + +#[test] +fn test_closure() { + let result = parse_eval( + r#" +fun makeCounter() { + var i = 0; + fun count() { + i = i + 1; + } + + return count; +} + +var counter = makeCounter(); +counter(); // "1". +counter(); // "2". +"#, + ); + + assert_eq!(Value::Literal(Literal::Number(2.0)), result); +} diff --git a/users/tazjin/rlox/src/treewalk/mod.rs b/users/tazjin/rlox/src/treewalk/mod.rs new file mode 100644 index 000000000000..2d82b3320a90 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/mod.rs @@ -0,0 +1,6 @@ +use crate::scanner; + +mod errors; +pub mod interpreter; +mod parser; +mod resolver; diff --git a/users/tazjin/rlox/src/treewalk/parser.rs b/users/tazjin/rlox/src/treewalk/parser.rs new file mode 100644 index 000000000000..5794b42d1577 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/parser.rs @@ -0,0 +1,700 @@ +// This implements the grammar of Lox as described starting in the +// Crafting Interpreters chapter "Representing Code". Note that the +// upstream Java implementation works around Java being bad at value +// classes by writing a code generator for Java. +// +// My Rust implementation skips this step because it's unnecessary, we +// have real types. +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::scanner::{Token, TokenKind}; +use std::rc::Rc; + +// AST + +#[derive(Debug)] +pub struct Assign { + pub name: Token, + pub value: Box<Expr>, + pub depth: Option<usize>, +} + +#[derive(Debug)] +pub struct Binary { + pub left: Box<Expr>, + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Logical { + pub left: Box<Expr>, + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Grouping(pub Box<Expr>); + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + Boolean(bool), + Number(f64), + String(String), + Nil, +} + +#[derive(Debug)] +pub struct Unary { + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Call { + pub callee: Box<Expr>, + pub paren: Token, + pub args: Vec<Expr>, +} + +// Not to be confused with `Var`, which is for assignment. +#[derive(Debug)] +pub struct Variable { + pub name: Token, + pub depth: Option<usize>, +} + +#[derive(Debug)] +pub enum Expr { + Assign(Assign), + Binary(Binary), + Grouping(Grouping), + Literal(Literal), + Unary(Unary), + Call(Call), + Variable(Variable), + Logical(Logical), +} + +// Variable assignment. Not to be confused with `Variable`, which is +// for access. +#[derive(Debug)] +pub struct Var { + pub name: Token, + pub initialiser: Option<Expr>, +} + +#[derive(Debug)] +pub struct Return { + pub value: Expr, +} + +#[derive(Debug)] +pub struct If { + pub condition: Expr, + pub then_branch: Box<Statement>, + pub else_branch: Option<Box<Statement>>, +} + +#[derive(Debug)] +pub struct While { + pub condition: Expr, + pub body: Box<Statement>, +} + +pub type Block = Vec<Statement>; + +#[derive(Debug)] +pub struct Function { + pub name: Token, + pub params: Vec<Token>, + pub body: Block, +} + +#[derive(Debug)] +pub enum Statement { + Expr(Expr), + Print(Expr), + Var(Var), + Block(Block), + If(If), + While(While), + Function(Rc<Function>), + Return(Return), +} + +// Parser + +// program → declaration* EOF ; +// +// declaration → funDecl +// | varDecl +// | statement ; +// +// funDecl → "fun" function ; +// function → IDENTIFIER "(" parameters? ")" block ; +// parameters → IDENTIFIER ( "," IDENTIFIER )* ; +// +// +// statement → exprStmt +// | forStmt +// | ifStmt +// | printStmt +// | returnStmt +// | whileStmt +// | block ; +// +// forStmt → "for" "(" ( varDecl | exprStmt | ";" ) +// expression? ";" +// expression? ")" statement ; +// +// returnStmt → "return" expression? ";" ; +// +// whileStmt → "while" "(" expression ")" statement ; +// +// exprStmt → expression ";" ; +// +// ifStmt → "if" "(" expression ")" statement +// ( "else" statement )? ; +// +// printStmt → "print" expression ";" ; +// +// expression → assignment ; +// assignment → IDENTIFIER "=" assignment +// | logic_or ; +// logic_or → logic_and ( "or" logic_and )* ; +// logic_and → equality ( "and" equality )* ; +// equality → comparison ( ( "!=" | "==" ) comparison )* ; +// comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ; +// term → factor ( ( "-" | "+" ) factor )* ; +// factor → unary ( ( "/" | "*" ) unary )* ; +// unary → ( "!" | "-" ) unary | call ; +// call → primary ( "(" arguments? ")" )* ; +// arguments → expression ( "," expression )* ; +// primary → NUMBER | STRING | "true" | "false" | "nil" +// | "(" expression ")" ; + +struct Parser { + tokens: Vec<Token>, + current: usize, +} + +type ExprResult = Result<Expr, Error>; +type StmtResult = Result<Statement, Error>; + +impl Parser { + // recursive-descent parser functions + + fn declaration(&mut self) -> StmtResult { + if self.match_token(&TokenKind::Fun) { + return self.function(); + } + + if self.match_token(&TokenKind::Var) { + return self.var_declaration(); + } + + self.statement() + } + + fn function(&mut self) -> StmtResult { + let name = self.identifier("Expected function name.")?; + + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expect '(' after function name."), + )?; + + let mut params = vec![]; + + if !self.check_token(&TokenKind::RightParen) { + loop { + if params.len() >= 255 { + return Err(Error { + line: self.peek().line, + kind: ErrorKind::InternalError("255 parameter limit exceeded.".into()), + }); + } + + params.push(self.identifier("Expected parameter name.")?); + + if !self.match_token(&TokenKind::Comma) { + break; + } + } + } + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expect ')' after parameters."), + )?; + + self.consume( + &TokenKind::LeftBrace, + ErrorKind::ExpectedToken("Expect '{' before function body."), + )?; + + Ok(Statement::Function(Rc::new(Function { + name, + params, + body: self.block_statement()?, + }))) + } + + fn var_declaration(&mut self) -> StmtResult { + // Since `TokenKind::Identifier` carries data, we can't use + // `consume`. + let mut var = Var { + name: self.identifier("Expected variable name.")?, + initialiser: None, + }; + + if self.match_token(&TokenKind::Equal) { + var.initialiser = Some(self.expression()?); + } + + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Var(var)) + } + + fn statement(&mut self) -> StmtResult { + if self.match_token(&TokenKind::Print) { + self.print_statement() + } else if self.match_token(&TokenKind::LeftBrace) { + Ok(Statement::Block(self.block_statement()?)) + } else if self.match_token(&TokenKind::If) { + self.if_statement() + } else if self.match_token(&TokenKind::While) { + self.while_statement() + } else if self.match_token(&TokenKind::For) { + self.for_statement() + } else if self.match_token(&TokenKind::Return) { + self.return_statement() + } else { + self.expr_statement() + } + } + + fn print_statement(&mut self) -> StmtResult { + let expr = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Print(expr)) + } + + fn block_statement(&mut self) -> Result<Block, Error> { + let mut block: Block = vec![]; + + while !self.check_token(&TokenKind::RightBrace) && !self.is_at_end() { + block.push(self.declaration()?); + } + + self.consume(&TokenKind::RightBrace, ErrorKind::ExpectedClosingBrace)?; + + Ok(block) + } + + fn if_statement(&mut self) -> StmtResult { + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'if'"), + )?; + let condition = self.expression()?; + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after condition"), + )?; + + let then_branch = Box::new(self.statement()?); + + let mut stmt = If { + condition, + then_branch, + else_branch: Option::None, + }; + + if self.match_token(&TokenKind::Else) { + stmt.else_branch = Some(Box::new(self.statement()?)); + } + + Ok(Statement::If(stmt)) + } + + fn while_statement(&mut self) -> StmtResult { + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'while'"), + )?; + + let condition = self.expression()?; + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after 'while'"), + )?; + + Ok(Statement::While(While { + condition, + body: Box::new(self.statement()?), + })) + } + + fn for_statement(&mut self) -> StmtResult { + // Parsing of clauses ... + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'for'"), + )?; + + let initialiser = if self.match_token(&TokenKind::Semicolon) { + None + } else if self.match_token(&TokenKind::Var) { + Some(self.var_declaration()?) + } else { + Some(self.expr_statement()?) + }; + + let condition = if self.check_token(&TokenKind::Semicolon) { + // unspecified condition => infinite loop + Expr::Literal(Literal::Boolean(true)) + } else { + self.expression()? + }; + + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + + let increment = if self.check_token(&TokenKind::RightParen) { + None + } else { + Some(self.expression()?) + }; + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after for clauses"), + )?; + + let mut body = self.statement()?; + + // ... desugaring to while + + if let Some(inc) = increment { + body = Statement::Block(vec![body, Statement::Expr(inc)]); + } + + body = Statement::While(While { + condition, + body: Box::new(body), + }); + + if let Some(init) = initialiser { + body = Statement::Block(vec![init, body]); + } + + Ok(body) + } + + fn return_statement(&mut self) -> StmtResult { + let value = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Return(Return { value })) + } + + fn expr_statement(&mut self) -> StmtResult { + let expr = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Expr(expr)) + } + + fn expression(&mut self) -> ExprResult { + self.assignment() + } + + fn assignment(&mut self) -> ExprResult { + let expr = self.logic_or()?; + + if self.match_token(&TokenKind::Equal) { + let equals = self.previous().clone(); + let value = self.assignment()?; + + if let Expr::Variable(Variable { name, .. }) = expr { + return Ok(Expr::Assign(Assign { + name, + value: Box::new(value), + depth: None, + })); + } + + return Err(Error { + line: equals.line, + kind: ErrorKind::InvalidAssignmentTarget(format!("{:?}", equals)), + }); + } + + Ok(expr) + } + + fn logic_or(&mut self) -> ExprResult { + let mut expr = self.logic_and()?; + + while self.match_token(&TokenKind::Or) { + expr = Expr::Logical(Logical { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(self.logic_and()?), + }) + } + + Ok(expr) + } + + fn logic_and(&mut self) -> ExprResult { + let mut expr = self.equality()?; + + while self.match_token(&TokenKind::And) { + expr = Expr::Logical(Logical { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(self.equality()?), + }) + } + + Ok(expr) + } + + fn equality(&mut self) -> ExprResult { + self.binary_operator( + &[TokenKind::BangEqual, TokenKind::EqualEqual], + Self::comparison, + ) + } + + fn comparison(&mut self) -> ExprResult { + self.binary_operator( + &[ + TokenKind::Greater, + TokenKind::GreaterEqual, + TokenKind::Less, + TokenKind::LessEqual, + ], + Self::term, + ) + } + + fn term(&mut self) -> ExprResult { + self.binary_operator(&[TokenKind::Minus, TokenKind::Plus], Self::factor) + } + + fn factor(&mut self) -> ExprResult { + self.binary_operator(&[TokenKind::Slash, TokenKind::Star], Self::unary) + } + + fn unary(&mut self) -> ExprResult { + if self.match_token(&TokenKind::Bang) || self.match_token(&TokenKind::Minus) { + return Ok(Expr::Unary(Unary { + operator: self.previous().clone(), + right: Box::new(self.unary()?), + })); + } + + return self.call(); + } + + fn call(&mut self) -> ExprResult { + let mut expr = self.primary()?; + + loop { + if self.match_token(&TokenKind::LeftParen) { + expr = self.finish_call(expr)?; + } else { + break; + } + } + + Ok(expr) + } + + fn finish_call(&mut self, callee: Expr) -> ExprResult { + let mut args = vec![]; + + if !self.check_token(&TokenKind::RightParen) { + loop { + // TODO(tazjin): Check for max args count + args.push(self.expression()?); + if !self.match_token(&TokenKind::Comma) { + break; + } + } + } + + let paren = self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expect ')' after arguments."), + )?; + + Ok(Expr::Call(Call { + args, + callee: Box::new(callee), + paren, + })) + } + + fn primary(&mut self) -> ExprResult { + let next = self.advance(); + let literal = match next.kind { + TokenKind::True => Literal::Boolean(true), + TokenKind::False => Literal::Boolean(false), + TokenKind::Nil => Literal::Nil, + TokenKind::Number(num) => Literal::Number(num), + TokenKind::String(string) => Literal::String(string), + + TokenKind::LeftParen => { + let expr = self.expression()?; + self.consume(&TokenKind::RightParen, ErrorKind::UnmatchedParens)?; + return Ok(Expr::Grouping(Grouping(Box::new(expr)))); + } + + TokenKind::Identifier(_) => { + return Ok(Expr::Variable(Variable { + name: next, + depth: None, + })) + } + + unexpected => { + eprintln!("encountered {:?}", unexpected); + return Err(Error { + line: next.line, + kind: ErrorKind::ExpectedExpression(next.lexeme), + }); + } + }; + + Ok(Expr::Literal(literal)) + } + + // internal helpers + + fn identifier(&mut self, err: &'static str) -> Result<Token, Error> { + if let TokenKind::Identifier(_) = self.peek().kind { + Ok(self.advance()) + } else { + Err(Error { + line: self.peek().line, + kind: ErrorKind::ExpectedToken(err), + }) + } + } + + /// Check if the next token is in `oneof`, and advance if it is. + fn match_token(&mut self, token: &TokenKind) -> bool { + if self.check_token(token) { + self.advance(); + return true; + } + + false + } + + /// Return the next token and advance parser state. + fn advance(&mut self) -> Token { + if !self.is_at_end() { + self.current += 1; + } + + return self.previous().clone(); + } + + fn is_at_end(&self) -> bool { + self.check_token(&TokenKind::Eof) + } + + /// Is the next token `token`? + fn check_token(&self, token: &TokenKind) -> bool { + self.peek().kind == *token + } + + fn peek(&self) -> &Token { + &self.tokens[self.current] + } + + fn previous(&self) -> &Token { + &self.tokens[self.current - 1] + } + + fn consume(&mut self, kind: &TokenKind, err: ErrorKind) -> Result<Token, Error> { + if self.check_token(kind) { + return Ok(self.advance()); + } + + Err(Error { + line: self.peek().line, + kind: err, + }) + } + + fn synchronise(&mut self) { + self.advance(); + + while !self.is_at_end() { + if self.previous().kind == TokenKind::Semicolon { + return; + } + + match self.peek().kind { + TokenKind::Class + | TokenKind::Fun + | TokenKind::Var + | TokenKind::For + | TokenKind::If + | TokenKind::While + | TokenKind::Print + | TokenKind::Return => return, + + _ => { + self.advance(); + } + } + } + } + + fn binary_operator( + &mut self, + oneof: &[TokenKind], + each: fn(&mut Parser) -> ExprResult, + ) -> ExprResult { + let mut expr = each(self)?; + + while oneof.iter().any(|t| self.match_token(t)) { + expr = Expr::Binary(Binary { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(each(self)?), + }) + } + + return Ok(expr); + } +} + +pub fn parse(tokens: Vec<Token>) -> Result<Block, Vec<Error>> { + let mut parser = Parser { tokens, current: 0 }; + let mut program: Block = vec![]; + let mut errors: Vec<Error> = vec![]; + + while !parser.is_at_end() { + match parser.declaration() { + Err(err) => { + errors.push(err); + parser.synchronise(); + } + Ok(decl) => { + program.push(decl); + } + } + } + + if errors.is_empty() { + Ok(program) + } else { + Err(errors) + } +} diff --git a/users/tazjin/rlox/src/treewalk/resolver.rs b/users/tazjin/rlox/src/treewalk/resolver.rs new file mode 100644 index 000000000000..3d12973aa089 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/resolver.rs @@ -0,0 +1,199 @@ +// Resolves variable access to their specific instances in the +// environment chain. +// +// https://craftinginterpreters.com/resolving-and-binding.html + +use std::collections::HashMap; +use std::rc::Rc; + +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::parser::{self, Expr, Statement}; +use crate::treewalk::scanner::Token; + +#[derive(Default)] +struct Resolver<'a> { + scopes: Vec<HashMap<&'a str, bool>>, +} + +impl<'a> Resolver<'a> { + // AST traversal + fn resolve(&mut self, program: &'a mut parser::Block) -> Result<(), Error> { + self.begin_scope(); + for stmt in program { + self.resolve_stmt(stmt)?; + } + self.end_scope(); + + Ok(()) + } + + fn resolve_stmt(&mut self, stmt: &'a mut Statement) -> Result<(), Error> { + match stmt { + Statement::Expr(expr) => self.resolve_expr(expr), + Statement::Print(expr) => self.resolve_expr(expr), + Statement::Var(var) => self.resolve_var(var), + Statement::Return(ret) => self.resolve_expr(&mut ret.value), + Statement::Block(block) => self.resolve(block), + + Statement::If(if_stmt) => { + self.resolve_expr(&mut if_stmt.condition)?; + self.resolve_stmt(&mut if_stmt.then_branch)?; + + if let Some(branch) = if_stmt.else_branch.as_mut() { + self.resolve_stmt(branch)?; + } + + Ok(()) + } + + Statement::While(while_stmt) => { + self.resolve_expr(&mut while_stmt.condition)?; + self.resolve_stmt(&mut while_stmt.body) + } + + Statement::Function(func) => match Rc::get_mut(func) { + Some(func) => self.resolve_function(func), + // The resolver does not clone references, so unless + // the interpreter is called before the resolver this + // case should never happen. + None => { + return Err(Error { + line: 0, + kind: ErrorKind::InternalError( + "multiple function references before interpretation".into(), + ), + }) + } + }, + } + } + + fn resolve_var(&mut self, var: &'a mut parser::Var) -> Result<(), Error> { + self.declare(&var.name.lexeme); + + if let Some(init) = &mut var.initialiser { + self.resolve_expr(init)?; + } + + self.define(&var.name.lexeme); + + Ok(()) + } + + fn resolve_function(&mut self, func: &'a mut parser::Function) -> Result<(), Error> { + self.declare(&func.name.lexeme); + self.define(&func.name.lexeme); + + self.begin_scope(); + + for param in &func.params { + self.declare(¶m.lexeme); + self.define(¶m.lexeme); + } + + for stmt in &mut func.body { + self.resolve_stmt(stmt)?; + } + + self.end_scope(); + + Ok(()) + } + + fn resolve_expr(&mut self, expr: &'a mut Expr) -> Result<(), Error> { + match expr { + Expr::Variable(var) => self.resolve_variable(var), + Expr::Assign(assign) => self.resolve_assign(assign), + Expr::Grouping(grouping) => self.resolve_expr(&mut grouping.0), + Expr::Call(call) => self.resolve_call(call), + Expr::Literal(_) => Ok(()), + Expr::Unary(unary) => self.resolve_expr(&mut unary.right), + + Expr::Logical(log) => { + self.resolve_expr(&mut log.left)?; + self.resolve_expr(&mut log.right) + } + + Expr::Binary(binary) => { + self.resolve_expr(&mut binary.left)?; + self.resolve_expr(&mut binary.right) + } + } + } + + fn resolve_variable(&mut self, var: &'a mut parser::Variable) -> Result<(), Error> { + if let Some(scope) = self.scopes.last_mut() { + if let Some(false) = scope.get(var.name.lexeme.as_str()) { + return Err(Error { + line: var.name.line, + kind: ErrorKind::StaticError( + "can't read local variable in its own initialiser".into(), + ), + }); + } + } + + var.depth = self.resolve_local(&var.name); + Ok(()) + } + + fn resolve_assign(&mut self, assign: &'a mut parser::Assign) -> Result<(), Error> { + self.resolve_expr(&mut assign.value)?; + assign.depth = self.resolve_local(&assign.name); + Ok(()) + } + + fn resolve_local(&mut self, name: &'a Token) -> Option<usize> { + for (c, scope) in self.scopes.iter().rev().enumerate() { + if scope.contains_key(name.lexeme.as_str()) { + return Some(c); + } + } + + None + } + + fn resolve_call(&mut self, call: &'a mut parser::Call) -> Result<(), Error> { + self.resolve_expr(&mut call.callee)?; + + for arg in call.args.iter_mut() { + self.resolve_expr(arg)?; + } + + Ok(()) + } + + // Internal helpers + + fn declare(&mut self, name: &'a str) { + if let Some(scope) = self.scopes.last_mut() { + scope.insert(&name, false); + } + } + + fn define(&mut self, name: &'a str) { + if let Some(scope) = self.scopes.last_mut() { + scope.insert(&name, true); + } + } + + fn begin_scope(&mut self) { + self.scopes.push(Default::default()); + } + + fn end_scope(&mut self) { + self.scopes.pop(); + } +} + +pub fn resolve(globals: &[String], block: &mut parser::Block) -> Result<(), Error> { + let mut resolver: Resolver = Default::default(); + + // Scope for static globals only starts, never ends. + resolver.begin_scope(); + for global in globals { + resolver.define(global); + } + + resolver.resolve(block) +} |