diff options
Diffstat (limited to 'users/tazjin/rlox/src/treewalk')
-rw-r--r-- | users/tazjin/rlox/src/treewalk/errors.rs | 59 | ||||
-rw-r--r-- | users/tazjin/rlox/src/treewalk/interpreter.rs | 556 | ||||
-rw-r--r-- | users/tazjin/rlox/src/treewalk/interpreter/builtins.rs | 25 | ||||
-rw-r--r-- | users/tazjin/rlox/src/treewalk/interpreter/tests.rs | 97 | ||||
-rw-r--r-- | users/tazjin/rlox/src/treewalk/mod.rs | 6 | ||||
-rw-r--r-- | users/tazjin/rlox/src/treewalk/parser.rs | 716 | ||||
-rw-r--r-- | users/tazjin/rlox/src/treewalk/resolver.rs | 214 |
7 files changed, 1673 insertions, 0 deletions
diff --git a/users/tazjin/rlox/src/treewalk/errors.rs b/users/tazjin/rlox/src/treewalk/errors.rs new file mode 100644 index 000000000000..391663d51b18 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/errors.rs @@ -0,0 +1,59 @@ +use crate::scanner::ScannerError; +use crate::treewalk::interpreter::Value; + +use std::fmt; + +#[derive(Debug)] +pub enum ErrorKind { + UnexpectedChar(char), + UnterminatedString, + UnmatchedParens, + ExpectedExpression(String), + ExpectedSemicolon, + ExpectedClosingBrace, + ExpectedToken(&'static str), + TypeError(String), + UndefinedVariable(String), + InternalError(String), + InvalidAssignmentTarget(String), + RuntimeError(String), + StaticError(String), + + // This variant is not an error, rather it is used for + // short-circuiting out of a function body that hits a `return` + // statement. + // + // It's implemented this way because in the original book the + // author uses exceptions for control flow, and this is the + // closest equivalent that I had available without diverging too + // much. + FunctionReturn(Value), +} + +#[derive(Debug)] +pub struct Error { + pub line: usize, + pub kind: ErrorKind, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[line {}] Error: {:?}", self.line, self.kind) + } +} + +impl From<ScannerError> for Error { + fn from(err: ScannerError) -> Self { + match err { + ScannerError::UnexpectedChar { line, unexpected } => Error { + line, + kind: ErrorKind::UnexpectedChar(unexpected), + }, + + ScannerError::UnterminatedString { line } => Error { + line, + kind: ErrorKind::UnterminatedString, + }, + } + } +} diff --git a/users/tazjin/rlox/src/treewalk/interpreter.rs b/users/tazjin/rlox/src/treewalk/interpreter.rs new file mode 100644 index 000000000000..d9fe33661684 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/interpreter.rs @@ -0,0 +1,556 @@ +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::parser::{self, Block, Expr, Literal, Statement}; +use crate::treewalk::resolver; +use crate::treewalk::scanner::{self, TokenKind}; +use crate::Lox; +use std::collections::HashMap; +use std::rc::Rc; +use std::sync::RwLock; + +// Implementation of built-in functions. +mod builtins; + +#[cfg(test)] +mod tests; + +// Tree-walk interpreter + +// Representation of all callables, including builtins & user-defined +// functions. +#[derive(Clone, Debug)] +pub enum Callable { + Builtin(&'static dyn builtins::Builtin), + Function { + func: Rc<parser::Function>, + closure: Rc<RwLock<Environment>>, + }, +} + +impl Callable { + fn arity(&self) -> usize { + match self { + Callable::Builtin(builtin) => builtin.arity(), + Callable::Function { func, .. } => func.params.len(), + } + } + + fn call( + &self, + lox: &mut Interpreter, + args: Vec<Value>, + ) -> Result<Value, Error> { + match self { + Callable::Builtin(builtin) => builtin.call(args), + + Callable::Function { func, closure } => { + let mut fn_env: Environment = Default::default(); + fn_env.enclosing = Some(closure.clone()); + + for (param, value) in func.params.iter().zip(args.into_iter()) { + fn_env.define(param, value)?; + } + + let result = lox.interpret_block_with_env( + Some(Rc::new(RwLock::new(fn_env))), + &func.body, + ); + + match result { + // extract returned values if applicable + Err(Error { + kind: ErrorKind::FunctionReturn(value), + .. + }) => Ok(value), + + // otherwise just return the result itself + _ => result, + } + } + } + } +} + +// Representation of an in-language value. +#[derive(Clone, Debug)] +pub enum Value { + Literal(Literal), + Callable(Callable), +} + +impl PartialEq for Value { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (Value::Literal(lhs), Value::Literal(rhs)) => lhs == rhs, + // functions do not have equality + _ => false, + } + } +} + +impl From<Literal> for Value { + fn from(lit: Literal) -> Value { + Value::Literal(lit) + } +} + +impl Value { + fn expect_literal(self) -> Result<Literal, Error> { + match self { + Value::Literal(lit) => Ok(lit), + _ => unimplemented!(), // which error? which line? + } + } +} + +#[derive(Debug, Default)] +pub struct Environment { + enclosing: Option<Rc<RwLock<Environment>>>, + values: HashMap<String, Value>, +} + +impl Environment { + fn define( + &mut self, + name: &scanner::Token, + value: Value, + ) -> Result<(), Error> { + let ident = identifier_str(name)?; + self.values.insert(ident.into(), value); + Ok(()) + } + + fn get( + &self, + ident: &str, + line: usize, + depth: usize, + ) -> Result<Value, Error> { + if depth > 0 { + match &self.enclosing { + None => { + return Err(Error { + line, + kind: ErrorKind::InternalError(format!( + "invalid depth {} for {}", + depth, ident + )), + }) + } + Some(parent) => { + let env = parent + .read() + .expect("fatal: environment lock poisoned"); + return env.get(ident, line, depth - 1); + } + } + } + + self.values + .get(ident) + .map(Clone::clone) + .ok_or_else(|| Error { + line, + kind: ErrorKind::UndefinedVariable(ident.into()), + }) + } + + fn assign( + &mut self, + name: &scanner::Token, + value: Value, + ) -> Result<(), Error> { + let ident = identifier_str(name)?; + + match self.values.get_mut(ident) { + Some(target) => { + *target = value; + Ok(()) + } + None => { + if let Some(parent) = &self.enclosing { + return parent.write().unwrap().assign(name, value); + } + + Err(Error { + line: name.line, + kind: ErrorKind::UndefinedVariable(ident.into()), + }) + } + } + } +} + +fn identifier_str(name: &scanner::Token) -> Result<&str, Error> { + if let TokenKind::Identifier(ident) = &name.kind { + Ok(ident) + } else { + Err(Error { + line: name.line, + kind: ErrorKind::InternalError("unexpected identifier kind".into()), + }) + } +} + +#[derive(Debug)] +pub struct Interpreter { + env: Rc<RwLock<Environment>>, +} + +impl Lox for Interpreter { + type Value = Value; + type Error = Error; + + /// Create a new interpreter and configure the initial global + /// variable set. + fn create() -> Self { + let mut globals = HashMap::new(); + + globals.insert( + "clock".into(), + Value::Callable(Callable::Builtin(&builtins::Clock {})), + ); + + Interpreter { + env: Rc::new(RwLock::new(Environment { + enclosing: None, + values: globals, + })), + } + } + + fn interpret(&mut self, code: String) -> Result<Value, Vec<Error>> { + let chars: Vec<char> = code.chars().collect(); + + let mut program = scanner::scan(&chars) + .map_err(|errors| errors.into_iter().map(Into::into).collect()) + .and_then(|tokens| parser::parse(tokens))?; + + let globals = self + .env + .read() + .expect("static globals lock poisoned") + .values + .keys() + .map(Clone::clone) + .collect::<Vec<String>>(); + + resolver::resolve(&globals, &mut program).map_err(|e| vec![e])?; + self.interpret_block_with_env(None, &program) + .map_err(|e| vec![e]) + } +} + +impl Interpreter { + // Environment modification helpers + fn define_var( + &mut self, + name: &scanner::Token, + value: Value, + ) -> Result<(), Error> { + self.env + .write() + .expect("environment lock is poisoned") + .define(name, value) + } + + fn assign_var( + &mut self, + name: &scanner::Token, + value: Value, + ) -> Result<(), Error> { + self.env + .write() + .expect("environment lock is poisoned") + .assign(name, value) + } + + fn get_var(&mut self, var: &parser::Variable) -> Result<Value, Error> { + let ident = identifier_str(&var.name)?; + let depth = var.depth.ok_or_else(|| Error { + line: var.name.line, + kind: ErrorKind::UndefinedVariable(ident.into()), + })?; + + self.env.read().expect("environment lock is poisoned").get( + ident, + var.name.line, + depth, + ) + } + + /// Interpret the block in the supplied environment. If no + /// environment is supplied, a new one is created using the + /// current one as its parent. + fn interpret_block_with_env( + &mut self, + env: Option<Rc<RwLock<Environment>>>, + block: &parser::Block, + ) -> Result<Value, Error> { + let env = match env { + Some(env) => env, + None => { + let env: Rc<RwLock<Environment>> = Default::default(); + set_enclosing_env(&env, self.env.clone()); + env + } + }; + + let previous = std::mem::replace(&mut self.env, env); + let result = self.interpret_block(block); + + // Swap it back, discarding the child env. + self.env = previous; + + return result; + } + + fn interpret_block(&mut self, program: &Block) -> Result<Value, Error> { + let mut value = Value::Literal(Literal::Nil); + + for stmt in program { + value = self.interpret_stmt(stmt)?; + } + + Ok(value) + } + + fn interpret_stmt(&mut self, stmt: &Statement) -> Result<Value, Error> { + let value = match stmt { + Statement::Expr(expr) => self.eval(expr)?, + Statement::Print(expr) => { + let result = self.eval(expr)?; + let output = format!("{:?}", result); + println!("{}", output); + Value::Literal(Literal::String(output)) + } + Statement::Var(var) => return self.interpret_var(var), + Statement::Block(block) => { + return self.interpret_block_with_env(None, block) + } + Statement::If(if_stmt) => return self.interpret_if(if_stmt), + Statement::While(while_stmt) => { + return self.interpret_while(while_stmt) + } + Statement::Function(func) => { + return self.interpret_function(func.clone()) + } + Statement::Return(ret) => { + return Err(Error { + line: 0, + kind: ErrorKind::FunctionReturn(self.eval(&ret.value)?), + }) + } + }; + + Ok(value) + } + + fn interpret_var(&mut self, var: &parser::Var) -> Result<Value, Error> { + let init = var.initialiser.as_ref().ok_or_else(|| Error { + line: var.name.line, + kind: ErrorKind::InternalError( + "missing variable initialiser".into(), + ), + })?; + let value = self.eval(init)?; + self.define_var(&var.name, value.clone())?; + Ok(value) + } + + fn interpret_if(&mut self, if_stmt: &parser::If) -> Result<Value, Error> { + let condition = self.eval(&if_stmt.condition)?; + + if eval_truthy(&condition) { + self.interpret_stmt(&if_stmt.then_branch) + } else if let Some(else_branch) = &if_stmt.else_branch { + self.interpret_stmt(else_branch) + } else { + Ok(Value::Literal(Literal::Nil)) + } + } + + fn interpret_while( + &mut self, + stmt: &parser::While, + ) -> Result<Value, Error> { + let mut value = Value::Literal(Literal::Nil); + while eval_truthy(&self.eval(&stmt.condition)?) { + value = self.interpret_stmt(&stmt.body)?; + } + + Ok(value) + } + + fn interpret_function( + &mut self, + func: Rc<parser::Function>, + ) -> Result<Value, Error> { + let name = func.name.clone(); + let value = Value::Callable(Callable::Function { + func, + closure: self.env.clone(), + }); + self.define_var(&name, value.clone())?; + Ok(value) + } + + fn eval(&mut self, expr: &Expr) -> Result<Value, Error> { + match expr { + Expr::Assign(assign) => self.eval_assign(assign), + Expr::Literal(lit) => Ok(lit.clone().into()), + Expr::Grouping(grouping) => self.eval(&*grouping.0), + Expr::Unary(unary) => self.eval_unary(unary), + Expr::Binary(binary) => self.eval_binary(binary), + Expr::Variable(var) => self.get_var(var), + Expr::Logical(log) => self.eval_logical(log), + Expr::Call(call) => self.eval_call(call), + } + } + + fn eval_unary(&mut self, expr: &parser::Unary) -> Result<Value, Error> { + let right = self.eval(&*expr.right)?; + + match (&expr.operator.kind, right) { + (TokenKind::Minus, Value::Literal(Literal::Number(num))) => { + Ok(Literal::Number(-num).into()) + } + (TokenKind::Bang, right) => { + Ok(Literal::Boolean(!eval_truthy(&right)).into()) + } + + (op, right) => Err(Error { + line: expr.operator.line, + kind: ErrorKind::TypeError(format!( + "Operator '{:?}' can not be called with argument '{:?}'", + op, right + )), + }), + } + } + + fn eval_binary(&mut self, expr: &parser::Binary) -> Result<Value, Error> { + let left = self.eval(&*expr.left)?.expect_literal()?; + let right = self.eval(&*expr.right)?.expect_literal()?; + + let result = match (&expr.operator.kind, left, right) { + // Numeric + (TokenKind::Minus, Literal::Number(l), Literal::Number(r)) => Literal::Number(l - r), + (TokenKind::Slash, Literal::Number(l), Literal::Number(r)) => Literal::Number(l / r), + (TokenKind::Star, Literal::Number(l), Literal::Number(r)) => Literal::Number(l * r), + (TokenKind::Plus, Literal::Number(l), Literal::Number(r)) => Literal::Number(l + r), + + // Strings + (TokenKind::Plus, Literal::String(l), Literal::String(r)) => { + Literal::String(format!("{}{}", l, r)) + } + + // Comparators (on numbers only?) + (TokenKind::Greater, Literal::Number(l), Literal::Number(r)) => Literal::Boolean(l > r), + (TokenKind::GreaterEqual, Literal::Number(l), Literal::Number(r)) => { + Literal::Boolean(l >= r) + } + (TokenKind::Less, Literal::Number(l), Literal::Number(r)) => Literal::Boolean(l < r), + (TokenKind::LessEqual, Literal::Number(l), Literal::Number(r)) => { + Literal::Boolean(l <= r) + } + + // Equality + (TokenKind::Equal, l, r) => Literal::Boolean(l == r), + (TokenKind::BangEqual, l, r) => Literal::Boolean(l != r), + + (op, left, right) => { + return Err(Error { + line: expr.operator.line, + kind: ErrorKind::TypeError(format!( + "Operator '{:?}' can not be called with arguments '({:?}, {:?})'", + op, left, right + )), + }) + } + }; + + Ok(result.into()) + } + + fn eval_assign(&mut self, assign: &parser::Assign) -> Result<Value, Error> { + let value = self.eval(&assign.value)?; + self.assign_var(&assign.name, value.clone())?; + Ok(value) + } + + fn eval_logical( + &mut self, + logical: &parser::Logical, + ) -> Result<Value, Error> { + let left = eval_truthy(&self.eval(&logical.left)?); + let right = eval_truthy(&self.eval(&logical.right)?); + + match &logical.operator.kind { + TokenKind::And => Ok(Literal::Boolean(left && right).into()), + TokenKind::Or => Ok(Literal::Boolean(left || right).into()), + kind => Err(Error { + line: logical.operator.line, + kind: ErrorKind::InternalError(format!( + "Invalid logical operator: {:?}", + kind + )), + }), + } + } + + fn eval_call(&mut self, call: &parser::Call) -> Result<Value, Error> { + let callable = match self.eval(&call.callee)? { + Value::Callable(c) => c, + Value::Literal(v) => { + return Err(Error { + line: call.paren.line, + kind: ErrorKind::RuntimeError(format!( + "not callable: {:?}", + v + )), + }) + } + }; + + let mut args = vec![]; + for arg in &call.args { + args.push(self.eval(arg)?); + } + + if callable.arity() != args.len() { + return Err(Error { + line: call.paren.line, + kind: ErrorKind::RuntimeError(format!( + "Expected {} arguments, but got {}", + callable.arity(), + args.len(), + )), + }); + } + + callable.call(self, args) + } +} + +// Interpreter functions not dependent on interpreter-state. + +fn eval_truthy(lit: &Value) -> bool { + if let Value::Literal(lit) = lit { + match lit { + Literal::Nil => false, + Literal::Boolean(b) => *b, + _ => true, + } + } else { + false + } +} + +fn set_enclosing_env( + this: &RwLock<Environment>, + parent: Rc<RwLock<Environment>>, +) { + this.write() + .expect("environment lock is poisoned") + .enclosing = Some(parent); +} diff --git a/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs b/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs new file mode 100644 index 000000000000..c502d2a1718a --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/interpreter/builtins.rs @@ -0,0 +1,25 @@ +use std::fmt; +use std::time::{SystemTime, UNIX_EPOCH}; + +use crate::treewalk::errors::Error; +use crate::treewalk::interpreter::Value; +use crate::treewalk::parser::Literal; + +pub trait Builtin: fmt::Debug { + fn arity(&self) -> usize; + fn call(&self, args: Vec<Value>) -> Result<Value, Error>; +} + +// Builtin to return the current timestamp. +#[derive(Debug)] +pub struct Clock {} +impl Builtin for Clock { + fn arity(&self) -> usize { + 0 + } + + fn call(&self, _args: Vec<Value>) -> Result<Value, Error> { + let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); + Ok(Value::Literal(Literal::Number(now.as_secs() as f64))) + } +} diff --git a/users/tazjin/rlox/src/treewalk/interpreter/tests.rs b/users/tazjin/rlox/src/treewalk/interpreter/tests.rs new file mode 100644 index 000000000000..2fc6f4fee978 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/interpreter/tests.rs @@ -0,0 +1,97 @@ +use super::*; + +/// Evaluate a code snippet, returning a value. +fn parse_eval(code: &str) -> Value { + Interpreter::create() + .interpret(code.into()) + .expect("could not interpret code") +} + +#[test] +fn test_if() { + let result = parse_eval( + r#" +if (42 > 23) + "pass"; +else + "fail"; +"#, + ); + + assert_eq!(Value::Literal(Literal::String("pass".into())), result,); +} + +#[test] +fn test_scope() { + let result = parse_eval( + r#" +var result = ""; + +var a = "global a, "; +var b = "global b, "; +var c = "global c"; + +{ + var a = "outer a, "; + var b = "outer b, "; + + { + var a = "inner a, "; + result = a + b + c; + } +} +"#, + ); + + assert_eq!( + Value::Literal(Literal::String("inner a, outer b, global c".into())), + result, + ); +} + +#[test] +fn test_binary_operators() { + assert_eq!(Value::Literal(Literal::Number(42.0)), parse_eval("40 + 2;")); + + assert_eq!( + Value::Literal(Literal::String("foobar".into())), + parse_eval("\"foo\" + \"bar\";") + ); +} + +#[test] +fn test_functions() { + let result = parse_eval( + r#" +fun add(a, b, c) { + a + b + c; +} + +add(1, 2, 3); +"#, + ); + + assert_eq!(Value::Literal(Literal::Number(6.0)), result); +} + +#[test] +fn test_closure() { + let result = parse_eval( + r#" +fun makeCounter() { + var i = 0; + fun count() { + i = i + 1; + } + + return count; +} + +var counter = makeCounter(); +counter(); // "1". +counter(); // "2". +"#, + ); + + assert_eq!(Value::Literal(Literal::Number(2.0)), result); +} diff --git a/users/tazjin/rlox/src/treewalk/mod.rs b/users/tazjin/rlox/src/treewalk/mod.rs new file mode 100644 index 000000000000..2d82b3320a90 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/mod.rs @@ -0,0 +1,6 @@ +use crate::scanner; + +mod errors; +pub mod interpreter; +mod parser; +mod resolver; diff --git a/users/tazjin/rlox/src/treewalk/parser.rs b/users/tazjin/rlox/src/treewalk/parser.rs new file mode 100644 index 000000000000..003cc34b4665 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/parser.rs @@ -0,0 +1,716 @@ +// This implements the grammar of Lox as described starting in the +// Crafting Interpreters chapter "Representing Code". Note that the +// upstream Java implementation works around Java being bad at value +// classes by writing a code generator for Java. +// +// My Rust implementation skips this step because it's unnecessary, we +// have real types. +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::scanner::{Token, TokenKind}; +use std::rc::Rc; + +// AST + +#[derive(Debug)] +pub struct Assign { + pub name: Token, + pub value: Box<Expr>, + pub depth: Option<usize>, +} + +#[derive(Debug)] +pub struct Binary { + pub left: Box<Expr>, + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Logical { + pub left: Box<Expr>, + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Grouping(pub Box<Expr>); + +#[derive(Debug, Clone, PartialEq)] +pub enum Literal { + Boolean(bool), + Number(f64), + String(String), + Nil, +} + +#[derive(Debug)] +pub struct Unary { + pub operator: Token, + pub right: Box<Expr>, +} + +#[derive(Debug)] +pub struct Call { + pub callee: Box<Expr>, + pub paren: Token, + pub args: Vec<Expr>, +} + +// Not to be confused with `Var`, which is for assignment. +#[derive(Debug)] +pub struct Variable { + pub name: Token, + pub depth: Option<usize>, +} + +#[derive(Debug)] +pub enum Expr { + Assign(Assign), + Binary(Binary), + Grouping(Grouping), + Literal(Literal), + Unary(Unary), + Call(Call), + Variable(Variable), + Logical(Logical), +} + +// Variable assignment. Not to be confused with `Variable`, which is +// for access. +#[derive(Debug)] +pub struct Var { + pub name: Token, + pub initialiser: Option<Expr>, +} + +#[derive(Debug)] +pub struct Return { + pub value: Expr, +} + +#[derive(Debug)] +pub struct If { + pub condition: Expr, + pub then_branch: Box<Statement>, + pub else_branch: Option<Box<Statement>>, +} + +#[derive(Debug)] +pub struct While { + pub condition: Expr, + pub body: Box<Statement>, +} + +pub type Block = Vec<Statement>; + +#[derive(Debug)] +pub struct Function { + pub name: Token, + pub params: Vec<Token>, + pub body: Block, +} + +#[derive(Debug)] +pub enum Statement { + Expr(Expr), + Print(Expr), + Var(Var), + Block(Block), + If(If), + While(While), + Function(Rc<Function>), + Return(Return), +} + +// Parser + +/* +program → declaration* EOF ; + +declaration → funDecl + | varDecl + | statement ; + +funDecl → "fun" function ; +function → IDENTIFIER "(" parameters? ")" block ; +parameters → IDENTIFIER ( "," IDENTIFIER )* ; + + +statement → exprStmt + | forStmt + | ifStmt + | printStmt + | returnStmt + | whileStmt + | block ; + +forStmt → "for" "(" ( varDecl | exprStmt | ";" ) + expression? ";" + expression? ")" statement ; + +returnStmt → "return" expression? ";" ; + +whileStmt → "while" "(" expression ")" statement ; + +exprStmt → expression ";" ; + +ifStmt → "if" "(" expression ")" statement + ( "else" statement )? ; + +printStmt → "print" expression ";" ; + +expression → assignment ; +assignment → IDENTIFIER "=" assignment + | logic_or ; +logic_or → logic_and ( "or" logic_and )* ; +logic_and → equality ( "and" equality )* ; +equality → comparison ( ( "!=" | "==" ) comparison )* ; +comparison → term ( ( ">" | ">=" | "<" | "<=" ) term )* ; +term → factor ( ( "-" | "+" ) factor )* ; +factor → unary ( ( "/" | "*" ) unary )* ; +unary → ( "!" | "-" ) unary | call ; +call → primary ( "(" arguments? ")" )* ; +arguments → expression ( "," expression )* ; +primary → NUMBER | STRING | "true" | "false" | "nil" + | "(" expression ")" ; +*/ + +struct Parser { + tokens: Vec<Token>, + current: usize, +} + +type ExprResult = Result<Expr, Error>; +type StmtResult = Result<Statement, Error>; + +impl Parser { + // recursive-descent parser functions + + fn declaration(&mut self) -> StmtResult { + if self.match_token(&TokenKind::Fun) { + return self.function(); + } + + if self.match_token(&TokenKind::Var) { + return self.var_declaration(); + } + + self.statement() + } + + fn function(&mut self) -> StmtResult { + let name = self.identifier("Expected function name.")?; + + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expect '(' after function name."), + )?; + + let mut params = vec![]; + + if !self.check_token(&TokenKind::RightParen) { + loop { + if params.len() >= 255 { + return Err(Error { + line: self.peek().line, + kind: ErrorKind::InternalError( + "255 parameter limit exceeded.".into(), + ), + }); + } + + params.push(self.identifier("Expected parameter name.")?); + + if !self.match_token(&TokenKind::Comma) { + break; + } + } + } + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expect ')' after parameters."), + )?; + + self.consume( + &TokenKind::LeftBrace, + ErrorKind::ExpectedToken("Expect '{' before function body."), + )?; + + Ok(Statement::Function(Rc::new(Function { + name, + params, + body: self.block_statement()?, + }))) + } + + fn var_declaration(&mut self) -> StmtResult { + // Since `TokenKind::Identifier` carries data, we can't use + // `consume`. + let mut var = Var { + name: self.identifier("Expected variable name.")?, + initialiser: None, + }; + + if self.match_token(&TokenKind::Equal) { + var.initialiser = Some(self.expression()?); + } + + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Var(var)) + } + + fn statement(&mut self) -> StmtResult { + if self.match_token(&TokenKind::Print) { + self.print_statement() + } else if self.match_token(&TokenKind::LeftBrace) { + Ok(Statement::Block(self.block_statement()?)) + } else if self.match_token(&TokenKind::If) { + self.if_statement() + } else if self.match_token(&TokenKind::While) { + self.while_statement() + } else if self.match_token(&TokenKind::For) { + self.for_statement() + } else if self.match_token(&TokenKind::Return) { + self.return_statement() + } else { + self.expr_statement() + } + } + + fn print_statement(&mut self) -> StmtResult { + let expr = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Print(expr)) + } + + fn block_statement(&mut self) -> Result<Block, Error> { + let mut block: Block = vec![]; + + while !self.check_token(&TokenKind::RightBrace) && !self.is_at_end() { + block.push(self.declaration()?); + } + + self.consume(&TokenKind::RightBrace, ErrorKind::ExpectedClosingBrace)?; + + Ok(block) + } + + fn if_statement(&mut self) -> StmtResult { + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'if'"), + )?; + let condition = self.expression()?; + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after condition"), + )?; + + let then_branch = Box::new(self.statement()?); + + let mut stmt = If { + condition, + then_branch, + else_branch: Option::None, + }; + + if self.match_token(&TokenKind::Else) { + stmt.else_branch = Some(Box::new(self.statement()?)); + } + + Ok(Statement::If(stmt)) + } + + fn while_statement(&mut self) -> StmtResult { + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'while'"), + )?; + + let condition = self.expression()?; + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after 'while'"), + )?; + + Ok(Statement::While(While { + condition, + body: Box::new(self.statement()?), + })) + } + + fn for_statement(&mut self) -> StmtResult { + // Parsing of clauses ... + self.consume( + &TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'for'"), + )?; + + let initialiser = if self.match_token(&TokenKind::Semicolon) { + None + } else if self.match_token(&TokenKind::Var) { + Some(self.var_declaration()?) + } else { + Some(self.expr_statement()?) + }; + + let condition = if self.check_token(&TokenKind::Semicolon) { + // unspecified condition => infinite loop + Expr::Literal(Literal::Boolean(true)) + } else { + self.expression()? + }; + + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + + let increment = if self.check_token(&TokenKind::RightParen) { + None + } else { + Some(self.expression()?) + }; + + self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after for clauses"), + )?; + + let mut body = self.statement()?; + + // ... desugaring to while + + if let Some(inc) = increment { + body = Statement::Block(vec![body, Statement::Expr(inc)]); + } + + body = Statement::While(While { + condition, + body: Box::new(body), + }); + + if let Some(init) = initialiser { + body = Statement::Block(vec![init, body]); + } + + Ok(body) + } + + fn return_statement(&mut self) -> StmtResult { + let value = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Return(Return { value })) + } + + fn expr_statement(&mut self) -> StmtResult { + let expr = self.expression()?; + self.consume(&TokenKind::Semicolon, ErrorKind::ExpectedSemicolon)?; + Ok(Statement::Expr(expr)) + } + + fn expression(&mut self) -> ExprResult { + self.assignment() + } + + fn assignment(&mut self) -> ExprResult { + let expr = self.logic_or()?; + + if self.match_token(&TokenKind::Equal) { + let equals = self.previous().clone(); + let value = self.assignment()?; + + if let Expr::Variable(Variable { name, .. }) = expr { + return Ok(Expr::Assign(Assign { + name, + value: Box::new(value), + depth: None, + })); + } + + return Err(Error { + line: equals.line, + kind: ErrorKind::InvalidAssignmentTarget(format!( + "{:?}", + equals + )), + }); + } + + Ok(expr) + } + + fn logic_or(&mut self) -> ExprResult { + let mut expr = self.logic_and()?; + + while self.match_token(&TokenKind::Or) { + expr = Expr::Logical(Logical { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(self.logic_and()?), + }) + } + + Ok(expr) + } + + fn logic_and(&mut self) -> ExprResult { + let mut expr = self.equality()?; + + while self.match_token(&TokenKind::And) { + expr = Expr::Logical(Logical { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(self.equality()?), + }) + } + + Ok(expr) + } + + fn equality(&mut self) -> ExprResult { + self.binary_operator( + &[TokenKind::BangEqual, TokenKind::EqualEqual], + Self::comparison, + ) + } + + fn comparison(&mut self) -> ExprResult { + self.binary_operator( + &[ + TokenKind::Greater, + TokenKind::GreaterEqual, + TokenKind::Less, + TokenKind::LessEqual, + ], + Self::term, + ) + } + + fn term(&mut self) -> ExprResult { + self.binary_operator(&[TokenKind::Minus, TokenKind::Plus], Self::factor) + } + + fn factor(&mut self) -> ExprResult { + self.binary_operator(&[TokenKind::Slash, TokenKind::Star], Self::unary) + } + + fn unary(&mut self) -> ExprResult { + if self.match_token(&TokenKind::Bang) + || self.match_token(&TokenKind::Minus) + { + return Ok(Expr::Unary(Unary { + operator: self.previous().clone(), + right: Box::new(self.unary()?), + })); + } + + return self.call(); + } + + fn call(&mut self) -> ExprResult { + let mut expr = self.primary()?; + + loop { + if self.match_token(&TokenKind::LeftParen) { + expr = self.finish_call(expr)?; + } else { + break; + } + } + + Ok(expr) + } + + fn finish_call(&mut self, callee: Expr) -> ExprResult { + let mut args = vec![]; + + if !self.check_token(&TokenKind::RightParen) { + loop { + // TODO(tazjin): Check for max args count + args.push(self.expression()?); + if !self.match_token(&TokenKind::Comma) { + break; + } + } + } + + let paren = self.consume( + &TokenKind::RightParen, + ErrorKind::ExpectedToken("Expect ')' after arguments."), + )?; + + Ok(Expr::Call(Call { + args, + callee: Box::new(callee), + paren, + })) + } + + fn primary(&mut self) -> ExprResult { + let next = self.advance(); + let literal = match next.kind { + TokenKind::True => Literal::Boolean(true), + TokenKind::False => Literal::Boolean(false), + TokenKind::Nil => Literal::Nil, + TokenKind::Number(num) => Literal::Number(num), + TokenKind::String(string) => Literal::String(string), + + TokenKind::LeftParen => { + let expr = self.expression()?; + self.consume( + &TokenKind::RightParen, + ErrorKind::UnmatchedParens, + )?; + return Ok(Expr::Grouping(Grouping(Box::new(expr)))); + } + + TokenKind::Identifier(_) => { + return Ok(Expr::Variable(Variable { + name: next, + depth: None, + })) + } + + unexpected => { + eprintln!("encountered {:?}", unexpected); + return Err(Error { + line: next.line, + kind: ErrorKind::ExpectedExpression(next.lexeme), + }); + } + }; + + Ok(Expr::Literal(literal)) + } + + // internal helpers + + fn identifier(&mut self, err: &'static str) -> Result<Token, Error> { + if let TokenKind::Identifier(_) = self.peek().kind { + Ok(self.advance()) + } else { + Err(Error { + line: self.peek().line, + kind: ErrorKind::ExpectedToken(err), + }) + } + } + + /// Check if the next token is in `oneof`, and advance if it is. + fn match_token(&mut self, token: &TokenKind) -> bool { + if self.check_token(token) { + self.advance(); + return true; + } + + false + } + + /// Return the next token and advance parser state. + fn advance(&mut self) -> Token { + if !self.is_at_end() { + self.current += 1; + } + + return self.previous().clone(); + } + + fn is_at_end(&self) -> bool { + self.check_token(&TokenKind::Eof) + } + + /// Is the next token `token`? + fn check_token(&self, token: &TokenKind) -> bool { + self.peek().kind == *token + } + + fn peek(&self) -> &Token { + &self.tokens[self.current] + } + + fn previous(&self) -> &Token { + &self.tokens[self.current - 1] + } + + fn consume( + &mut self, + kind: &TokenKind, + err: ErrorKind, + ) -> Result<Token, Error> { + if self.check_token(kind) { + return Ok(self.advance()); + } + + Err(Error { + line: self.peek().line, + kind: err, + }) + } + + fn synchronise(&mut self) { + self.advance(); + + while !self.is_at_end() { + if self.previous().kind == TokenKind::Semicolon { + return; + } + + match self.peek().kind { + TokenKind::Class + | TokenKind::Fun + | TokenKind::Var + | TokenKind::For + | TokenKind::If + | TokenKind::While + | TokenKind::Print + | TokenKind::Return => return, + + _ => { + self.advance(); + } + } + } + } + + fn binary_operator( + &mut self, + oneof: &[TokenKind], + each: fn(&mut Parser) -> ExprResult, + ) -> ExprResult { + let mut expr = each(self)?; + + while oneof.iter().any(|t| self.match_token(t)) { + expr = Expr::Binary(Binary { + left: Box::new(expr), + operator: self.previous().clone(), + right: Box::new(each(self)?), + }) + } + + return Ok(expr); + } +} + +pub fn parse(tokens: Vec<Token>) -> Result<Block, Vec<Error>> { + let mut parser = Parser { tokens, current: 0 }; + let mut program: Block = vec![]; + let mut errors: Vec<Error> = vec![]; + + while !parser.is_at_end() { + match parser.declaration() { + Err(err) => { + errors.push(err); + parser.synchronise(); + } + Ok(decl) => { + program.push(decl); + } + } + } + + if errors.is_empty() { + Ok(program) + } else { + Err(errors) + } +} diff --git a/users/tazjin/rlox/src/treewalk/resolver.rs b/users/tazjin/rlox/src/treewalk/resolver.rs new file mode 100644 index 000000000000..8231ce5a9e58 --- /dev/null +++ b/users/tazjin/rlox/src/treewalk/resolver.rs @@ -0,0 +1,214 @@ +// Resolves variable access to their specific instances in the +// environment chain. +// +// https://craftinginterpreters.com/resolving-and-binding.html + +use std::collections::HashMap; +use std::rc::Rc; + +use crate::treewalk::errors::{Error, ErrorKind}; +use crate::treewalk::parser::{self, Expr, Statement}; +use crate::treewalk::scanner::Token; + +#[derive(Default)] +struct Resolver<'a> { + scopes: Vec<HashMap<&'a str, bool>>, +} + +impl<'a> Resolver<'a> { + // AST traversal + fn resolve(&mut self, program: &'a mut parser::Block) -> Result<(), Error> { + self.begin_scope(); + for stmt in program { + self.resolve_stmt(stmt)?; + } + self.end_scope(); + + Ok(()) + } + + fn resolve_stmt(&mut self, stmt: &'a mut Statement) -> Result<(), Error> { + match stmt { + Statement::Expr(expr) => self.resolve_expr(expr), + Statement::Print(expr) => self.resolve_expr(expr), + Statement::Var(var) => self.resolve_var(var), + Statement::Return(ret) => self.resolve_expr(&mut ret.value), + Statement::Block(block) => self.resolve(block), + + Statement::If(if_stmt) => { + self.resolve_expr(&mut if_stmt.condition)?; + self.resolve_stmt(&mut if_stmt.then_branch)?; + + if let Some(branch) = if_stmt.else_branch.as_mut() { + self.resolve_stmt(branch)?; + } + + Ok(()) + } + + Statement::While(while_stmt) => { + self.resolve_expr(&mut while_stmt.condition)?; + self.resolve_stmt(&mut while_stmt.body) + } + + Statement::Function(func) => match Rc::get_mut(func) { + Some(func) => self.resolve_function(func), + // The resolver does not clone references, so unless + // the interpreter is called before the resolver this + // case should never happen. + None => return Err(Error { + line: 0, + kind: ErrorKind::InternalError( + "multiple function references before interpretation" + .into(), + ), + }), + }, + } + } + + fn resolve_var(&mut self, var: &'a mut parser::Var) -> Result<(), Error> { + self.declare(&var.name.lexeme); + + if let Some(init) = &mut var.initialiser { + self.resolve_expr(init)?; + } + + self.define(&var.name.lexeme); + + Ok(()) + } + + fn resolve_function( + &mut self, + func: &'a mut parser::Function, + ) -> Result<(), Error> { + self.declare(&func.name.lexeme); + self.define(&func.name.lexeme); + + self.begin_scope(); + + for param in &func.params { + self.declare(¶m.lexeme); + self.define(¶m.lexeme); + } + + for stmt in &mut func.body { + self.resolve_stmt(stmt)?; + } + + self.end_scope(); + + Ok(()) + } + + fn resolve_expr(&mut self, expr: &'a mut Expr) -> Result<(), Error> { + match expr { + Expr::Variable(var) => self.resolve_variable(var), + Expr::Assign(assign) => self.resolve_assign(assign), + Expr::Grouping(grouping) => self.resolve_expr(&mut grouping.0), + Expr::Call(call) => self.resolve_call(call), + Expr::Literal(_) => Ok(()), + Expr::Unary(unary) => self.resolve_expr(&mut unary.right), + + Expr::Logical(log) => { + self.resolve_expr(&mut log.left)?; + self.resolve_expr(&mut log.right) + } + + Expr::Binary(binary) => { + self.resolve_expr(&mut binary.left)?; + self.resolve_expr(&mut binary.right) + } + } + } + + fn resolve_variable( + &mut self, + var: &'a mut parser::Variable, + ) -> Result<(), Error> { + if let Some(scope) = self.scopes.last_mut() { + if let Some(false) = scope.get(var.name.lexeme.as_str()) { + return Err(Error { + line: var.name.line, + kind: ErrorKind::StaticError( + "can't read local variable in its own initialiser" + .into(), + ), + }); + } + } + + var.depth = self.resolve_local(&var.name); + Ok(()) + } + + fn resolve_assign( + &mut self, + assign: &'a mut parser::Assign, + ) -> Result<(), Error> { + self.resolve_expr(&mut assign.value)?; + assign.depth = self.resolve_local(&assign.name); + Ok(()) + } + + fn resolve_local(&mut self, name: &'a Token) -> Option<usize> { + for (c, scope) in self.scopes.iter().rev().enumerate() { + if scope.contains_key(name.lexeme.as_str()) { + return Some(c); + } + } + + None + } + + fn resolve_call( + &mut self, + call: &'a mut parser::Call, + ) -> Result<(), Error> { + self.resolve_expr(&mut call.callee)?; + + for arg in call.args.iter_mut() { + self.resolve_expr(arg)?; + } + + Ok(()) + } + + // Internal helpers + + fn declare(&mut self, name: &'a str) { + if let Some(scope) = self.scopes.last_mut() { + scope.insert(&name, false); + } + } + + fn define(&mut self, name: &'a str) { + if let Some(scope) = self.scopes.last_mut() { + scope.insert(&name, true); + } + } + + fn begin_scope(&mut self) { + self.scopes.push(Default::default()); + } + + fn end_scope(&mut self) { + self.scopes.pop(); + } +} + +pub fn resolve( + globals: &[String], + block: &mut parser::Block, +) -> Result<(), Error> { + let mut resolver: Resolver = Default::default(); + + // Scope for static globals only starts, never ends. + resolver.begin_scope(); + for global in globals { + resolver.define(global); + } + + resolver.resolve(block) +} |