diff options
Diffstat (limited to 'users/tazjin/rlox/src/bytecode')
-rw-r--r-- | users/tazjin/rlox/src/bytecode/chunk.rs | 93 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/compiler.rs | 737 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/errors.rs | 51 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/interner/mod.rs | 87 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/interner/tests.rs | 24 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/mod.rs | 33 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/opcode.rs | 56 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/tests.rs | 152 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/value.rs | 37 | ||||
-rw-r--r-- | users/tazjin/rlox/src/bytecode/vm.rs | 277 |
10 files changed, 1547 insertions, 0 deletions
diff --git a/users/tazjin/rlox/src/bytecode/chunk.rs b/users/tazjin/rlox/src/bytecode/chunk.rs new file mode 100644 index 000000000000..fc5cd34fdf4f --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/chunk.rs @@ -0,0 +1,93 @@ +use std::ops::Index; + +use super::opcode::{CodeIdx, ConstantIdx, OpCode}; +use super::value; + +// In the book, this type is a hand-rolled dynamic array +// implementation in C. The main benefit of following that approach +// would be avoiding issues with OpCode variants not having equal +// sizes, but for the purpose of this I'm going to ignore that +// problem. +#[derive(Debug, Default)] +pub struct Chunk { + pub code: Vec<OpCode>, + lines: Vec<Span>, + constants: Vec<value::Value>, +} + +#[derive(Debug)] +struct Span { + /// Source code line + line: usize, + + /// Number of instructions derived from this line + count: usize, +} + +impl Chunk { + pub fn add_op(&mut self, data: OpCode, line: usize) -> CodeIdx { + let idx = self.code.len(); + self.code.push(data); + self.add_line(line); + CodeIdx(idx) + } + + pub fn add_constant(&mut self, data: value::Value) -> usize { + let idx = self.constants.len(); + self.constants.push(data); + idx + } + + pub fn constant(&self, idx: ConstantIdx) -> &value::Value { + self.constants.index(idx.0) + } + + fn add_line(&mut self, line: usize) { + match self.lines.last_mut() { + Some(span) if span.line == line => span.count += 1, + _ => self.lines.push(Span { line, count: 1 }), + } + } + + pub fn get_line(&self, offset: usize) -> usize { + let mut pos = 0; + for span in &self.lines { + pos += span.count; + if pos > offset { + return span.line; + } + } + + panic!("invalid chunk state: line missing for offset {}", offset); + } +} + +// Disassembler + +/// Print a single disassembled instruction at the specified offset. +/// Some instructions are printed "raw", others have special handling. 
+#[cfg(feature = "disassemble")] +pub fn disassemble_instruction(chunk: &Chunk, offset: usize) { + print!("{:04} ", offset); + + let line = chunk.get_line(offset); + if offset > 0 && line == chunk.get_line(offset - 1) { + print!(" | "); + } else { + print!("{:4} ", line); + } + + match chunk.code.index(offset) { + OpCode::OpConstant(idx) => { + println!("OpConstant({:?}) '{:?}'", idx, chunk.constant(*idx)) + } + op => println!("{:?}", op), + } +} + +#[cfg(feature = "disassemble")] +pub fn disassemble_chunk(chunk: &Chunk) { + for (idx, _) in chunk.code.iter().enumerate() { + disassemble_instruction(chunk, idx); + } +} diff --git a/users/tazjin/rlox/src/bytecode/compiler.rs b/users/tazjin/rlox/src/bytecode/compiler.rs new file mode 100644 index 000000000000..3e8a80653f91 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/compiler.rs @@ -0,0 +1,737 @@ +use super::chunk::Chunk; +use super::errors::{Error, ErrorKind, LoxResult}; +use super::interner::{InternedStr, Interner}; +use super::opcode::{CodeIdx, CodeOffset, ConstantIdx, OpCode, StackIdx}; +use super::value::Value; +use crate::scanner::{self, Token, TokenKind}; + +#[cfg(feature = "disassemble")] +use super::chunk; + +#[derive(Debug)] +enum Depth { + Unitialised, + At(usize), +} + +impl Depth { + fn above(&self, theirs: usize) -> bool { + match self { + Depth::Unitialised => false, + Depth::At(ours) => *ours > theirs, + } + } + + fn below(&self, theirs: usize) -> bool { + match self { + Depth::Unitialised => false, + Depth::At(ours) => *ours < theirs, + } + } +} + +#[derive(Debug)] +struct Local { + name: Token, + depth: Depth, +} + +#[derive(Debug, Default)] +struct Locals { + locals: Vec<Local>, + scope_depth: usize, +} + +struct Compiler<T: Iterator<Item = Token>> { + tokens: T, + chunk: Chunk, + panic: bool, + errors: Vec<Error>, + strings: Interner, + locals: Locals, + + current: Option<Token>, + previous: Option<Token>, +} + +#[derive(Debug, PartialEq, PartialOrd)] +enum Precedence { + None, + Assignment, 
// = + Or, // or + And, // and + Equality, // == != + Comparison, // < > <= >= + Term, // + - + Factor, // * / + Unary, // ! - + Call, // . () + Primary, +} + +type ParseFn<T> = fn(&mut Compiler<T>) -> LoxResult<()>; + +struct ParseRule<T: Iterator<Item = Token>> { + prefix: Option<ParseFn<T>>, + infix: Option<ParseFn<T>>, + precedence: Precedence, +} + +impl<T: Iterator<Item = Token>> ParseRule<T> { + fn new( + prefix: Option<ParseFn<T>>, + infix: Option<ParseFn<T>>, + precedence: Precedence, + ) -> Self { + ParseRule { + prefix, + infix, + precedence, + } + } +} + +impl Precedence { + // Return the next highest precedence, if there is one. + fn next(&self) -> Self { + match self { + Precedence::None => Precedence::Assignment, + Precedence::Assignment => Precedence::Or, + Precedence::Or => Precedence::And, + Precedence::And => Precedence::Equality, + Precedence::Equality => Precedence::Comparison, + Precedence::Comparison => Precedence::Term, + Precedence::Term => Precedence::Factor, + Precedence::Factor => Precedence::Unary, + Precedence::Unary => Precedence::Call, + Precedence::Call => Precedence::Primary, + Precedence::Primary => panic!( + "invalid parser state: no higher precedence than Primary" + ), + } + } +} + +fn rule_for<T: Iterator<Item = Token>>(token: &TokenKind) -> ParseRule<T> { + match token { + TokenKind::LeftParen => { + ParseRule::new(Some(Compiler::grouping), None, Precedence::None) + } + + TokenKind::Minus => ParseRule::new( + Some(Compiler::unary), + Some(Compiler::binary), + Precedence::Term, + ), + + TokenKind::Plus => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Term) + } + + TokenKind::Slash => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Factor) + } + + TokenKind::Star => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Factor) + } + + TokenKind::Number(_) => { + ParseRule::new(Some(Compiler::number), None, Precedence::None) + } + + TokenKind::True => { + ParseRule::new(Some(Compiler::literal), 
None, Precedence::None) + } + + TokenKind::False => { + ParseRule::new(Some(Compiler::literal), None, Precedence::None) + } + + TokenKind::Nil => { + ParseRule::new(Some(Compiler::literal), None, Precedence::None) + } + + TokenKind::Bang => { + ParseRule::new(Some(Compiler::unary), None, Precedence::None) + } + + TokenKind::BangEqual => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Equality) + } + + TokenKind::EqualEqual => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Equality) + } + + TokenKind::Greater => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison) + } + + TokenKind::GreaterEqual => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison) + } + + TokenKind::Less => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison) + } + + TokenKind::LessEqual => { + ParseRule::new(None, Some(Compiler::binary), Precedence::Comparison) + } + + TokenKind::Identifier(_) => { + ParseRule::new(Some(Compiler::variable), None, Precedence::None) + } + + TokenKind::String(_) => { + ParseRule::new(Some(Compiler::string), None, Precedence::None) + } + + _ => ParseRule::new(None, None, Precedence::None), + } +} + +macro_rules! 
consume { + ( $self:ident, $expected:pat, $err:expr ) => { + match $self.current().kind { + $expected => $self.advance(), + _ => $self.error_at($self.current().line, $err), + } + }; +} + +impl<T: Iterator<Item = Token>> Compiler<T> { + fn compile(&mut self) -> LoxResult<()> { + self.advance(); + + while !self.match_token(&TokenKind::Eof) { + self.declaration()?; + } + + self.end_compiler() + } + + fn advance(&mut self) { + self.previous = self.current.take(); + self.current = self.tokens.next(); + } + + fn expression(&mut self) -> LoxResult<()> { + self.parse_precedence(Precedence::Assignment) + } + + fn var_declaration(&mut self) -> LoxResult<()> { + let idx = self.parse_variable()?; + + if self.match_token(&TokenKind::Equal) { + self.expression()?; + } else { + self.emit_op(OpCode::OpNil); + } + + self.expect_semicolon("expect ';' after variable declaration")?; + self.define_variable(idx) + } + + fn define_variable(&mut self, var: Option<ConstantIdx>) -> LoxResult<()> { + if self.locals.scope_depth == 0 { + self.emit_op(OpCode::OpDefineGlobal( + var.expect("should be global"), + )); + } else { + self.locals + .locals + .last_mut() + .expect("fatal: variable not yet added at definition") + .depth = Depth::At(self.locals.scope_depth); + } + + Ok(()) + } + + fn declaration(&mut self) -> LoxResult<()> { + if self.match_token(&TokenKind::Var) { + self.var_declaration()?; + } else { + self.statement()?; + } + + if self.panic { + self.synchronise(); + } + + Ok(()) + } + + fn statement(&mut self) -> LoxResult<()> { + if self.match_token(&TokenKind::Print) { + self.print_statement() + } else if self.match_token(&TokenKind::If) { + self.if_statement() + } else if self.match_token(&TokenKind::LeftBrace) { + self.begin_scope(); + self.block()?; + self.end_scope(); + Ok(()) + } else { + self.expression_statement() + } + } + + fn print_statement(&mut self) -> LoxResult<()> { + self.expression()?; + self.expect_semicolon("expect ';' after print statement")?; + 
self.emit_op(OpCode::OpPrint); + Ok(()) + } + + fn begin_scope(&mut self) { + self.locals.scope_depth += 1; + } + + fn end_scope(&mut self) { + debug_assert!(self.locals.scope_depth > 0, "tried to end global scope"); + self.locals.scope_depth -= 1; + + while self.locals.locals.len() > 0 + && self.locals.locals[self.locals.locals.len() - 1] + .depth + .above(self.locals.scope_depth) + { + self.emit_op(OpCode::OpPop); + self.locals.locals.remove(self.locals.locals.len() - 1); + } + } + + fn block(&mut self) -> LoxResult<()> { + while !self.check(&TokenKind::RightBrace) + && !self.check(&TokenKind::Eof) + { + self.declaration()?; + } + + consume!( + self, + TokenKind::RightBrace, + ErrorKind::ExpectedToken("Expected '}' after block.") + ); + Ok(()) + } + + fn expression_statement(&mut self) -> LoxResult<()> { + self.expression()?; + self.expect_semicolon("expect ';' after expression")?; + // TODO(tazjin): Why did I add this originally? + // self.emit_op(OpCode::OpPop); + Ok(()) + } + + fn if_statement(&mut self) -> LoxResult<()> { + consume!( + self, + TokenKind::LeftParen, + ErrorKind::ExpectedToken("Expected '(' after 'if'") + ); + + self.expression()?; + + consume!( + self, + TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after condition") + ); + + let then_jump = self.emit_op(OpCode::OpJumpPlaceholder(false)); + self.emit_op(OpCode::OpPop); + self.statement()?; + let else_jump = self.emit_op(OpCode::OpJumpPlaceholder(true)); + self.patch_jump(then_jump); + self.emit_op(OpCode::OpPop); + + if self.match_token(&TokenKind::Else) { + self.statement()?; + } + + self.patch_jump(else_jump); + + Ok(()) + } + + fn number(&mut self) -> LoxResult<()> { + if let TokenKind::Number(num) = self.previous().kind { + self.emit_constant(Value::Number(num), true); + return Ok(()); + } + + unreachable!("internal parser error: entered number() incorrectly") + } + + fn grouping(&mut self) -> LoxResult<()> { + self.expression()?; + consume!( + self, + 
TokenKind::RightParen, + ErrorKind::ExpectedToken("Expected ')' after expression") + ); + Ok(()) + } + + fn unary(&mut self) -> LoxResult<()> { + // TODO(tazjin): Avoid clone + let kind = self.previous().kind.clone(); + + // Compile the operand + self.parse_precedence(Precedence::Unary)?; + + // Emit operator instruction + match kind { + TokenKind::Bang => self.emit_op(OpCode::OpNot), + TokenKind::Minus => self.emit_op(OpCode::OpNegate), + _ => unreachable!("only called for unary operator tokens"), + }; + + Ok(()) + } + + fn binary(&mut self) -> LoxResult<()> { + // Remember the operator + let operator = self.previous().kind.clone(); + + // Compile the right operand + let rule: ParseRule<T> = rule_for(&operator); + self.parse_precedence(rule.precedence.next())?; + + // Emit operator instruction + match operator { + TokenKind::Minus => self.emit_op(OpCode::OpSubtract), + TokenKind::Plus => self.emit_op(OpCode::OpAdd), + TokenKind::Star => self.emit_op(OpCode::OpMultiply), + TokenKind::Slash => self.emit_op(OpCode::OpDivide), + + TokenKind::BangEqual => { + self.emit_op(OpCode::OpEqual); + self.emit_op(OpCode::OpNot) + } + + TokenKind::EqualEqual => self.emit_op(OpCode::OpEqual), + TokenKind::Greater => self.emit_op(OpCode::OpGreater), + + TokenKind::GreaterEqual => { + self.emit_op(OpCode::OpLess); + self.emit_op(OpCode::OpNot) + } + + TokenKind::Less => self.emit_op(OpCode::OpLess), + TokenKind::LessEqual => { + self.emit_op(OpCode::OpGreater); + self.emit_op(OpCode::OpNot) + } + + _ => unreachable!("only called for binary operator tokens"), + }; + + Ok(()) + } + + fn literal(&mut self) -> LoxResult<()> { + match self.previous().kind { + TokenKind::Nil => self.emit_op(OpCode::OpNil), + TokenKind::True => self.emit_op(OpCode::OpTrue), + TokenKind::False => self.emit_op(OpCode::OpFalse), + _ => unreachable!("only called for literal value tokens"), + }; + + Ok(()) + } + + fn string(&mut self) -> LoxResult<()> { + let val = match &self.previous().kind { + 
TokenKind::String(s) => s.clone(), + _ => unreachable!("only called for strings"), + }; + + let id = self.strings.intern(val); + self.emit_constant(Value::String(id.into()), true); + + Ok(()) + } + + fn named_variable(&mut self, name: Token) -> LoxResult<()> { + let local_idx = self.resolve_local(&name); + + let ident = if local_idx.is_some() { + None + } else { + Some(self.identifier_constant(&name)?) + }; + + if self.match_token(&TokenKind::Equal) { + self.expression()?; + match local_idx { + Some(idx) => self.emit_op(OpCode::OpSetLocal(idx)), + None => self.emit_op(OpCode::OpSetGlobal(ident.unwrap())), + }; + } else { + match local_idx { + Some(idx) => self.emit_op(OpCode::OpGetLocal(idx)), + None => self.emit_op(OpCode::OpGetGlobal(ident.unwrap())), + }; + } + + Ok(()) + } + + fn variable(&mut self) -> LoxResult<()> { + let name = self.previous().clone(); + self.named_variable(name) + } + + fn parse_precedence(&mut self, precedence: Precedence) -> LoxResult<()> { + self.advance(); + let rule: ParseRule<T> = rule_for(&self.previous().kind); + let prefix_fn = match rule.prefix { + None => unimplemented!("expected expression or something, unclear"), + Some(func) => func, + }; + + prefix_fn(self)?; + + while precedence <= rule_for::<T>(&self.current().kind).precedence { + self.advance(); + match rule_for::<T>(&self.previous().kind).infix { + Some(func) => { + func(self)?; + } + None => { + unreachable!("invalid compiler state: error in parse rules") + } + } + } + + Ok(()) + } + + fn identifier_str(&mut self, token: &Token) -> LoxResult<InternedStr> { + let ident = match &token.kind { + TokenKind::Identifier(ident) => ident.to_string(), + _ => { + return Err(Error { + line: self.current().line, + kind: ErrorKind::ExpectedToken("Expected identifier"), + }) + } + }; + + Ok(self.strings.intern(ident)) + } + + fn identifier_constant(&mut self, name: &Token) -> LoxResult<ConstantIdx> { + let ident = self.identifier_str(name)?; + 
Ok(self.emit_constant(Value::String(ident.into()), false)) + } + + fn resolve_local(&self, name: &Token) -> Option<StackIdx> { + for (idx, local) in self.locals.locals.iter().enumerate().rev() { + if name.lexeme == local.name.lexeme { + if let Depth::Unitialised = local.depth { + // TODO(tazjin): *return* err + panic!("can't read variable in its own initialiser"); + } + return Some(StackIdx(idx)); + } + } + + None + } + + fn add_local(&mut self, name: Token) { + let local = Local { + name, + depth: Depth::Unitialised, + }; + + self.locals.locals.push(local); + } + + fn declare_variable(&mut self) -> LoxResult<()> { + if self.locals.scope_depth == 0 { + return Ok(()); + } + + let name = self.previous().clone(); + + for local in self.locals.locals.iter().rev() { + if local.depth.below(self.locals.scope_depth) { + break; + } + + if name.lexeme == local.name.lexeme { + return Err(Error { + kind: ErrorKind::VariableShadowed(name.lexeme.into()), + line: name.line, + }); + } + } + + self.add_local(name); + Ok(()) + } + + fn parse_variable(&mut self) -> LoxResult<Option<ConstantIdx>> { + consume!( + self, + TokenKind::Identifier(_), + ErrorKind::ExpectedToken("expected identifier") + ); + + self.declare_variable()?; + if self.locals.scope_depth > 0 { + return Ok(None); + } + + let name = self.previous().clone(); + let id = self.identifier_str(&name)?; + Ok(Some(self.emit_constant(Value::String(id.into()), false))) + } + + fn current_chunk(&mut self) -> &mut Chunk { + &mut self.chunk + } + + fn end_compiler(&mut self) -> LoxResult<()> { + self.emit_op(OpCode::OpReturn); + + #[cfg(feature = "disassemble")] + { + chunk::disassemble_chunk(&self.chunk); + println!("== compilation finished =="); + } + + Ok(()) + } + + fn emit_op(&mut self, op: OpCode) -> CodeIdx { + let line = self.previous().line; + self.current_chunk().add_op(op, line) + } + + fn emit_constant(&mut self, val: Value, with_op: bool) -> ConstantIdx { + let idx = ConstantIdx(self.chunk.add_constant(val)); + + if 
with_op { + self.emit_op(OpCode::OpConstant(idx)); + } + + idx + } + + fn patch_jump(&mut self, idx: CodeIdx) { + let offset = CodeOffset(self.chunk.code.len() - idx.0 - 1); + + if let OpCode::OpJumpPlaceholder(true) = self.chunk.code[idx.0] { + self.chunk.code[idx.0] = OpCode::OpJump(offset); + return; + } + + if let OpCode::OpJumpPlaceholder(false) = self.chunk.code[idx.0] { + self.chunk.code[idx.0] = OpCode::OpJumpIfFalse(offset); + return; + } + + panic!( + "attempted to patch unsupported op: {:?}", + self.chunk.code[idx.0] + ); + } + + fn previous(&self) -> &Token { + self.previous + .as_ref() + .expect("invalid internal compiler state: missing previous token") + } + + fn current(&self) -> &Token { + self.current + .as_ref() + .expect("invalid internal compiler state: missing current token") + } + + fn error_at(&mut self, line: usize, kind: ErrorKind) { + if self.panic { + return; + } + + self.panic = true; + self.errors.push(Error { kind, line }) + } + + fn match_token(&mut self, token: &TokenKind) -> bool { + if !self.check(token) { + return false; + } + + self.advance(); + true + } + + fn check(&self, token: &TokenKind) -> bool { + return self.current().kind == *token; + } + + fn synchronise(&mut self) { + self.panic = false; + + while self.current().kind != TokenKind::Eof { + if self.previous().kind == TokenKind::Semicolon { + return; + } + + match self.current().kind { + TokenKind::Class + | TokenKind::Fun + | TokenKind::Var + | TokenKind::For + | TokenKind::If + | TokenKind::While + | TokenKind::Print + | TokenKind::Return => return, + + _ => { + self.advance(); + } + } + } + } + + fn expect_semicolon(&mut self, msg: &'static str) -> LoxResult<()> { + consume!(self, TokenKind::Semicolon, ErrorKind::ExpectedToken(msg)); + Ok(()) + } +} + +pub fn compile(code: &str) -> Result<(Interner, Chunk), Vec<Error>> { + let chars = code.chars().collect::<Vec<char>>(); + let tokens = scanner::scan(&chars).map_err(|errors| { + 
errors.into_iter().map(Into::into).collect::<Vec<Error>>() + })?; + + let mut compiler = Compiler { + tokens: tokens.into_iter().peekable(), + chunk: Default::default(), + panic: false, + errors: vec![], + strings: Interner::with_capacity(1024), + locals: Default::default(), + current: None, + previous: None, + }; + + compiler.compile()?; + + if compiler.errors.is_empty() { + Ok((compiler.strings, compiler.chunk)) + } else { + Err(compiler.errors) + } +} diff --git a/users/tazjin/rlox/src/bytecode/errors.rs b/users/tazjin/rlox/src/bytecode/errors.rs new file mode 100644 index 000000000000..988031f763cf --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/errors.rs @@ -0,0 +1,51 @@ +use crate::scanner::ScannerError; + +use std::fmt; + +#[derive(Debug)] +pub enum ErrorKind { + UnexpectedChar(char), + UnterminatedString, + ExpectedToken(&'static str), + InternalError(&'static str), + TypeError(String), + VariableShadowed(String), +} + +#[derive(Debug)] +pub struct Error { + pub kind: ErrorKind, + pub line: usize, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "[line NYI] Error: {:?}", self.kind) + } +} + +impl From<ScannerError> for Error { + fn from(err: ScannerError) -> Self { + match err { + ScannerError::UnexpectedChar { line, unexpected } => Error { + line, + kind: ErrorKind::UnexpectedChar(unexpected), + }, + + ScannerError::UnterminatedString { line } => Error { + line, + kind: ErrorKind::UnterminatedString, + }, + } + } +} + +// Convenience implementation as we're often dealing with vectors of +// errors (to report as many issues as possible before terminating) +impl From<Error> for Vec<Error> { + fn from(err: Error) -> Self { + vec![err] + } +} + +pub type LoxResult<T> = Result<T, Error>; diff --git a/users/tazjin/rlox/src/bytecode/interner/mod.rs b/users/tazjin/rlox/src/bytecode/interner/mod.rs new file mode 100644 index 000000000000..1da1a24b2c5f --- /dev/null +++ 
// b/users/tazjin/rlox/src/bytecode/interner/mod.rs @@ -0,0 +1,87 @@
//! String-interning implementation for values that are likely to
//! benefit from fast comparisons and deduplication (e.g. instances of
//! variable names).
//!
//! This uses a trick from the typed-arena crate for guaranteeing
//! stable addresses by never resizing the existing String buffer, and
//! collecting full buffers in a vector.

use std::collections::HashMap;

/// Handle to an interned string.
///
/// Two handles obtained from the same [`Interner`] compare equal
/// exactly when they were produced for equal strings.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct InternedStr {
    /// Position of the string in `Interner::vec`.
    id: usize,
}

/// Owns the text of all interned strings and maps between the text
/// and its [`InternedStr`] handle.
#[derive(Default)]
pub struct Interner {
    /// Text -> handle, used to deduplicate on `intern`.
    map: HashMap<&'static str, InternedStr>,

    /// Handle -> text, indexed by `InternedStr::id`.
    vec: Vec<&'static str>,

    /// Buffer that newly interned strings are appended to. It is
    /// never grown beyond its capacity (see `alloc`).
    buf: String,

    /// Retired buffers, kept alive because `map` and `vec` still
    /// point into them.
    full: Vec<String>,
}

impl Interner {
    /// Creates an interner whose first buffer can hold `cap` bytes of
    /// string data.
    pub fn with_capacity(cap: usize) -> Self {
        Interner {
            buf: String::with_capacity(cap),
            ..Default::default()
        }
    }

    /// Interns `name` and returns its handle. Interning a string that
    /// is already known returns the existing handle without storing
    /// the text again.
    pub fn intern<S: AsRef<str>>(&mut self, name: S) -> InternedStr {
        let name = name.as_ref();
        if let Some(&id) = self.map.get(name) {
            return id;
        }

        let name = self.alloc(name);
        let id = InternedStr {
            id: self.vec.len(),
        };

        self.map.insert(name, id);
        self.vec.push(name);

        debug_assert!(self.lookup(id) == name);
        debug_assert!(self.intern(name) == id);

        id
    }

    /// Returns the text behind `id`. The result borrows from the
    /// interner and cannot outlive it.
    ///
    /// # Panics
    ///
    /// Panics if `id` is out of range for this interner (e.g. because
    /// it was produced by a different one).
    pub fn lookup(&self, id: InternedStr) -> &str {
        self.vec[id.id]
    }

    /// Copies `name` into the current buffer and returns a reference
    /// to the copy. If the buffer lacks the room, it is retired into
    /// `full` and replaced by a fresh, larger one first, so existing
    /// buffers are never reallocated.
    fn alloc(&mut self, name: &str) -> &'static str {
        let cap = self.buf.capacity();
        if cap < self.buf.len() + name.len() {
            let new_cap = (cap.max(name.len()) + 1).next_power_of_two();
            let new_buf = String::with_capacity(new_cap);
            let old_buf = std::mem::replace(&mut self.buf, new_buf);
            self.full.push(old_buf);
        }

        let start = self.buf.len();
        self.buf.push_str(name);
        let interned: &str = &self.buf[start..];

        // SAFETY: the bytes referred to live in the heap allocation
        // of `self.buf`, which the capacity check above keeps from
        // ever being reallocated, and which is moved into `self.full`
        // rather than dropped once it is full. Handing the reference
        // out as `'static` is sound for two reasons:
        //
        // 1. This function (Interner::alloc) is private, which
        //    prevents users from allocating a supposedly static
        //    reference.
        //
        // 2. Interner::lookup explicitly shortens the lifetime of
        //    references that are handed out to that of the
        //    reference to self.
        unsafe { &*(interned as *const str) }
    }
}

// diff --git a/users/tazjin/rlox/src/bytecode/interner/tests.rs b/users/tazjin/rlox/src/bytecode/interner/tests.rs new file mode 100644 index 000000000000..b34bf6835389 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/interner/tests.rs @@ -0,0 +1,24 @@
// Contents of interner/tests.rs (declared in interner/mod.rs as
// `#[cfg(test)] mod tests;`), shown inline.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn interns_strings() {
        let mut interner = Interner::with_capacity(128);
        let id = interner.intern("hello world");
        assert_eq!("hello world", interner.lookup(id));
    }

    #[test]
    fn deduplicates_strings() {
        let mut interner = Interner::with_capacity(128);
        let id_1 = interner.intern("hello world");
        let id_2 = interner.intern("hello world");
        assert_eq!(id_1, id_2);
    }

    #[test]
    fn ids_survive_growing() {
        let mut interner = Interner::with_capacity(16);
        let id = interner.intern("hello");
        interner.intern("excessively large string that will cause reallocation");
        assert_eq!("hello", interner.lookup(id));
    }
}

// diff --git a/users/tazjin/rlox/src/bytecode/mod.rs b/users/tazjin/rlox/src/bytecode/mod.rs new file mode 100644 index 000000000000..c6f3a737aef8 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/mod.rs @@ -0,0 +1,33 @@ +//! Bytecode interpreter for Lox. +//! +//!
https://craftinginterpreters.com/chunks-of-bytecode.html + +mod chunk; +mod compiler; +mod errors; +mod interner; +mod opcode; +mod value; +mod vm; + +#[cfg(test)] +mod tests; + +pub struct Interpreter {} + +impl crate::Lox for Interpreter { + type Error = errors::Error; + type Value = value::Value; + + fn create() -> Self { + Interpreter {} + } + + fn interpret( + &mut self, + code: String, + ) -> Result<Self::Value, Vec<Self::Error>> { + let (strings, chunk) = compiler::compile(&code)?; + vm::interpret(strings, chunk).map_err(|e| vec![e]) + } +} diff --git a/users/tazjin/rlox/src/bytecode/opcode.rs b/users/tazjin/rlox/src/bytecode/opcode.rs new file mode 100644 index 000000000000..8a106f96917d --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/opcode.rs @@ -0,0 +1,56 @@ +#[derive(Clone, Copy, Debug)] +pub struct ConstantIdx(pub usize); + +#[derive(Clone, Copy, Debug)] +pub struct StackIdx(pub usize); + +#[derive(Clone, Copy, Debug)] +pub struct CodeIdx(pub usize); + +#[derive(Clone, Copy, Debug)] +pub struct CodeOffset(pub usize); + +#[derive(Debug)] +pub enum OpCode { + /// Push a constant onto the stack. + OpConstant(ConstantIdx), + + // Literal pushes + OpNil, + OpTrue, + OpFalse, + + /// Return from the current function. 
+ OpReturn, + + // Boolean & comparison operators + OpNot, + OpEqual, + OpGreater, + OpLess, + + /// Unary negation + OpNegate, + + // Arithmetic operators + OpAdd, + OpSubtract, + OpMultiply, + OpDivide, + + // Built in operations + OpPrint, + OpPop, + + // Variable management + OpDefineGlobal(ConstantIdx), + OpGetGlobal(ConstantIdx), + OpSetGlobal(ConstantIdx), + OpGetLocal(StackIdx), + OpSetLocal(StackIdx), + + // Control flow + OpJumpPlaceholder(bool), + OpJump(CodeOffset), + OpJumpIfFalse(CodeOffset), +} diff --git a/users/tazjin/rlox/src/bytecode/tests.rs b/users/tazjin/rlox/src/bytecode/tests.rs new file mode 100644 index 000000000000..bc7d6cb878f8 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/tests.rs @@ -0,0 +1,152 @@ +use super::value::Value; +use super::*; + +use crate::Lox; + +fn expect(code: &str, value: Value) { + let result = Interpreter::create() + .interpret(code.into()) + .expect("evaluation failed"); + assert_eq!(result, value); +} + +fn expect_num(code: &str, value: f64) { + expect(code, Value::Number(value)) +} + +fn expect_bool(code: &str, value: bool) { + expect(code, Value::Bool(value)) +} + +fn expect_str(code: &str, value: &str) { + expect(code, Value::String(value.to_string().into())) +} + +#[test] +fn numbers() { + expect_num("1;", 1.0); + expect_num("13.37;", 13.37); +} + +#[test] +fn negative_numbers() { + // Note: This technically tests unary operators. 
+ expect_num("-1;", -1.0); + expect_num("-13.37;", -13.37); +} + +#[test] +fn terms() { + expect_num("1 + 2;", 3.0); + expect_num("3 - 1;", 2.0); + expect_num("0.7 + 0.3;", 1.0); + expect_num("1 + -3;", -2.0); + expect_num("-1 - -1;", 0.0); + expect_num("10 - -10 + 10;", 30.0); +} + +#[test] +fn factors() { + expect_num("1 * 2;", 2.0); + expect_num("10 / 5;", 2.0); + expect_num("0.7 * 4 / 1.4;", 2.0); + expect_num("10 * -10 / 10;", -10.0); +} + +#[test] +fn arithmetic() { + expect_num("10 - 3 * 2;", 4.0); + expect_num("-4 * -4 + (14 - 5);", 25.0); + expect_num("(702 + 408) - ((239 - 734) / -5) + -4;", 1007.0); +} + +#[test] +fn trivial_literals() { + expect("true;", Value::Bool(true)); + expect("false;", Value::Bool(false)); + expect("nil;", Value::Nil); +} + +#[test] +fn negation() { + expect_bool("!true;", false); + expect_bool("!false;", true); + expect_bool("!nil;", true); + expect_bool("!13.5;", false); + expect_bool("!-42;", false); +} + +#[test] +fn equality() { + expect_bool("42 == 42;", true); + expect_bool("42 != 42;", false); + expect_bool("42 == 42.0;", true); + + expect_bool("true == true;", true); + expect_bool("true == false;", false); + expect_bool("true == !false;", true); + expect_bool("true != true;", false); + expect_bool("true != false;", true); + + expect_bool("42 == false;", false); + expect_bool("42 == true;", false); + expect_bool("!42 == !true;", true); +} + +#[test] +fn comparisons() { + expect_bool("42 > 23;", true); + expect_bool("42 < 23;", false); + expect_bool("42 <= 42;", true); + expect_bool("42 <= 23;", false); + expect_bool("42 >= 42;", true); + expect_bool("42 >= 23;", true); +} + +#[test] +fn strings() { + expect_str("\"hello\";", "hello"); + expect_str("\"hello\" + \" world\";", "hello world"); +} + +#[test] +fn global_variables() { + expect_num("var a = 5; a;", 5.0); + expect_num("var a = 5; var b = 2; a * b;", 10.0); + expect_str( + "var greeting = \"hello\"; var name = \"Zubnog\"; greeting + \" \" + name;", + "hello 
Zubnog", + ); +} + +#[test] +fn global_assignment() { + expect_str( + r#" + var breakfast = "beignets"; + var beverage = "cafe au lait"; + breakfast = "beignets with " + beverage; + breakfast; + "#, + "beignets with cafe au lait", + ); +} + +#[test] +fn local_variables() { + expect_num( + r#" + var a = 10; + var b = 5; + var result = 0; + { + var b = 10; + var c = 2; + result = a * b * c; + } + + result; + "#, + 200.0, + ); +} diff --git a/users/tazjin/rlox/src/bytecode/value.rs b/users/tazjin/rlox/src/bytecode/value.rs new file mode 100644 index 000000000000..4170efadf8fe --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/value.rs @@ -0,0 +1,37 @@ +use super::interner::InternedStr; + +#[derive(Clone, Debug, PartialEq)] +pub enum Value { + Nil, + Bool(bool), + Number(f64), + String(LoxString), +} + +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub enum LoxString { + Heap(String), + Interned(InternedStr), +} + +impl From<String> for LoxString { + fn from(s: String) -> Self { + LoxString::Heap(s) + } +} + +impl From<InternedStr> for LoxString { + fn from(s: InternedStr) -> Self { + LoxString::Interned(s) + } +} + +impl Value { + pub fn is_falsey(&self) -> bool { + match self { + Value::Nil => true, + Value::Bool(false) => true, + _ => false, + } + } +} diff --git a/users/tazjin/rlox/src/bytecode/vm.rs b/users/tazjin/rlox/src/bytecode/vm.rs new file mode 100644 index 000000000000..d287ec7cb8c5 --- /dev/null +++ b/users/tazjin/rlox/src/bytecode/vm.rs @@ -0,0 +1,277 @@ +use std::collections::HashMap; + +use super::chunk; +use super::errors::*; +use super::interner::Interner; +use super::opcode::OpCode; +use super::value::{LoxString, Value}; + +pub struct VM { + chunk: chunk::Chunk, + + // TODO(tazjin): Accessing array elements constantly is not ideal, + // lets see if something clever can be done with iterators. 
+ ip: usize, + + stack: Vec<Value>, + strings: Interner, + + globals: HashMap<LoxString, Value>, + + // Operations that consume values from the stack without pushing + // anything leave their last value in this slot, which makes it + // possible to return values from interpreters that ran code which + // ended with a statement. + last_drop: Option<Value>, +} + +impl VM { + fn push(&mut self, value: Value) { + self.stack.push(value) + } + + fn pop(&mut self) -> Value { + self.stack.pop().expect("fatal error: stack empty!") + } +} + +macro_rules! with_type { + ( $self:ident, $val:ident, $type:pat, $body:expr ) => { + match $val { + $type => $body, + _ => { + return Err(Error { + line: $self.chunk.get_line($self.ip - 1), + kind: ErrorKind::TypeError(format!( + "Expected type {}, but found value: {:?}", + stringify!($type), + $val, + )), + }) + } + } + }; +} + +macro_rules! binary_op { + ( $vm:ident, $type:tt, $op:tt ) => { + binary_op!($vm, $type, $type, $op) + }; + + ( $vm:ident, $in_type:tt, $out_type:tt, $op:tt ) => {{ + let b = $vm.pop(); + let a = $vm.pop(); + + with_type!($vm, b, Value::$in_type(val_b), { + with_type!($vm, a, Value::$in_type(val_a), { + $vm.push(Value::$out_type(val_a $op val_b)) + }) + }) + }}; +} + +impl VM { + fn run(&mut self) -> LoxResult<Value> { + loop { + let op = &self.chunk.code[self.ip]; + + #[cfg(feature = "disassemble")] + chunk::disassemble_instruction(&self.chunk, self.ip); + + self.ip += 1; + + match op { + OpCode::OpReturn => { + if !self.stack.is_empty() { + let val = self.pop(); + return Ok(self.return_value(val)); + } else if self.last_drop.is_some() { + let val = self.last_drop.take().unwrap(); + return Ok(self.return_value(val)); + } else { + return Ok(Value::Nil); + } + } + + OpCode::OpConstant(idx) => { + let c = self.chunk.constant(*idx).clone(); + self.push(c); + } + + OpCode::OpNil => self.push(Value::Nil), + OpCode::OpTrue => self.push(Value::Bool(true)), + OpCode::OpFalse => self.push(Value::Bool(false)), + + 
OpCode::OpNot => { + let v = self.pop(); + self.push(Value::Bool(v.is_falsey())); + } + + OpCode::OpEqual => { + let b = self.pop(); + let a = self.pop(); + self.push(Value::Bool(a == b)); + } + + OpCode::OpLess => binary_op!(self, Number, Bool, <), + OpCode::OpGreater => binary_op!(self, Number, Bool, >), + + OpCode::OpNegate => { + let v = self.pop(); + with_type!( + self, + v, + Value::Number(num), + self.push(Value::Number(-num)) + ); + } + + OpCode::OpSubtract => binary_op!(self, Number, -), + OpCode::OpMultiply => binary_op!(self, Number, *), + OpCode::OpDivide => binary_op!(self, Number, /), + + OpCode::OpAdd => { + let b = self.pop(); + let a = self.pop(); + + match (a, b) { + (Value::String(s_a), Value::String(s_b)) => { + let mut new_s = self.resolve_str(&s_a).to_string(); + new_s.push_str(self.resolve_str(&s_b)); + self.push(Value::String(new_s.into())); + } + + (Value::Number(n_a), Value::Number(n_b)) => + self.push(Value::Number(n_a + n_b)), + + _ => return Err(Error { + line: self.chunk.get_line(self.ip - 1), + kind: ErrorKind::TypeError( + "'+' operator only works on strings and numbers".into() + ), + }) + } + } + + OpCode::OpPrint => { + let val = self.pop(); + println!("{}", self.print_value(val)); + } + + OpCode::OpPop => { + self.last_drop = Some(self.pop()); + } + + OpCode::OpDefineGlobal(name_idx) => { + let name = self.chunk.constant(*name_idx); + with_type!(self, name, Value::String(name), { + let name = name.clone(); + let val = self.pop(); + self.globals.insert(name, val); + }); + } + + OpCode::OpGetGlobal(name_idx) => { + let name = self.chunk.constant(*name_idx); + with_type!(self, name, Value::String(name), { + let val = match self.globals.get(name) { + None => unimplemented!("variable not found error"), + Some(val) => val.clone(), + }; + self.push(val) + }); + } + + OpCode::OpSetGlobal(name_idx) => { + let name = self.chunk.constant(*name_idx).clone(); + let new_val = self.pop(); + with_type!(self, name, Value::String(name), { + match 
self.globals.get_mut(&name) { + None => unimplemented!("variable not found error"), + Some(val) => { + *val = new_val; + } + } + }); + } + + OpCode::OpGetLocal(local_idx) => { + let value = self.stack[local_idx.0].clone(); + self.push(value); + } + + OpCode::OpSetLocal(local_idx) => { + debug_assert!( + self.stack.len() > local_idx.0, + "stack is not currently large enough for local" + ); + self.stack[local_idx.0] = + self.stack.last().unwrap().clone(); + } + + OpCode::OpJumpPlaceholder(_) => { + panic!("unpatched jump detected - this is a fatal compiler error!"); + } + + OpCode::OpJump(offset) => { + self.ip += offset.0; + } + + OpCode::OpJumpIfFalse(offset) => { + if self + .stack + .last() + .expect("condition should leave a value on the stack") + .is_falsey() + { + self.ip += offset.0; + } + } + } + + #[cfg(feature = "disassemble")] + println!("=> {:?}", self.stack); + } + } + + // For some types of values (e.g. interned strings), returns + // should no longer include any references into the interpreter. + fn return_value(&self, val: Value) -> Value { + match val { + Value::String(string @ LoxString::Interned(_)) => { + Value::String(self.resolve_str(&string).to_string().into()) + } + _ => val, + } + } + + fn resolve_str<'a>(&'a self, string: &'a LoxString) -> &'a str { + match string { + LoxString::Heap(s) => s.as_str(), + LoxString::Interned(id) => self.strings.lookup(*id), + } + } + + fn print_value(&self, val: Value) -> String { + match val { + Value::String(LoxString::Heap(s)) => s, + Value::String(LoxString::Interned(id)) => { + self.strings.lookup(id).into() + } + _ => format!("{:?}", val), + } + } +} + +pub fn interpret(strings: Interner, chunk: chunk::Chunk) -> LoxResult<Value> { + let mut vm = VM { + chunk, + strings, + globals: HashMap::new(), + ip: 0, + stack: vec![], + last_drop: None, + }; + + vm.run() +} |