From 432e7a7dddf56224297285c9f47f0aa3963eb5b5 Mon Sep 17 00:00:00 2001 From: Vincent Ambo Date: Tue, 2 Mar 2021 13:11:21 +0200 Subject: feat(tazjin/rlox): Intern all string constants This is again a step closer to the book, but there are some notable differences: * Only constants encountered by the compiler are interned, all other string operations (well, concatenation) happen with heap objects. * OpReturn will always ensure that a returned string value is newly heap allocated and does not reference the interner. Change-Id: If4f04309446e01b8ff2db51094e9710d465dbc50 Reviewed-on: https://cl.tvl.fyi/c/depot/+/2582 Reviewed-by: tazjin Tested-by: BuildkiteCI --- users/tazjin/rlox/src/bytecode/compiler.rs | 22 +++++++++--------- users/tazjin/rlox/src/bytecode/mod.rs | 4 ++-- users/tazjin/rlox/src/bytecode/value.rs | 22 +++++++++++++++++- users/tazjin/rlox/src/bytecode/vm.rs | 36 +++++++++++++++++++++++++----- 4 files changed, 65 insertions(+), 19 deletions(-) diff --git a/users/tazjin/rlox/src/bytecode/compiler.rs b/users/tazjin/rlox/src/bytecode/compiler.rs index 39bb2f4907a7..ae38654c5d38 100644 --- a/users/tazjin/rlox/src/bytecode/compiler.rs +++ b/users/tazjin/rlox/src/bytecode/compiler.rs @@ -1,5 +1,6 @@ use super::chunk::Chunk; use super::errors::{Error, ErrorKind, LoxResult}; +use super::interner::Interner; use super::opcode::OpCode; use super::value::Value; use crate::scanner::{self, Token, TokenKind}; @@ -12,8 +13,8 @@ struct Compiler> { chunk: Chunk, panic: bool, errors: Vec, + strings: Interner, - // TODO(tazjin): Restructure so that these don't need to be Option? current: Option, previous: Option, } @@ -146,7 +147,7 @@ fn rule_for>(token: &TokenKind) -> ParseRule { TokenKind::String(_) => { ParseRule::new(Some(Compiler::string), None, Precedence::None) - }, + } _ => ParseRule::new(None, None, Precedence::None), } @@ -260,13 +261,13 @@ impl> Compiler { } fn string(&mut self) -> LoxResult<()> { - match &self.previous().kind { - TokenKind::String(s) => { - let s = s.clone(); - self.emit_constant(Value::String(s)); - } + let val = match &self.previous().kind { + TokenKind::String(s) => s.clone(), _ => unreachable!("only called for strings"), - } + }; + + let id = self.strings.intern(val); + self.emit_constant(Value::String(id.into())); Ok(()) } @@ -353,7 +354,7 @@ impl> Compiler { } } -pub fn compile(code: &str) -> Result> { +pub fn compile(code: &str) -> Result<(Interner, Chunk), Vec> { let chars = code.chars().collect::>(); let tokens = scanner::scan(&chars).map_err(|errors| { errors.into_iter().map(Into::into).collect::>() @@ -364,6 +365,7 @@ pub fn compile(code: &str) -> Result> { chunk: Default::default(), panic: false, errors: vec![], + strings: Interner::with_capacity(1024), current: None, previous: None, }; @@ -371,7 +373,7 @@ pub fn compile(code: &str) -> Result> { compiler.compile()?; if compiler.errors.is_empty() { - Ok(compiler.chunk) + Ok((compiler.strings, compiler.chunk)) } else { Err(compiler.errors) } diff --git a/users/tazjin/rlox/src/bytecode/mod.rs b/users/tazjin/rlox/src/bytecode/mod.rs index b3e0966fa071..c6f3a737aef8 100644 --- a/users/tazjin/rlox/src/bytecode/mod.rs +++ b/users/tazjin/rlox/src/bytecode/mod.rs @@ -27,7 +27,7 @@ impl crate::Lox for Interpreter { &mut self, code: String, ) -> Result> { - let chunk = compiler::compile(&code)?; - vm::interpret(chunk).map_err(|e| vec![e]) + let (strings, chunk) = compiler::compile(&code)?; + vm::interpret(strings, chunk).map_err(|e| vec![e]) } } diff --git a/users/tazjin/rlox/src/bytecode/value.rs b/users/tazjin/rlox/src/bytecode/value.rs index c6667a698ec9..19a4dcc92eac 100644 --- a/users/tazjin/rlox/src/bytecode/value.rs +++ b/users/tazjin/rlox/src/bytecode/value.rs @@ -1,9 +1,29 @@ +use super::interner::InternedStr; + #[derive(Clone, Debug, PartialEq)] pub enum Value { Nil, Bool(bool), Number(f64), - String(String), + String(LoxString), +} + +#[derive(Clone, Debug, PartialEq)] +pub enum LoxString { + Heap(String), + Interned(InternedStr), +} + +impl From for LoxString { + fn from(s: String) -> Self { + LoxString::Heap(s) + } +} + +impl From for LoxString { + fn from(s: InternedStr) -> Self { + LoxString::Interned(s) + } } impl Value { diff --git a/users/tazjin/rlox/src/bytecode/vm.rs b/users/tazjin/rlox/src/bytecode/vm.rs index 730eee321206..02db30de58a9 100644 --- a/users/tazjin/rlox/src/bytecode/vm.rs +++ b/users/tazjin/rlox/src/bytecode/vm.rs @@ -1,7 +1,8 @@ use super::chunk; use super::errors::*; +use super::interner::Interner; use super::opcode::OpCode; -use super::value::Value; +use super::value::{LoxString, Value}; pub struct VM { chunk: chunk::Chunk, @@ -11,6 +12,7 @@ pub struct VM { ip: usize, stack: Vec, + strings: Interner, } impl VM { @@ -69,7 +71,10 @@ impl VM { self.ip += 1; match op { - OpCode::OpReturn => return Ok(self.pop()), + OpCode::OpReturn => { + let val = self.pop(); + return Ok(self.return_value(val)); + } OpCode::OpConstant(idx) => { let c = self.chunk.constant(*idx).clone(); @@ -114,9 +119,9 @@ impl VM { match (a, b) { (Value::String(s_a), Value::String(s_b)) => { - let mut new_s = s_a.clone(); - new_s.push_str(&s_b); - self.push(Value::String(new_s)); + let mut new_s = self.resolve_str(&s_a).to_string(); + new_s.push_str(self.resolve_str(&s_b)); + self.push(Value::String(new_s.into())); } (Value::Number(n_a), Value::Number(n_b)) => @@ -136,11 +141,30 @@ impl VM { println!("=> {:?}", self.stack); } } + + // For some types of values (e.g. interned strings), returns + // should no longer include any references into the interpreter. + fn return_value(&self, val: Value) -> Value { + match val { + Value::String(string @ LoxString::Interned(_)) => { + Value::String(self.resolve_str(&string).to_string().into()) + } + _ => val, + } + } + + fn resolve_str<'a>(&'a self, string: &'a LoxString) -> &'a str { + match string { + LoxString::Heap(s) => s.as_str(), + LoxString::Interned(id) => self.strings.lookup(*id), + } + } } -pub fn interpret(chunk: chunk::Chunk) -> LoxResult { +pub fn interpret(strings: Interner, chunk: chunk::Chunk) -> LoxResult { let mut vm = VM { chunk, + strings, ip: 0, stack: vec![], }; -- cgit 1.4.1