about summary refs log tree commit diff
diff options
context:
space:
mode:
authorVincent Ambo <mail@tazj.in>2021-03-02T11·11+0200
committertazjin <mail@tazj.in>2021-03-02T19·48+0000
commit432e7a7dddf56224297285c9f47f0aa3963eb5b5 (patch)
treede3ee9a64b116e43f4ea4911e27d9a58603de9b7
parentbcea8e0d169974ba57a54ca098f480d6294a7fb1 (diff)
feat(tazjin/rlox): Intern all string constants r/2263
This is again a step closer to the book, but there are some notable
differences:

* Only constants encountered by the compiler are interned, all other
  string operations (well, concatenation) happen with heap objects.

* OpReturn will always ensure that a returned string value is newly
  heap allocated and does not reference the interner.

Change-Id: If4f04309446e01b8ff2db51094e9710d465dbc50
Reviewed-on: https://cl.tvl.fyi/c/depot/+/2582
Reviewed-by: tazjin <mail@tazj.in>
Tested-by: BuildkiteCI
-rw-r--r--users/tazjin/rlox/src/bytecode/compiler.rs22
-rw-r--r--users/tazjin/rlox/src/bytecode/mod.rs4
-rw-r--r--users/tazjin/rlox/src/bytecode/value.rs22
-rw-r--r--users/tazjin/rlox/src/bytecode/vm.rs36
4 files changed, 65 insertions, 19 deletions
diff --git a/users/tazjin/rlox/src/bytecode/compiler.rs b/users/tazjin/rlox/src/bytecode/compiler.rs
index 39bb2f4907a7..ae38654c5d38 100644
--- a/users/tazjin/rlox/src/bytecode/compiler.rs
+++ b/users/tazjin/rlox/src/bytecode/compiler.rs
@@ -1,5 +1,6 @@
 use super::chunk::Chunk;
 use super::errors::{Error, ErrorKind, LoxResult};
+use super::interner::Interner;
 use super::opcode::OpCode;
 use super::value::Value;
 use crate::scanner::{self, Token, TokenKind};
@@ -12,8 +13,8 @@ struct Compiler<T: Iterator<Item = Token>> {
     chunk: Chunk,
     panic: bool,
     errors: Vec<Error>,
+    strings: Interner,
 
-    // TODO(tazjin): Restructure so that these don't need to be Option?
     current: Option<Token>,
     previous: Option<Token>,
 }
@@ -146,7 +147,7 @@ fn rule_for<T: Iterator<Item = Token>>(token: &TokenKind) -> ParseRule<T> {
 
         TokenKind::String(_) => {
             ParseRule::new(Some(Compiler::string), None, Precedence::None)
-        },
+        }
 
         _ => ParseRule::new(None, None, Precedence::None),
     }
@@ -260,13 +261,13 @@ impl<T: Iterator<Item = Token>> Compiler<T> {
     }
 
     fn string(&mut self) -> LoxResult<()> {
-        match &self.previous().kind {
-            TokenKind::String(s) => {
-                let s = s.clone();
-                self.emit_constant(Value::String(s));
-            }
+        let val = match &self.previous().kind {
+            TokenKind::String(s) => s.clone(),
             _ => unreachable!("only called for strings"),
-        }
+        };
+
+        let id = self.strings.intern(val);
+        self.emit_constant(Value::String(id.into()));
 
         Ok(())
     }
@@ -353,7 +354,7 @@ impl<T: Iterator<Item = Token>> Compiler<T> {
     }
 }
 
-pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> {
+pub fn compile(code: &str) -> Result<(Interner, Chunk), Vec<Error>> {
     let chars = code.chars().collect::<Vec<char>>();
     let tokens = scanner::scan(&chars).map_err(|errors| {
         errors.into_iter().map(Into::into).collect::<Vec<Error>>()
@@ -364,6 +365,7 @@ pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> {
         chunk: Default::default(),
         panic: false,
         errors: vec![],
+        strings: Interner::with_capacity(1024),
         current: None,
         previous: None,
     };
@@ -371,7 +373,7 @@ pub fn compile(code: &str) -> Result<Chunk, Vec<Error>> {
     compiler.compile()?;
 
     if compiler.errors.is_empty() {
-        Ok(compiler.chunk)
+        Ok((compiler.strings, compiler.chunk))
     } else {
         Err(compiler.errors)
     }
diff --git a/users/tazjin/rlox/src/bytecode/mod.rs b/users/tazjin/rlox/src/bytecode/mod.rs
index b3e0966fa071..c6f3a737aef8 100644
--- a/users/tazjin/rlox/src/bytecode/mod.rs
+++ b/users/tazjin/rlox/src/bytecode/mod.rs
@@ -27,7 +27,7 @@ impl crate::Lox for Interpreter {
         &mut self,
         code: String,
     ) -> Result<Self::Value, Vec<Self::Error>> {
-        let chunk = compiler::compile(&code)?;
-        vm::interpret(chunk).map_err(|e| vec![e])
+        let (strings, chunk) = compiler::compile(&code)?;
+        vm::interpret(strings, chunk).map_err(|e| vec![e])
     }
 }
diff --git a/users/tazjin/rlox/src/bytecode/value.rs b/users/tazjin/rlox/src/bytecode/value.rs
index c6667a698ec9..19a4dcc92eac 100644
--- a/users/tazjin/rlox/src/bytecode/value.rs
+++ b/users/tazjin/rlox/src/bytecode/value.rs
@@ -1,9 +1,29 @@
+use super::interner::InternedStr;
+
 #[derive(Clone, Debug, PartialEq)]
 pub enum Value {
     Nil,
     Bool(bool),
     Number(f64),
-    String(String),
+    String(LoxString),
+}
+
+#[derive(Clone, Debug, PartialEq)]
+pub enum LoxString {
+    Heap(String),
+    Interned(InternedStr),
+}
+
+impl From<String> for LoxString {
+    fn from(s: String) -> Self {
+        LoxString::Heap(s)
+    }
+}
+
+impl From<InternedStr> for LoxString {
+    fn from(s: InternedStr) -> Self {
+        LoxString::Interned(s)
+    }
 }
 
 impl Value {
diff --git a/users/tazjin/rlox/src/bytecode/vm.rs b/users/tazjin/rlox/src/bytecode/vm.rs
index 730eee321206..02db30de58a9 100644
--- a/users/tazjin/rlox/src/bytecode/vm.rs
+++ b/users/tazjin/rlox/src/bytecode/vm.rs
@@ -1,7 +1,8 @@
 use super::chunk;
 use super::errors::*;
+use super::interner::Interner;
 use super::opcode::OpCode;
-use super::value::Value;
+use super::value::{LoxString, Value};
 
 pub struct VM {
     chunk: chunk::Chunk,
@@ -11,6 +12,7 @@ pub struct VM {
     ip: usize,
 
     stack: Vec<Value>,
+    strings: Interner,
 }
 
 impl VM {
@@ -69,7 +71,10 @@ impl VM {
             self.ip += 1;
 
             match op {
-                OpCode::OpReturn => return Ok(self.pop()),
+                OpCode::OpReturn => {
+                    let val = self.pop();
+                    return Ok(self.return_value(val));
+                }
 
                 OpCode::OpConstant(idx) => {
                     let c = self.chunk.constant(*idx).clone();
@@ -114,9 +119,9 @@ impl VM {
 
                     match (a, b) {
                         (Value::String(s_a), Value::String(s_b)) => {
-                            let mut new_s = s_a.clone();
-                            new_s.push_str(&s_b);
-                            self.push(Value::String(new_s));
+                            let mut new_s = self.resolve_str(&s_a).to_string();
+                            new_s.push_str(self.resolve_str(&s_b));
+                            self.push(Value::String(new_s.into()));
                         }
 
                         (Value::Number(n_a), Value::Number(n_b)) =>
@@ -136,11 +141,30 @@ impl VM {
             println!("=> {:?}", self.stack);
         }
     }
+
+    // For some types of values (e.g. interned strings), returns
+    // should no longer include any references into the interpreter.
+    fn return_value(&self, val: Value) -> Value {
+        match val {
+            Value::String(string @ LoxString::Interned(_)) => {
+                Value::String(self.resolve_str(&string).to_string().into())
+            }
+            _ => val,
+        }
+    }
+
+    fn resolve_str<'a>(&'a self, string: &'a LoxString) -> &'a str {
+        match string {
+            LoxString::Heap(s) => s.as_str(),
+            LoxString::Interned(id) => self.strings.lookup(*id),
+        }
+    }
 }
 
-pub fn interpret(chunk: chunk::Chunk) -> LoxResult<Value> {
+pub fn interpret(strings: Interner, chunk: chunk::Chunk) -> LoxResult<Value> {
     let mut vm = VM {
         chunk,
+        strings,
         ip: 0,
         stack: vec![],
     };