about summary refs log tree commit diff
path: root/users/tazjin/rlox/src/bytecode/interner/mod.rs
//! String-interning implementation for values that are likely to
//! benefit from fast comparisons and deduplication (e.g. instances of
//! variable names).
//!
//! This uses a trick from the typed-arena crate for guaranteeing
//! stable addresses by never resizing the existing String buffer, and
//! collecting full buffers in a vector.

use std::collections::HashMap;

#[cfg(test)]
mod tests;

#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct InternedStr {
    id: usize,
}

#[derive(Default)]
pub struct Interner {
    map: HashMap<&'static str, InternedStr>,
    vec: Vec<&'static str>,
    buf: String,
    full: Vec<String>,
}

impl Interner {
    pub fn with_capacity(cap: usize) -> Self {
        Interner {
            buf: String::with_capacity(cap),
            ..Default::default()
        }
    }

    pub fn intern<S: AsRef<str>>(&mut self, name: S) -> InternedStr {
        let name = name.as_ref();
        if let Some(&id) = self.map.get(name) {
            return id;
        }

        let name = self.alloc(name);
        let id = InternedStr {
            id: self.vec.len() as usize,
        };

        self.map.insert(name, id);
        self.vec.push(name);

        debug_assert!(self.lookup(id) == name);
        debug_assert!(self.intern(name) == id);

        id
    }

    pub fn lookup<'a>(&'a self, id: InternedStr) -> &'a str {
        self.vec[id.id]
    }

    fn alloc<'a>(&'a mut self, name: &str) -> &'static str {
        let cap = self.buf.capacity();
        if cap < self.buf.len() + name.len() {
            let new_cap = (cap.max(name.len()) + 1).next_power_of_two();
            let new_buf = String::with_capacity(new_cap);
            let old_buf = std::mem::replace(&mut self.buf, new_buf);
            self.full.push(old_buf);
        }

        let interned: &'a str = {
            let start = self.buf.len();
            self.buf.push_str(name);
            &self.buf[start..]
        };

        unsafe {
            // This is sound for two reasons:
            //
            // 1. This function (Interner::alloc) is private, which
            //    prevents users from allocating a supposedly static
            //    reference.
            //
            // 2. Interner::lookup explicitly shortens the lifetime of
            //    references that are handed out to that of the
            //    reference to self.
            return &*(interned as *const str);
        }
    }
}