1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
//! String-interning implementation for values that are likely to
//! benefit from fast comparisons and deduplication (e.g. instances of
//! variable names).
//!
//! This uses a trick from the typed-arena crate for guaranteeing
//! stable addresses by never resizing the existing String buffer, and
//! collecting full buffers in a vector.
use std::collections::HashMap;
#[cfg(test)]
mod tests;
#[derive(Clone, Copy, Debug, PartialEq, Hash)]
pub struct InternedStr {
id: usize,
}
#[derive(Default)]
pub struct Interner {
map: HashMap<&'static str, InternedStr>,
vec: Vec<&'static str>,
buf: String,
full: Vec<String>,
}
impl Interner {
pub fn with_capacity(cap: usize) -> Self {
Interner {
buf: String::with_capacity(cap),
..Default::default()
}
}
pub fn intern<S: AsRef<str>>(&mut self, name: S) -> InternedStr {
let name = name.as_ref();
if let Some(&id) = self.map.get(name) {
return id;
}
let name = self.alloc(name);
let id = InternedStr {
id: self.vec.len() as usize,
};
self.map.insert(name, id);
self.vec.push(name);
debug_assert!(self.lookup(id) == name);
debug_assert!(self.intern(name) == id);
id
}
pub fn lookup<'a>(&'a self, id: InternedStr) -> &'a str {
self.vec[id.id]
}
fn alloc<'a>(&'a mut self, name: &str) -> &'static str {
let cap = self.buf.capacity();
if cap < self.buf.len() + name.len() {
let new_cap = (cap.max(name.len()) + 1).next_power_of_two();
let new_buf = String::with_capacity(new_cap);
let old_buf = std::mem::replace(&mut self.buf, new_buf);
self.full.push(old_buf);
}
let interned: &'a str = {
let start = self.buf.len();
self.buf.push_str(name);
&self.buf[start..]
};
unsafe {
// This is sound for two reasons:
//
// 1. This function (Interner::alloc) is private, which
// prevents users from allocating a supposedly static
// reference.
//
// 2. Interner::lookup explicitly shortens the lifetime of
// references that are handed out to that of the
// reference to self.
return &*(interned as *const str);
}
}
}
|