Skip to content

Commit 0f68749

Browse files
committed
Use a newtype_index! within Symbol.
This shrinks `Option<Symbol>` from 8 bytes to 4 bytes, which shrinks `Token` from 24 bytes to 16 bytes. This reduces instruction counts by up to 1% across a range of benchmarks.
1 parent b755501 commit 0f68749

File tree

3 files changed

+46
-22
lines changed

3 files changed

+46
-22
lines changed

src/libsyntax/parse/token.rs

+4
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ pub enum Token {
207207
Eof,
208208
}
209209

210+
// `Token` is used a lot. Make sure it doesn't unintentionally get bigger.
211+
#[cfg(target_arch = "x86_64")]
212+
static_assert!(MEM_SIZE_OF_STATEMENT: mem::size_of::<Token>() == 16);
213+
210214
impl Token {
211215
pub fn interpolated(nt: Nonterminal) -> Token {
212216
Token::Interpolated(Lrc::new((nt, LazyTokenStream::new())))

src/libsyntax_pos/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,13 @@
2424
#![feature(nll)]
2525
#![feature(non_exhaustive)]
2626
#![feature(optin_builtin_traits)]
27+
#![feature(rustc_attrs)]
2728
#![feature(specialization)]
29+
#![feature(step_trait)]
2830
#![cfg_attr(not(stage0), feature(stdsimd))]
2931

3032
extern crate arena;
33+
#[macro_use]
3134
extern crate rustc_data_structures;
3235

3336
#[macro_use]

src/libsyntax_pos/symbol.rs

+39-22
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
1515
use arena::DroplessArena;
1616
use rustc_data_structures::fx::FxHashMap;
17+
use rustc_data_structures::indexed_vec::Idx;
1718
use serialize::{Decodable, Decoder, Encodable, Encoder};
1819

1920
use std::fmt;
@@ -143,9 +144,18 @@ impl Decodable for Ident {
143144
}
144145
}
145146

146-
/// A symbol is an interned or gensymed string.
147+
/// A symbol is an interned or gensymed string. The use of newtype_index! means
148+
/// that `Option<Symbol>` only takes up 4 bytes, because newtype_index! reserves
149+
/// the last 256 values for tagging purposes.
150+
///
151+
/// Note that Symbol cannot be a newtype_index! directly because it implements
152+
/// fmt::Debug, Encodable, and Decodable in special ways.
147153
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
148-
pub struct Symbol(u32);
154+
pub struct Symbol(SymbolIndex);
155+
156+
newtype_index! {
157+
pub struct SymbolIndex { .. }
158+
}
149159

150160
// The interner is pointed to by a thread local value which is only set on the main thread
151161
// when parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -156,6 +166,10 @@ impl !Send for Symbol { }
156166
impl !Sync for Symbol { }
157167

158168
impl Symbol {
169+
const fn new(n: u32) -> Self {
170+
Symbol(SymbolIndex::from_u32_const(n))
171+
}
172+
159173
/// Maps a string to its interned representation.
160174
pub fn intern(string: &str) -> Self {
161175
with_interner(|interner| interner.intern(string))
@@ -189,15 +203,15 @@ impl Symbol {
189203
}
190204

191205
pub fn as_u32(self) -> u32 {
192-
self.0
206+
self.0.as_u32()
193207
}
194208
}
195209

196210
impl fmt::Debug for Symbol {
197211
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
198212
let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
199213
if is_gensymed {
200-
write!(f, "{}({})", self, self.0)
214+
write!(f, "{}({:?})", self, self.0)
201215
} else {
202216
write!(f, "{}", self)
203217
}
@@ -229,6 +243,9 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
229243
}
230244

231245
// The `&'static str`s in this type actually point into the arena.
246+
//
247+
// Note that normal symbols are indexed upward from 0, and gensyms are indexed
248+
// downward from SymbolIndex::MAX_AS_U32.
232249
#[derive(Default)]
233250
pub struct Interner {
234251
arena: DroplessArena,
@@ -243,7 +260,7 @@ impl Interner {
243260
for &string in init {
244261
if string == "" {
245262
// We can't allocate empty strings in the arena, so handle this here.
246-
let name = Symbol(this.strings.len() as u32);
263+
let name = Symbol::new(this.strings.len() as u32);
247264
this.names.insert("", name);
248265
this.strings.push("");
249266
} else {
@@ -258,7 +275,7 @@ impl Interner {
258275
return name;
259276
}
260277

261-
let name = Symbol(self.strings.len() as u32);
278+
let name = Symbol::new(self.strings.len() as u32);
262279

263280
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
264281
// UTF-8.
@@ -276,10 +293,10 @@ impl Interner {
276293
}
277294

278295
pub fn interned(&self, symbol: Symbol) -> Symbol {
279-
if (symbol.0 as usize) < self.strings.len() {
296+
if (symbol.0.as_usize()) < self.strings.len() {
280297
symbol
281298
} else {
282-
self.interned(self.gensyms[(!0 - symbol.0) as usize])
299+
self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
283300
}
284301
}
285302

@@ -290,17 +307,17 @@ impl Interner {
290307

291308
fn gensymed(&mut self, symbol: Symbol) -> Symbol {
292309
self.gensyms.push(symbol);
293-
Symbol(!0 - self.gensyms.len() as u32 + 1)
310+
Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
294311
}
295312

296313
fn is_gensymed(&mut self, symbol: Symbol) -> bool {
297-
symbol.0 as usize >= self.strings.len()
314+
symbol.0.as_usize() >= self.strings.len()
298315
}
299316

300317
pub fn get(&self, symbol: Symbol) -> &str {
301-
match self.strings.get(symbol.0 as usize) {
318+
match self.strings.get(symbol.0.as_usize()) {
302319
Some(string) => string,
303-
None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
320+
None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
304321
}
305322
}
306323
}
@@ -324,7 +341,7 @@ macro_rules! declare_keywords {(
324341
$(
325342
#[allow(non_upper_case_globals)]
326343
pub const $konst: Keyword = Keyword {
327-
ident: Ident::with_empty_ctxt(super::Symbol($index))
344+
ident: Ident::with_empty_ctxt(super::Symbol::new($index))
328345
};
329346
)*
330347

@@ -709,19 +726,19 @@ mod tests {
709726
fn interner_tests() {
710727
let mut i: Interner = Interner::default();
711728
// first one is zero:
712-
assert_eq!(i.intern("dog"), Symbol(0));
729+
assert_eq!(i.intern("dog"), Symbol::new(0));
713730
// re-use gets the same entry:
714-
assert_eq!(i.intern("dog"), Symbol(0));
731+
assert_eq!(i.intern("dog"), Symbol::new(0));
715732
// different string gets a different #:
716-
assert_eq!(i.intern("cat"), Symbol(1));
717-
assert_eq!(i.intern("cat"), Symbol(1));
733+
assert_eq!(i.intern("cat"), Symbol::new(1));
734+
assert_eq!(i.intern("cat"), Symbol::new(1));
718735
// dog is still at zero
719-
assert_eq!(i.intern("dog"), Symbol(0));
720-
assert_eq!(i.gensym("zebra"), Symbol(4294967295));
721-
// gensym of same string gets new number :
722-
assert_eq!(i.gensym("zebra"), Symbol(4294967294));
736+
assert_eq!(i.intern("dog"), Symbol::new(0));
737+
assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
738+
// gensym of same string gets new number:
739+
assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
723740
// gensym of *existing* string gets new number:
724-
assert_eq!(i.gensym("dog"), Symbol(4294967293));
741+
assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
725742
}
726743

727744
#[test]

0 commit comments

Comments
 (0)