Skip to content

Commit 7f67df4

Browse files
committed
Use a newtype_index! within Symbol.
This shrinks `Option<Symbol>` from 8 bytes to 4 bytes, which shrinks `Token` from 24 bytes to 16 bytes. This reduces instruction counts by up to 1% across a range of benchmarks.
1 parent b755501 commit 7f67df4

File tree

3 files changed

+43
-22
lines changed

3 files changed

+43
-22
lines changed

src/libsyntax/parse/token.rs

+4
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,10 @@ pub enum Token {
207207
Eof,
208208
}
209209

210+
// `Token` is used a lot. Make sure it doesn't unintentionally get bigger.
211+
#[cfg(target_arch = "x86_64")]
212+
static_assert!(MEM_SIZE_OF_STATEMENT: mem::size_of::<Token>() == 16);
213+
210214
impl Token {
211215
pub fn interpolated(nt: Nonterminal) -> Token {
212216
Token::Interpolated(Lrc::new((nt, LazyTokenStream::new())))

src/libsyntax_pos/lib.rs

+3
Original file line numberDiff line numberDiff line change
@@ -24,10 +24,13 @@
2424
#![feature(nll)]
2525
#![feature(non_exhaustive)]
2626
#![feature(optin_builtin_traits)]
27+
#![feature(rustc_attrs)]
2728
#![feature(specialization)]
29+
#![feature(step_trait)]
2830
#![cfg_attr(not(stage0), feature(stdsimd))]
2931

3032
extern crate arena;
33+
#[macro_use]
3134
extern crate rustc_data_structures;
3235

3336
#[macro_use]

src/libsyntax_pos/symbol.rs

+36-22
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
1515
use arena::DroplessArena;
1616
use rustc_data_structures::fx::FxHashMap;
17+
use rustc_data_structures::indexed_vec::Idx;
1718
use serialize::{Decodable, Decoder, Encodable, Encoder};
1819

1920
use std::fmt;
@@ -143,9 +144,15 @@ impl Decodable for Ident {
143144
}
144145
}
145146

146-
/// A symbol is an interned or gensymed string.
147+
/// A symbol is an interned or gensymed string. The use of newtype_index! means
148+
/// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves
149+
/// the last 256 values for tagging purposes.
147150
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
148-
pub struct Symbol(u32);
151+
pub struct Symbol(SymbolIndex);
152+
153+
newtype_index! {
154+
pub struct SymbolIndex { .. }
155+
}
149156

150157
// The interner is pointed to by a thread local value which is only set on the main thread
151158
// when parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -156,6 +163,10 @@ impl !Send for Symbol { }
156163
impl !Sync for Symbol { }
157164

158165
impl Symbol {
166+
const fn new(n: u32) -> Self {
167+
Symbol(SymbolIndex::from_u32_const(n))
168+
}
169+
159170
/// Maps a string to its interned representation.
160171
pub fn intern(string: &str) -> Self {
161172
with_interner(|interner| interner.intern(string))
@@ -189,15 +200,15 @@ impl Symbol {
189200
}
190201

191202
pub fn as_u32(self) -> u32 {
192-
self.0
203+
self.0.as_u32()
193204
}
194205
}
195206

196207
impl fmt::Debug for Symbol {
197208
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
198209
let is_gensymed = with_interner(|interner| interner.is_gensymed(*self));
199210
if is_gensymed {
200-
write!(f, "{}({})", self, self.0)
211+
write!(f, "{}({:?})", self, self.0)
201212
} else {
202213
write!(f, "{}", self)
203214
}
@@ -229,6 +240,9 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
229240
}
230241

231242
// The `&'static str`s in this type actually point into the arena.
243+
//
244+
// Note that normal symbols are indexed upward from 0, and gensyms are indexed
245+
// downward from SymbolIndex::MAX_AS_U32.
232246
#[derive(Default)]
233247
pub struct Interner {
234248
arena: DroplessArena,
@@ -243,7 +257,7 @@ impl Interner {
243257
for &string in init {
244258
if string == "" {
245259
// We can't allocate empty strings in the arena, so handle this here.
246-
let name = Symbol(this.strings.len() as u32);
260+
let name = Symbol::new(this.strings.len() as u32);
247261
this.names.insert("", name);
248262
this.strings.push("");
249263
} else {
@@ -258,7 +272,7 @@ impl Interner {
258272
return name;
259273
}
260274

261-
let name = Symbol(self.strings.len() as u32);
275+
let name = Symbol::new(self.strings.len() as u32);
262276

263277
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
264278
// UTF-8.
@@ -276,10 +290,10 @@ impl Interner {
276290
}
277291

278292
pub fn interned(&self, symbol: Symbol) -> Symbol {
279-
if (symbol.0 as usize) < self.strings.len() {
293+
if (symbol.0.as_usize()) < self.strings.len() {
280294
symbol
281295
} else {
282-
self.interned(self.gensyms[(!0 - symbol.0) as usize])
296+
self.interned(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize])
283297
}
284298
}
285299

@@ -290,17 +304,17 @@ impl Interner {
290304

291305
fn gensymed(&mut self, symbol: Symbol) -> Symbol {
292306
self.gensyms.push(symbol);
293-
Symbol(!0 - self.gensyms.len() as u32 + 1)
307+
Symbol::new(SymbolIndex::MAX_AS_U32 - self.gensyms.len() as u32 + 1)
294308
}
295309

296310
fn is_gensymed(&mut self, symbol: Symbol) -> bool {
297-
symbol.0 as usize >= self.strings.len()
311+
symbol.0.as_usize() >= self.strings.len()
298312
}
299313

300314
pub fn get(&self, symbol: Symbol) -> &str {
301-
match self.strings.get(symbol.0 as usize) {
315+
match self.strings.get(symbol.0.as_usize()) {
302316
Some(string) => string,
303-
None => self.get(self.gensyms[(!0 - symbol.0) as usize]),
317+
None => self.get(self.gensyms[(SymbolIndex::MAX_AS_U32 - symbol.0.as_u32()) as usize]),
304318
}
305319
}
306320
}
@@ -324,7 +338,7 @@ macro_rules! declare_keywords {(
324338
$(
325339
#[allow(non_upper_case_globals)]
326340
pub const $konst: Keyword = Keyword {
327-
ident: Ident::with_empty_ctxt(super::Symbol($index))
341+
ident: Ident::with_empty_ctxt(super::Symbol::new($index))
328342
};
329343
)*
330344

@@ -709,19 +723,19 @@ mod tests {
709723
fn interner_tests() {
710724
let mut i: Interner = Interner::default();
711725
// first one is zero:
712-
assert_eq!(i.intern("dog"), Symbol(0));
726+
assert_eq!(i.intern("dog"), Symbol::new(0));
713727
// re-use gets the same entry:
714-
assert_eq!(i.intern("dog"), Symbol(0));
728+
assert_eq!(i.intern("dog"), Symbol::new(0));
715729
// different string gets a different #:
716-
assert_eq!(i.intern("cat"), Symbol(1));
717-
assert_eq!(i.intern("cat"), Symbol(1));
730+
assert_eq!(i.intern("cat"), Symbol::new(1));
731+
assert_eq!(i.intern("cat"), Symbol::new(1));
718732
// dog is still at zero
719-
assert_eq!(i.intern("dog"), Symbol(0));
720-
assert_eq!(i.gensym("zebra"), Symbol(4294967295));
721-
// gensym of same string gets new number :
722-
assert_eq!(i.gensym("zebra"), Symbol(4294967294));
733+
assert_eq!(i.intern("dog"), Symbol::new(0));
734+
assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32));
735+
// gensym of same string gets new number:
736+
assert_eq!(i.gensym("zebra"), Symbol::new(SymbolIndex::MAX_AS_U32 - 1));
723737
// gensym of *existing* string gets new number:
724-
assert_eq!(i.gensym("dog"), Symbol(4294967293));
738+
assert_eq!(i.gensym("dog"), Symbol::new(SymbolIndex::MAX_AS_U32 - 2));
725739
}
726740

727741
#[test]

0 commit comments

Comments
 (0)