14
14
15
15
use arena:: DroplessArena ;
16
16
use rustc_data_structures:: fx:: FxHashMap ;
17
+ use rustc_data_structures:: indexed_vec:: Idx ;
17
18
use serialize:: { Decodable , Decoder , Encodable , Encoder } ;
18
19
19
20
use std:: fmt;
@@ -143,9 +144,18 @@ impl Decodable for Ident {
143
144
}
144
145
}
145
146
146
- /// A symbol is an interned or gensymed string.
147
+ /// A symbol is an interned or gensymed string. The use of newtype_index! means
148
+ /// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves
149
+ /// the last 256 values for tagging purposes.
150
+ ///
151
+ /// Note that Symbol cannot be a newtype_index! directly because it implements
152
+ /// fmt::Debug, Encodable, and Decodable in special ways.
147
153
#[ derive( Clone , Copy , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
148
- pub struct Symbol ( u32 ) ;
154
+ pub struct Symbol ( SymbolIndex ) ;
155
+
156
+ newtype_index ! {
157
+ pub struct SymbolIndex { .. }
158
+ }
149
159
150
160
// The interner is pointed to by a thread local value which is only set on the main thread
151
161
// with parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -156,6 +166,10 @@ impl !Send for Symbol { }
156
166
impl !Sync for Symbol { }
157
167
158
168
impl Symbol {
169
+ const fn new ( n : u32 ) -> Self {
170
+ Symbol ( SymbolIndex :: from_u32_const ( n) )
171
+ }
172
+
159
173
/// Maps a string to its interned representation.
160
174
pub fn intern ( string : & str ) -> Self {
161
175
with_interner ( |interner| interner. intern ( string) )
@@ -189,15 +203,15 @@ impl Symbol {
189
203
}
190
204
191
205
pub fn as_u32 ( self ) -> u32 {
192
- self . 0
206
+ self . 0 . as_u32 ( )
193
207
}
194
208
}
195
209
196
210
impl fmt:: Debug for Symbol {
197
211
fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
198
212
let is_gensymed = with_interner ( |interner| interner. is_gensymed ( * self ) ) ;
199
213
if is_gensymed {
200
- write ! ( f, "{}({})" , self , self . 0 )
214
+ write ! ( f, "{}({:? })" , self , self . 0 )
201
215
} else {
202
216
write ! ( f, "{}" , self )
203
217
}
@@ -229,6 +243,9 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
229
243
}
230
244
231
245
// The `&'static str`s in this type actually point into the arena.
246
+ //
247
+ // Note that normal symbols are indexed upward from 0, and gensyms are indexed
248
+ // downward from SymbolIndex::MAX_AS_U32.
232
249
#[ derive( Default ) ]
233
250
pub struct Interner {
234
251
arena : DroplessArena ,
@@ -243,7 +260,7 @@ impl Interner {
243
260
for & string in init {
244
261
if string == "" {
245
262
// We can't allocate empty strings in the arena, so handle this here.
246
- let name = Symbol ( this. strings . len ( ) as u32 ) ;
263
+ let name = Symbol :: new ( this. strings . len ( ) as u32 ) ;
247
264
this. names . insert ( "" , name) ;
248
265
this. strings . push ( "" ) ;
249
266
} else {
@@ -258,7 +275,7 @@ impl Interner {
258
275
return name;
259
276
}
260
277
261
- let name = Symbol ( self . strings . len ( ) as u32 ) ;
278
+ let name = Symbol :: new ( self . strings . len ( ) as u32 ) ;
262
279
263
280
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
264
281
// UTF-8.
@@ -276,10 +293,10 @@ impl Interner {
276
293
}
277
294
278
295
pub fn interned ( & self , symbol : Symbol ) -> Symbol {
279
- if ( symbol. 0 as usize ) < self . strings . len ( ) {
296
+ if ( symbol. 0 . as_usize ( ) ) < self . strings . len ( ) {
280
297
symbol
281
298
} else {
282
- self . interned ( self . gensyms [ ( ! 0 - symbol. 0 ) as usize ] )
299
+ self . interned ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] )
283
300
}
284
301
}
285
302
@@ -290,17 +307,17 @@ impl Interner {
290
307
291
308
fn gensymed ( & mut self , symbol : Symbol ) -> Symbol {
292
309
self . gensyms . push ( symbol) ;
293
- Symbol ( ! 0 - self . gensyms . len ( ) as u32 + 1 )
310
+ Symbol :: new ( SymbolIndex :: MAX_AS_U32 - self . gensyms . len ( ) as u32 + 1 )
294
311
}
295
312
296
313
fn is_gensymed ( & mut self , symbol : Symbol ) -> bool {
297
- symbol. 0 as usize >= self . strings . len ( )
314
+ symbol. 0 . as_usize ( ) >= self . strings . len ( )
298
315
}
299
316
300
317
pub fn get ( & self , symbol : Symbol ) -> & str {
301
- match self . strings . get ( symbol. 0 as usize ) {
318
+ match self . strings . get ( symbol. 0 . as_usize ( ) ) {
302
319
Some ( string) => string,
303
- None => self . get ( self . gensyms [ ( ! 0 - symbol. 0 ) as usize ] ) ,
320
+ None => self . get ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] ) ,
304
321
}
305
322
}
306
323
}
@@ -324,7 +341,7 @@ macro_rules! declare_keywords {(
324
341
$(
325
342
#[ allow( non_upper_case_globals) ]
326
343
pub const $konst: Keyword = Keyword {
327
- ident: Ident :: with_empty_ctxt( super :: Symbol ( $index) )
344
+ ident: Ident :: with_empty_ctxt( super :: Symbol :: new ( $index) )
328
345
} ;
329
346
) *
330
347
@@ -709,19 +726,19 @@ mod tests {
709
726
fn interner_tests ( ) {
710
727
let mut i: Interner = Interner :: default ( ) ;
711
728
// first one is zero:
712
- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
729
+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
713
730
// re-use gets the same entry:
714
- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
731
+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
715
732
// different string gets a different #:
716
- assert_eq ! ( i. intern( "cat" ) , Symbol ( 1 ) ) ;
717
- assert_eq ! ( i. intern( "cat" ) , Symbol ( 1 ) ) ;
733
+ assert_eq ! ( i. intern( "cat" ) , Symbol :: new ( 1 ) ) ;
734
+ assert_eq ! ( i. intern( "cat" ) , Symbol :: new ( 1 ) ) ;
718
735
// dog is still at zero
719
- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
720
- assert_eq ! ( i. gensym( "zebra" ) , Symbol ( 4294967295 ) ) ;
721
- // gensym of same string gets new number :
722
- assert_eq ! ( i. gensym( "zebra" ) , Symbol ( 4294967294 ) ) ;
736
+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
737
+ assert_eq ! ( i. gensym( "zebra" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 ) ) ;
738
+ // gensym of same string gets new number:
739
+ assert_eq ! ( i. gensym( "zebra" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 - 1 ) ) ;
723
740
// gensym of *existing* string gets new number:
724
- assert_eq ! ( i. gensym( "dog" ) , Symbol ( 4294967293 ) ) ;
741
+ assert_eq ! ( i. gensym( "dog" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 - 2 ) ) ;
725
742
}
726
743
727
744
#[ test]
0 commit comments