14
14
15
15
use arena:: DroplessArena ;
16
16
use rustc_data_structures:: fx:: FxHashMap ;
17
+ use rustc_data_structures:: indexed_vec:: Idx ;
17
18
use serialize:: { Decodable , Decoder , Encodable , Encoder } ;
18
19
19
20
use std:: fmt;
@@ -143,9 +144,15 @@ impl Decodable for Ident {
143
144
}
144
145
}
145
146
146
- /// A symbol is an interned or gensymed string.
147
+ /// A symbol is an interned or gensymed string. The use of newtype_index! means
148
+ /// that Option<Symbol> only takes up 4 bytes, because newtype_index! reserves
149
+ /// the last 256 values for tagging purposes.
147
150
#[ derive( Clone , Copy , PartialEq , Eq , PartialOrd , Ord , Hash ) ]
148
- pub struct Symbol ( u32 ) ;
151
+ pub struct Symbol ( SymbolIndex ) ;
152
+
153
+ newtype_index ! {
154
+ pub struct SymbolIndex { .. }
155
+ }
149
156
150
157
// The interner is pointed to by a thread local value which is only set on the main thread
151
158
// when parallelization is disabled. So we don't allow `Symbol` to transfer between threads
@@ -156,6 +163,10 @@ impl !Send for Symbol { }
156
163
impl !Sync for Symbol { }
157
164
158
165
impl Symbol {
166
+ const fn new ( n : u32 ) -> Self {
167
+ Symbol ( SymbolIndex :: from_u32_const ( n) )
168
+ }
169
+
159
170
/// Maps a string to its interned representation.
160
171
pub fn intern ( string : & str ) -> Self {
161
172
with_interner ( |interner| interner. intern ( string) )
@@ -189,15 +200,15 @@ impl Symbol {
189
200
}
190
201
191
202
pub fn as_u32 ( self ) -> u32 {
192
- self . 0
203
+ self . 0 . as_u32 ( )
193
204
}
194
205
}
195
206
196
207
impl fmt:: Debug for Symbol {
197
208
fn fmt ( & self , f : & mut fmt:: Formatter ) -> fmt:: Result {
198
209
let is_gensymed = with_interner ( |interner| interner. is_gensymed ( * self ) ) ;
199
210
if is_gensymed {
200
- write ! ( f, "{}({})" , self , self . 0 )
211
+ write ! ( f, "{}({:? })" , self , self . 0 )
201
212
} else {
202
213
write ! ( f, "{}" , self )
203
214
}
@@ -229,6 +240,9 @@ impl<T: ::std::ops::Deref<Target=str>> PartialEq<T> for Symbol {
229
240
}
230
241
231
242
// The `&'static str`s in this type actually point into the arena.
243
+ //
244
+ // Note that normal symbols are indexed upward from 0, and gensyms are indexed
245
+ // downward from SymbolIndex::MAX_AS_U32.
232
246
#[ derive( Default ) ]
233
247
pub struct Interner {
234
248
arena : DroplessArena ,
@@ -243,7 +257,7 @@ impl Interner {
243
257
for & string in init {
244
258
if string == "" {
245
259
// We can't allocate empty strings in the arena, so handle this here.
246
- let name = Symbol ( this. strings . len ( ) as u32 ) ;
260
+ let name = Symbol :: new ( this. strings . len ( ) as u32 ) ;
247
261
this. names . insert ( "" , name) ;
248
262
this. strings . push ( "" ) ;
249
263
} else {
@@ -258,7 +272,7 @@ impl Interner {
258
272
return name;
259
273
}
260
274
261
- let name = Symbol ( self . strings . len ( ) as u32 ) ;
275
+ let name = Symbol :: new ( self . strings . len ( ) as u32 ) ;
262
276
263
277
// `from_utf8_unchecked` is safe since we just allocated a `&str` which is known to be
264
278
// UTF-8.
@@ -276,10 +290,10 @@ impl Interner {
276
290
}
277
291
278
292
pub fn interned ( & self , symbol : Symbol ) -> Symbol {
279
- if ( symbol. 0 as usize ) < self . strings . len ( ) {
293
+ if ( symbol. 0 . as_usize ( ) ) < self . strings . len ( ) {
280
294
symbol
281
295
} else {
282
- self . interned ( self . gensyms [ ( ! 0 - symbol. 0 ) as usize ] )
296
+ self . interned ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] )
283
297
}
284
298
}
285
299
@@ -290,17 +304,17 @@ impl Interner {
290
304
291
305
fn gensymed ( & mut self , symbol : Symbol ) -> Symbol {
292
306
self . gensyms . push ( symbol) ;
293
- Symbol ( ! 0 - self . gensyms . len ( ) as u32 + 1 )
307
+ Symbol :: new ( SymbolIndex :: MAX_AS_U32 - self . gensyms . len ( ) as u32 + 1 )
294
308
}
295
309
296
310
fn is_gensymed ( & mut self , symbol : Symbol ) -> bool {
297
- symbol. 0 as usize >= self . strings . len ( )
311
+ symbol. 0 . as_usize ( ) >= self . strings . len ( )
298
312
}
299
313
300
314
pub fn get ( & self , symbol : Symbol ) -> & str {
301
- match self . strings . get ( symbol. 0 as usize ) {
315
+ match self . strings . get ( symbol. 0 . as_usize ( ) ) {
302
316
Some ( string) => string,
303
- None => self . get ( self . gensyms [ ( ! 0 - symbol. 0 ) as usize ] ) ,
317
+ None => self . get ( self . gensyms [ ( SymbolIndex :: MAX_AS_U32 - symbol. 0 . as_u32 ( ) ) as usize ] ) ,
304
318
}
305
319
}
306
320
}
@@ -324,7 +338,7 @@ macro_rules! declare_keywords {(
324
338
$(
325
339
#[ allow( non_upper_case_globals) ]
326
340
pub const $konst: Keyword = Keyword {
327
- ident: Ident :: with_empty_ctxt( super :: Symbol ( $index) )
341
+ ident: Ident :: with_empty_ctxt( super :: Symbol :: new ( $index) )
328
342
} ;
329
343
) *
330
344
@@ -709,19 +723,19 @@ mod tests {
709
723
fn interner_tests ( ) {
710
724
let mut i: Interner = Interner :: default ( ) ;
711
725
// first one is zero:
712
- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
726
+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
713
727
// re-use gets the same entry:
714
- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
728
+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
715
729
// different string gets a different #:
716
- assert_eq ! ( i. intern( "cat" ) , Symbol ( 1 ) ) ;
717
- assert_eq ! ( i. intern( "cat" ) , Symbol ( 1 ) ) ;
730
+ assert_eq ! ( i. intern( "cat" ) , Symbol :: new ( 1 ) ) ;
731
+ assert_eq ! ( i. intern( "cat" ) , Symbol :: new ( 1 ) ) ;
718
732
// dog is still at zero
719
- assert_eq ! ( i. intern( "dog" ) , Symbol ( 0 ) ) ;
720
- assert_eq ! ( i. gensym( "zebra" ) , Symbol ( 4294967295 ) ) ;
721
- // gensym of same string gets new number :
722
- assert_eq ! ( i. gensym( "zebra" ) , Symbol ( 4294967294 ) ) ;
733
+ assert_eq ! ( i. intern( "dog" ) , Symbol :: new ( 0 ) ) ;
734
+ assert_eq ! ( i. gensym( "zebra" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 ) ) ;
735
+ // gensym of same string gets new number:
736
+ assert_eq ! ( i. gensym( "zebra" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 - 1 ) ) ;
723
737
// gensym of *existing* string gets new number:
724
- assert_eq ! ( i. gensym( "dog" ) , Symbol ( 4294967293 ) ) ;
738
+ assert_eq ! ( i. gensym( "dog" ) , Symbol :: new ( SymbolIndex :: MAX_AS_U32 - 2 ) ) ;
725
739
}
726
740
727
741
#[ test]
0 commit comments