1
- // Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
1
+ // Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2
2
// file at the top-level directory of this distribution and at
3
3
// http://rust-lang.org/COPYRIGHT.
4
4
//
8
8
// option. This file may not be copied, modified, or distributed
9
9
// except according to those terms.
10
10
11
- //! Functions for sampling data
11
+ //! Functions for randomly accessing and sampling sequences.
12
12
13
13
use super :: Rng ;
14
14
use std:: collections:: hash_map:: HashMap ;
15
15
16
- /// The `Sample` trait provides the `sample` method .
16
+ /// Randomly sample *up to* `amount` elements from a finite iterator .
17
17
///
18
- /// This is intended to be implemented for containers that:
19
- /// - Can be sampled in `O(amount)` time.
20
- /// - Whos items can be `cloned`.
18
+ /// The values are non-repeating but the order of elements returned is *not* random.
21
19
///
22
- /// If cloning is impossible or expensive, use `sample_ref` instead.
23
- pub trait Sample {
24
- /// The returned sampled data. Typically the either a `Vec<T>` or a new instance of the
25
- /// container's own type.
26
- type Sampled ;
27
-
28
- /// Return exactly `amount` randomly sampled values.
29
- ///
30
- /// Any type which implements `sample` should guarantee that:
31
- /// - Both the order and values of `Sampled` is random.
32
- /// - The implementation uses `O(amount)` speed and memory
33
- /// - The returned values are not references (if so, implement `SampleRef` instead).
34
- ///
35
- /// Panics if `amount > self.len()`
36
- ///
37
- /// # Example
38
- ///
39
- /// ```rust
40
- /// use rand::{thread_rng, Sample};
41
- ///
42
- /// let mut rng = thread_rng();
43
- /// let values = vec![5, 6, 1, 3, 4, 6, 7];
44
- /// println!("{:?}", values.sample(&mut rng, 3))
45
- /// ```
46
- fn sample < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Self :: Sampled ;
47
- }
48
-
49
- /// The `SampleRef` trait provides the `sample_ref` method.
50
- ///
51
- /// This is intended to be implemented for containers that which can be sampled in `O(amount)` time
52
- /// and want a fast way to give references to a sample of their items.
53
- pub trait SampleRef {
54
- /// The returned sampled data. Typically the either a `Vec<&T>` or a new instance of the
55
- /// container's own type containing references to the underlying data.
56
- type SampledRef ;
57
-
58
- /// Return exactly `amount` references to randomly sampled values.
59
- ///
60
- /// Any type which implements `sample_ref` should guarantee that:
61
- /// - Both the order and values of `SampledRef` is random.
62
- /// - The implementation uses `O(amount)` speed and memory.
63
- /// - The returned values are not copies/clones (if so, implement `Sample` instead).
64
- ///
65
- /// Panics if `amount > self.len()`
66
- ///
67
- /// # Example
68
- ///
69
- /// ```rust
70
- /// use rand::{thread_rng, SampleRef};
71
- ///
72
- /// let mut rng = thread_rng();
73
- /// let values = vec![5, 6, 1, 3, 4, 6, 7];
74
- /// println!("{:?}", values.as_slice().sample_ref(&mut rng, 3))
75
- /// ```
76
- fn sample_ref < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Self :: SampledRef ;
77
- }
78
-
79
- /// Randomly sample *up to* `amount` elements from a finite iterator using a reservoir.
20
+ /// This implementation uses `O(len(iterable))` time and `O(amount)` memory.
80
21
///
81
- /// The order of elements in the sample is not random. In fact, if `len(iterable) <= amount` then
82
- /// the output will be in the exact order they were collected.
83
- ///
84
- /// The reservoir method used allocates only an `Vec` of size `amount`. The size of the iterable
85
- /// does not affect the amount of memory used.
22
+ /// > If `len(iterable) <= amount` then the values will be in sequential order. In all other
23
+ /// > cases the order of the elements is neither random nor guaranteed.
86
24
///
87
25
/// # Example
88
26
///
89
27
/// ```rust
90
- /// use rand::{thread_rng, sample_reservoir };
28
+ /// use rand::{thread_rng, seq };
91
29
///
92
30
/// let mut rng = thread_rng();
93
- /// let sample = sample_reservoir(&mut rng, 1..100, 5);
31
+ /// let sample = seq:: sample_reservoir(&mut rng, 1..100, 5);
94
32
/// println!("{:?}", sample);
95
33
/// ```
96
34
pub fn sample_reservoir < T , I , R > ( rng : & mut R , iterable : I , amount : usize ) -> Vec < T >
@@ -110,18 +48,73 @@ pub fn sample_reservoir<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Vec
110
48
}
111
49
}
112
50
}
51
+ // There is a rare corner case where `size(iterable) <<< amount`,
52
+ // we don't want to be hanging onto extra memory.
53
+ reservoir. shrink_to_fit ( ) ;
113
54
reservoir
114
55
}
115
56
116
- /// Sample (non-repeating) exactly `amount` of indices from a sequence of the given `length`.
57
+ /// Randomly sample exactly `amount` values from `slice`.
58
+ ///
59
+ /// The values are non-repeating and in random order.
60
+ ///
61
+ /// This implementation uses `O(amount)` time and memory.
62
+ ///
63
+ /// Panics if `amount > slice.len()`
64
+ ///
65
+ /// # Example
66
+ ///
67
+ /// ```rust
68
+ /// use rand::{thread_rng, seq};
69
+ ///
70
+ /// let mut rng = thread_rng();
71
+ /// let values = vec![5, 6, 1, 3, 4, 6, 7];
72
+ /// println!("{:?}", seq::sample_slice(&mut rng, &values, 3));
73
+ /// ```
74
+ pub fn sample_slice < R : Rng , T : Clone > ( rng : & mut R , slice : & [ T ] , amount : usize ) -> Vec < T > {
75
+ let indices = sample_indices ( rng, slice. len ( ) , amount) ;
76
+
77
+ let mut out = Vec :: with_capacity ( amount) ;
78
+ out. extend ( indices. iter ( ) . map ( |i| slice[ * i] . clone ( ) ) ) ;
79
+ out
80
+ }
81
+
82
+ /// Randomly sample exactly `amount` references from `slice`.
83
+ ///
84
+ /// The references are non-repeating and in random order.
117
85
///
118
- /// The returned elements and their order are random .
86
+ /// This implementation uses `O(amount)` time and memory .
119
87
///
120
- /// Panics if `amount > length`
88
+ /// Panics if `amount > slice.len()`
89
+ ///
90
+ /// # Example
91
+ ///
92
+ /// ```rust
93
+ /// use rand::{thread_rng, seq};
94
+ ///
95
+ /// let mut rng = thread_rng();
96
+ /// let values = vec![5, 6, 1, 3, 4, 6, 7];
97
+ /// println!("{:?}", seq::sample_slice_ref(&mut rng, &values, 3));
98
+ /// ```
99
+ pub fn sample_slice_ref < ' a , R : Rng , T > ( rng : & mut R , slice : & ' a [ T ] , amount : usize ) -> Vec < & ' a T > {
100
+ let indices = sample_indices ( rng, slice. len ( ) , amount) ;
101
+
102
+ let mut out = Vec :: with_capacity ( amount) ;
103
+ out. extend ( indices. iter ( ) . map ( |i| & slice[ * i] ) ) ;
104
+ out
105
+ }
106
+
107
+ /// Randomly sample exactly `amount` indices from `0..length`.
121
108
///
122
- /// TODO: IMO this should be made public since it can be generally useful, although
123
- /// there might be a way to make the output type more generic/compact.
124
- fn sample_indices < R > ( rng : & mut R , length : usize , amount : usize ) -> Vec < usize >
109
+ /// The values are non-repeating and in random order.
110
+ ///
111
+ /// This implementation uses `O(amount)` time and memory.
112
+ ///
113
+ /// This method is used internally by the slice sampling methods, but it can sometimes be useful to
114
+ /// have the indices themselves so this is provided as an alternative.
115
+ ///
116
+ /// Panics if `amount > length`
117
+ pub fn sample_indices < R > ( rng : & mut R , length : usize , amount : usize ) -> Vec < usize >
125
118
where R : Rng ,
126
119
{
127
120
if amount > length {
@@ -132,7 +125,7 @@ fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize>
132
125
// if we use the `cached` version we will have to allocate `amount` as a HashMap as well since
133
126
// it inserts an element for every loop.
134
127
//
135
- // Therefore, if amount >= length / 2, inplace will be both faster and use less memory.
128
+ // Therefore, if ` amount >= length / 2` then inplace will be both faster and use less memory.
136
129
//
137
130
// TODO: there is probably even more fine-tuning that can be done here since
138
131
// `HashMap::with_capacity(amount)` probably allocates more than `amount` in practice,
@@ -156,23 +149,25 @@ fn sample_indices_inplace<R>(rng: &mut R, length: usize, amount: usize) -> Vec<u
156
149
where R : Rng ,
157
150
{
158
151
debug_assert ! ( amount <= length) ;
159
- let amount = if amount == length {
152
+ let mut indices: Vec < usize > = Vec :: with_capacity ( length) ;
153
+ indices. extend ( 0 ..length) ;
154
+ let end_i = if length != 0 && amount == length {
160
155
// It isn't necessary to shuffle the final element if we are shuffling
161
156
// the whole array... it would just be shuffled with itself
157
+ //
158
+ // Also, `rng.gen_range(i, i)` panics.
162
159
amount - 1
163
160
} else {
164
161
amount
165
162
} ;
166
-
167
- let mut indices: Vec < usize > = Vec :: with_capacity ( length) ;
168
- indices. extend ( 0 ..length) ;
169
- for i in 0 ..amount {
163
+ for i in 0 ..end_i {
170
164
let j: usize = rng. gen_range ( i, length) ;
171
165
let tmp = indices[ i] ;
172
166
indices[ i] = indices[ j] ;
173
167
indices[ j] = tmp;
174
168
}
175
169
indices. truncate ( amount) ;
170
+ debug_assert_eq ! ( indices. len( ) , amount) ;
176
171
indices
177
172
}
178
173
@@ -213,52 +208,10 @@ fn sample_indices_cache<R>(
213
208
// note that in the inplace version, slice[i] is automatically "returned" value
214
209
out. push ( x) ;
215
210
}
211
+ debug_assert_eq ! ( out. len( ) , amount) ;
216
212
out
217
213
}
218
214
219
- impl < ' a , T : Clone > Sample for & ' a [ T ] {
220
- type Sampled = Vec < T > ;
221
-
222
- fn sample < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Vec < T > {
223
- let indices = sample_indices ( rng, self . len ( ) , amount) ;
224
-
225
- let mut out = Vec :: with_capacity ( amount) ;
226
- out. extend ( indices. iter ( ) . map ( |i| self [ * i] . clone ( ) ) ) ;
227
- out
228
- }
229
- }
230
-
231
- impl < ' a , T : Clone > Sample for Vec < T > {
232
- type Sampled = Vec < T > ;
233
-
234
- fn sample < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Vec < T > {
235
- self . as_slice ( ) . sample ( rng, amount)
236
- }
237
- }
238
-
239
- impl < ' a , T > SampleRef for & ' a [ T ] {
240
- type SampledRef = Vec < & ' a T > ;
241
-
242
- fn sample_ref < R : Rng > ( & self , rng : & mut R , amount : usize ) -> Vec < & ' a T > {
243
- let indices = sample_indices ( rng, self . len ( ) , amount) ;
244
-
245
- let mut out = Vec :: with_capacity ( amount) ;
246
- out. extend ( indices. iter ( ) . map ( |i| & self [ * i] ) ) ;
247
- out
248
- }
249
- }
250
-
251
- // TODO: It looks like implementing this depends on RFC 1598 being implemented.
252
- // See this: https://github.com/rust-lang/rfcs/issues/1965
253
- //
254
- // impl<'a, T> SampleRef for Vec<&'a T>{
255
- // type SampledRef = Vec<&'a T>;
256
- //
257
- // fn sample_ref<R: Rng>(&'a self, rng: &mut R, amount: usize) -> Vec<&'a T> {
258
- // self.as_slice().sample_ref(rng, amount)
259
- // }
260
- // }
261
-
262
215
#[ cfg( test) ]
263
216
mod test {
264
217
use super :: * ;
@@ -281,11 +234,28 @@ mod test {
281
234
* * e >= min_val && * * e <= max_val
282
235
} ) ) ;
283
236
}
237
+ #[ test]
238
+ fn test_sample_slice_boundaries ( ) {
239
+ let empty: & [ u8 ] = & [ ] ;
240
+
241
+ let mut r = thread_rng ( ) ;
242
+
243
+ // sample 0 items
244
+ assert_eq ! ( sample_slice( & mut r, empty, 0 ) , vec![ ] ) ;
245
+ assert_eq ! ( sample_slice( & mut r, & [ 42 , 2 , 42 ] , 0 ) , vec![ ] ) ;
246
+
247
+ // sample 1 item
248
+ assert_eq ! ( sample_slice( & mut r, & [ 42 ] , 1 ) , vec![ 42 ] ) ;
249
+ let v = sample_slice ( & mut r, & [ 1 , 42 ] , 1 ) [ 0 ] ;
250
+ assert ! ( v == 1 || v == 42 ) ;
251
+
252
+ // sample "all" the items
253
+ let v = sample_slice ( & mut r, & [ 42 , 133 ] , 2 ) ;
254
+ assert ! ( v == vec![ 42 , 133 ] || v == vec![ 133 , 42 ] ) ;
255
+ }
284
256
285
257
#[ test]
286
- /// This test mainly works by asserting that the two cases are equivalent,
287
- /// as well as equivalent to the exported function.
288
- fn test_sample_indices ( ) {
258
+ fn test_sample_slice ( ) {
289
259
let xor_rng = XorShiftRng :: from_seed;
290
260
291
261
let max_range = 100 ;
@@ -299,7 +269,7 @@ mod test {
299
269
300
270
println ! ( "Selecting indices: len={}, amount={}, seed={:?}" , length, amount, seed) ;
301
271
302
- // assert that the two methods give exactly the same result
272
+ // assert that the two index methods give exactly the same result
303
273
let inplace = sample_indices_inplace (
304
274
& mut xor_rng ( seed) , length, amount) ;
305
275
let cache = sample_indices_cache (
@@ -313,15 +283,15 @@ mod test {
313
283
assert ! ( regular. iter( ) . all( |e| * e < length) ) ;
314
284
assert_eq ! ( regular, inplace) ;
315
285
316
- // just for fun, also test sampling from a vector
286
+ // also test that sampling the slice works
317
287
let vec: Vec < usize > = ( 0 ..length) . collect ( ) ;
318
288
{
319
- let result = vec . sample ( & mut xor_rng ( seed) , amount) ;
289
+ let result = sample_slice ( & mut xor_rng ( seed) , & vec , amount) ;
320
290
assert_eq ! ( result, regular) ;
321
291
}
322
292
323
293
{
324
- let result = vec . as_slice ( ) . sample_ref ( & mut xor_rng ( seed) , amount) ;
294
+ let result = sample_slice_ref ( & mut xor_rng ( seed) , & vec , amount) ;
325
295
let expected = regular. iter ( ) . map ( |v| v) . collect :: < Vec < _ > > ( ) ;
326
296
assert_eq ! ( result, expected) ;
327
297
}
0 commit comments