Skip to content

Commit 9723127

Browse files
committed
fixing comments
1 parent 3e9eac0 commit 9723127

File tree

2 files changed

+107
-141
lines changed

2 files changed

+107
-141
lines changed

src/lib.rs

+4-8
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@
184184
//! // where the car is. The game host will never open the door with the car.
185185
//! fn game_host_open<R: Rng>(car: u32, choice: u32, rng: &mut R) -> u32 {
186186
//! let choices = free_doors(&[car, choice]);
187-
//! rand::sample_reservoir(rng, choices.into_iter(), 1)[0]
187+
//! rand::seq::sample_slice(rng, &choices, 1)[0]
188188
//! }
189189
//!
190190
//! // Returns the door we switch to, given our current choice and
@@ -260,12 +260,8 @@ pub use os::OsRng;
260260

261261
pub use isaac::{IsaacRng, Isaac64Rng};
262262
pub use chacha::ChaChaRng;
263-
pub use sample::{
264-
// TODO: `sample` name will be deprecated in 1.0, use `sample_reservoir` instead
265-
sample_reservoir as sample,
266-
sample_reservoir,
267-
Sample,
268-
SampleRef};
263+
#[deprecated(since="0.3.18", note="renamed to seq::sample_reservoir")]
264+
pub use seq::{sample_reservoir as sample};
269265

270266
#[cfg(target_pointer_width = "32")]
271267
use IsaacRng as IsaacWordRng;
@@ -282,7 +278,7 @@ pub mod reseeding;
282278
mod rand_impls;
283279
pub mod os;
284280
pub mod read;
285-
mod sample;
281+
pub mod seq;
286282

287283
#[allow(bad_style)]
288284
type w64 = w<u64>;

src/sample.rs src/seq.rs

+103-133
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2013-2014 The Rust Project Developers. See the COPYRIGHT
1+
// Copyright 2017 The Rust Project Developers. See the COPYRIGHT
22
// file at the top-level directory of this distribution and at
33
// http://rust-lang.org/COPYRIGHT.
44
//
@@ -8,89 +8,27 @@
88
// option. This file may not be copied, modified, or distributed
99
// except according to those terms.
1010

11-
//! Functions for sampling data
11+
//! Functions for randomly accessing and sampling sequences.
1212
1313
use super::Rng;
1414
use std::collections::hash_map::HashMap;
1515

16-
/// The `Sample` trait provides the `sample` method.
16+
/// Randomly sample *up to* `amount` elements from a finite iterator.
1717
///
18-
/// This is intended to be implemented for containers that:
19-
/// - Can be sampled in `O(amount)` time.
20-
/// - Whos items can be `cloned`.
18+
/// The values are non-repeating but the order of elements returned is *not* random.
2119
///
22-
/// If cloning is impossible or expensive, use `sample_ref` instead.
23-
pub trait Sample {
24-
/// The returned sampled data. Typically the either a `Vec<T>` or a new instance of the
25-
/// container's own type.
26-
type Sampled;
27-
28-
/// Return exactly `amount` randomly sampled values.
29-
///
30-
/// Any type which implements `sample` should guarantee that:
31-
/// - Both the order and values of `Sampled` is random.
32-
/// - The implementation uses `O(amount)` speed and memory
33-
/// - The returned values are not references (if so, implement `SampleRef` instead).
34-
///
35-
/// Panics if `amount > self.len()`
36-
///
37-
/// # Example
38-
///
39-
/// ```rust
40-
/// use rand::{thread_rng, Sample};
41-
///
42-
/// let mut rng = thread_rng();
43-
/// let values = vec![5, 6, 1, 3, 4, 6, 7];
44-
/// println!("{:?}", values.sample(&mut rng, 3))
45-
/// ```
46-
fn sample<R: Rng>(&self, rng: &mut R, amount: usize) -> Self::Sampled;
47-
}
48-
49-
/// The `SampleRef` trait provides the `sample_ref` method.
50-
///
51-
/// This is intended to be implemented for containers that which can be sampled in `O(amount)` time
52-
/// and want a fast way to give references to a sample of their items.
53-
pub trait SampleRef {
54-
/// The returned sampled data. Typically the either a `Vec<&T>` or a new instance of the
55-
/// container's own type containing references to the underlying data.
56-
type SampledRef;
57-
58-
/// Return exactly `amount` references to randomly sampled values.
59-
///
60-
/// Any type which implements `sample_ref` should guarantee that:
61-
/// - Both the order and values of `SampledRef` is random.
62-
/// - The implementation uses `O(amount)` speed and memory.
63-
/// - The returned values are not copies/clones (if so, implement `Sample` instead).
64-
///
65-
/// Panics if `amount > self.len()`
66-
///
67-
/// # Example
68-
///
69-
/// ```rust
70-
/// use rand::{thread_rng, SampleRef};
71-
///
72-
/// let mut rng = thread_rng();
73-
/// let values = vec![5, 6, 1, 3, 4, 6, 7];
74-
/// println!("{:?}", values.as_slice().sample_ref(&mut rng, 3))
75-
/// ```
76-
fn sample_ref<R: Rng>(&self, rng: &mut R, amount: usize) -> Self::SampledRef;
77-
}
78-
79-
/// Randomly sample *up to* `amount` elements from a finite iterator using a reservoir.
20+
/// This implementation uses `O(len(iterable))` time and `O(amount)` memory.
8021
///
81-
/// The order of elements in the sample is not random. In fact, if `len(iterable) <= amount` then
82-
/// the output will be in the exact order they were collected.
83-
///
84-
/// The reservoir method used allocates only an `Vec` of size `amount`. The size of the iterable
85-
/// does not affect the amount of memory used.
22+
/// > If `len(iterable) <= amount` then the values will be in sequential order. In all other
23+
/// > cases the order of the elements is neither random nor guaranteed.
8624
///
8725
/// # Example
8826
///
8927
/// ```rust
90-
/// use rand::{thread_rng, sample_reservoir};
28+
/// use rand::{thread_rng, seq};
9129
///
9230
/// let mut rng = thread_rng();
93-
/// let sample = sample_reservoir(&mut rng, 1..100, 5);
31+
/// let sample = seq::sample_reservoir(&mut rng, 1..100, 5);
9432
/// println!("{:?}", sample);
9533
/// ```
9634
pub fn sample_reservoir<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Vec<T>
@@ -110,18 +48,73 @@ pub fn sample_reservoir<T, I, R>(rng: &mut R, iterable: I, amount: usize) -> Vec
11048
}
11149
}
11250
}
51+
// There is a rare corner case where `size(iterable) <<< amount`,
52+
// we don't want to be hanging onto extra memory.
53+
reservoir.shrink_to_fit();
11354
reservoir
11455
}
11556

116-
/// Sample (non-repeating) exactly `amount` of indices from a sequence of the given `length`.
57+
/// Randomly sample exactly `amount` values from `slice`.
58+
///
59+
/// The values are non-repeating and in random order.
60+
///
61+
/// This implementation uses `O(amount)` time and memory.
62+
///
63+
/// Panics if `amount > slice.len()`
64+
///
65+
/// # Example
66+
///
67+
/// ```rust
68+
/// use rand::{thread_rng, seq};
69+
///
70+
/// let mut rng = thread_rng();
71+
/// let values = vec![5, 6, 1, 3, 4, 6, 7];
72+
/// println!("{:?}", seq::sample_slice(&mut rng, &values, 3));
73+
/// ```
74+
pub fn sample_slice<R: Rng, T: Clone>(rng: &mut R, slice: &[T], amount: usize) -> Vec<T> {
75+
let indices = sample_indices(rng, slice.len(), amount);
76+
77+
let mut out = Vec::with_capacity(amount);
78+
out.extend(indices.iter().map(|i| slice[*i].clone()));
79+
out
80+
}
81+
82+
/// Randomly sample exactly `amount` references from `slice`.
83+
///
84+
/// The references are non-repeating and in random order.
11785
///
118-
/// The returned elements and their order are random.
86+
/// This implementation uses `O(amount)` time and memory.
11987
///
120-
/// Panics if `amount > length`
88+
/// Panics if `amount > slice.len()`
89+
///
90+
/// # Example
91+
///
92+
/// ```rust
93+
/// use rand::{thread_rng, seq};
94+
///
95+
/// let mut rng = thread_rng();
96+
/// let values = vec![5, 6, 1, 3, 4, 6, 7];
97+
/// println!("{:?}", seq::sample_slice_ref(&mut rng, &values, 3));
98+
/// ```
99+
pub fn sample_slice_ref<'a, R: Rng, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> {
100+
let indices = sample_indices(rng, slice.len(), amount);
101+
102+
let mut out = Vec::with_capacity(amount);
103+
out.extend(indices.iter().map(|i| &slice[*i]));
104+
out
105+
}
106+
107+
/// Randomly sample exactly `amount` indices from `0..length`.
121108
///
122-
/// TODO: IMO this should be made public since it can be generally useful, although
123-
/// there might be a way to make the output type more generic/compact.
124-
fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize>
109+
/// The values are non-repeating and in random order.
110+
///
111+
/// This implementation uses `O(amount)` time and memory.
112+
///
113+
/// This method is used internally by the slice sampling methods, but it can sometimes be useful to
114+
/// have the indices themselves so this is provided as an alternative.
115+
///
116+
/// Panics if `amount > length`
117+
pub fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize>
125118
where R: Rng,
126119
{
127120
if amount > length {
@@ -132,7 +125,7 @@ fn sample_indices<R>(rng: &mut R, length: usize, amount: usize) -> Vec<usize>
132125
// if we use the `cached` version we will have to allocate `amount` as a HashMap as well since
133126
// it inserts an element for every loop.
134127
//
135-
// Therefore, if amount >= length / 2, inplace will be both faster and use less memory.
128+
// Therefore, if `amount >= length / 2` then inplace will be both faster and use less memory.
136129
//
137130
// TODO: there is probably even more fine-tuning that can be done here since
138131
// `HashMap::with_capacity(amount)` probably allocates more than `amount` in practice,
@@ -156,23 +149,25 @@ fn sample_indices_inplace<R>(rng: &mut R, length: usize, amount: usize) -> Vec<u
156149
where R: Rng,
157150
{
158151
debug_assert!(amount <= length);
159-
let amount = if amount == length {
152+
let mut indices: Vec<usize> = Vec::with_capacity(length);
153+
indices.extend(0..length);
154+
let end_i = if length != 0 && amount == length {
160155
// It isn't necessary to shuffle the final element if we are shuffling
161156
// the whole array... it would just be shuffled with itself
157+
//
158+
// Also, `rng.gen_range(i, i)` panics.
162159
amount - 1
163160
} else {
164161
amount
165162
};
166-
167-
let mut indices: Vec<usize> = Vec::with_capacity(length);
168-
indices.extend(0..length);
169-
for i in 0..amount {
163+
for i in 0..end_i {
170164
let j: usize = rng.gen_range(i, length);
171165
let tmp = indices[i];
172166
indices[i] = indices[j];
173167
indices[j] = tmp;
174168
}
175169
indices.truncate(amount);
170+
debug_assert_eq!(indices.len(), amount);
176171
indices
177172
}
178173

@@ -213,52 +208,10 @@ fn sample_indices_cache<R>(
213208
// note that in the inplace version, slice[i] is automatically "returned" value
214209
out.push(x);
215210
}
211+
debug_assert_eq!(out.len(), amount);
216212
out
217213
}
218214

219-
impl<'a, T: Clone> Sample for &'a [T] {
220-
type Sampled = Vec<T>;
221-
222-
fn sample<R: Rng>(&self, rng: &mut R, amount: usize) -> Vec<T> {
223-
let indices = sample_indices(rng, self.len(), amount);
224-
225-
let mut out = Vec::with_capacity(amount);
226-
out.extend(indices.iter().map(|i| self[*i].clone()));
227-
out
228-
}
229-
}
230-
231-
impl<'a, T: Clone> Sample for Vec<T> {
232-
type Sampled = Vec<T>;
233-
234-
fn sample<R: Rng>(&self, rng: &mut R, amount: usize) -> Vec<T> {
235-
self.as_slice().sample(rng, amount)
236-
}
237-
}
238-
239-
impl<'a, T> SampleRef for &'a [T] {
240-
type SampledRef = Vec<&'a T>;
241-
242-
fn sample_ref<R: Rng>(&self, rng: &mut R, amount: usize) -> Vec<&'a T> {
243-
let indices = sample_indices(rng, self.len(), amount);
244-
245-
let mut out = Vec::with_capacity(amount);
246-
out.extend(indices.iter().map(|i| &self[*i]));
247-
out
248-
}
249-
}
250-
251-
// TODO: It looks like implementing this depends on RFC 1598 being implemented.
252-
// See this: https://github.com/rust-lang/rfcs/issues/1965
253-
//
254-
// impl<'a, T> SampleRef for Vec<&'a T>{
255-
// type SampledRef = Vec<&'a T>;
256-
//
257-
// fn sample_ref<R: Rng>(&'a self, rng: &mut R, amount: usize) -> Vec<&'a T> {
258-
// self.as_slice().sample_ref(rng, amount)
259-
// }
260-
// }
261-
262215
#[cfg(test)]
263216
mod test {
264217
use super::*;
@@ -281,11 +234,28 @@ mod test {
281234
**e >= min_val && **e <= max_val
282235
}));
283236
}
237+
#[test]
238+
fn test_sample_slice_boundaries() {
239+
let empty: &[u8] = &[];
240+
241+
let mut r = thread_rng();
242+
243+
// sample 0 items
244+
assert_eq!(sample_slice(&mut r, empty, 0), vec![]);
245+
assert_eq!(sample_slice(&mut r, &[42, 2, 42], 0), vec![]);
246+
247+
// sample 1 item
248+
assert_eq!(sample_slice(&mut r, &[42], 1), vec![42]);
249+
let v = sample_slice(&mut r, &[1, 42], 1)[0];
250+
assert!(v == 1 || v == 42);
251+
252+
// sample "all" the items
253+
let v = sample_slice(&mut r, &[42, 133], 2);
254+
assert!(v == vec![42, 133] || v == vec![133, 42]);
255+
}
284256

285257
#[test]
286-
/// This test mainly works by asserting that the two cases are equivalent,
287-
/// as well as equivalent to the exported function.
288-
fn test_sample_indices() {
258+
fn test_sample_slice() {
289259
let xor_rng = XorShiftRng::from_seed;
290260

291261
let max_range = 100;
@@ -299,7 +269,7 @@ mod test {
299269

300270
println!("Selecting indices: len={}, amount={}, seed={:?}", length, amount, seed);
301271

302-
// assert that the two methods give exactly the same result
272+
// assert that the two index methods give exactly the same result
303273
let inplace = sample_indices_inplace(
304274
&mut xor_rng(seed), length, amount);
305275
let cache = sample_indices_cache(
@@ -313,15 +283,15 @@ mod test {
313283
assert!(regular.iter().all(|e| *e < length));
314284
assert_eq!(regular, inplace);
315285

316-
// just for fun, also test sampling from a vector
286+
// also test that sampling the slice works
317287
let vec: Vec<usize> = (0..length).collect();
318288
{
319-
let result = vec.sample(&mut xor_rng(seed), amount);
289+
let result = sample_slice(&mut xor_rng(seed), &vec, amount);
320290
assert_eq!(result, regular);
321291
}
322292

323293
{
324-
let result = vec.as_slice().sample_ref(&mut xor_rng(seed), amount);
294+
let result = sample_slice_ref(&mut xor_rng(seed), &vec, amount);
325295
let expected = regular.iter().map(|v| v).collect::<Vec<_>>();
326296
assert_eq!(result, expected);
327297
}

0 commit comments

Comments
 (0)