From 81b7f410ae3623b1ea3a479a67b77fb65f3bc696 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Sat, 26 May 2018 18:42:34 +0100 Subject: [PATCH 1/7] Sequence functionality: API revisions --- src/prelude.rs | 1 + src/seq.rs | 159 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 157 insertions(+), 3 deletions(-) diff --git a/src/prelude.rs b/src/prelude.rs index 358c2370823..ace5c89d700 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -26,3 +26,4 @@ #[doc(no_inline)] #[cfg(feature="std")] pub use rngs::ThreadRng; #[doc(no_inline)] pub use {Rng, RngCore, CryptoRng, SeedableRng}; #[doc(no_inline)] #[cfg(feature="std")] pub use {FromEntropy, random, thread_rng}; +#[doc(no_inline)] pub use seq::{SliceRandom, IteratorRandom}; diff --git a/src/seq.rs b/src/seq.rs index 4456183a9da..6454cb58675 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -9,9 +9,8 @@ // except according to those terms. //! Functions for randomly accessing and sampling sequences. - -#[cfg(feature = "alloc")] -use super::Rng; +//! +//! TODO: module doc // BTreeMap is not as fast in tests, but better than nothing. #[cfg(feature="std")] use std::collections::HashMap; @@ -19,6 +18,160 @@ use super::Rng; #[cfg(all(feature="alloc", not(feature="std")))] use alloc::Vec; +use super::Rng; + + +/// Extension trait on slices, providing random mutation and sampling methods. +/// +/// An implementation is provided for slices. This may also be implementable for +/// other types. +pub trait SliceRandom { + /// The element type. + type Item; + + /// Returns a reference to one random element of the slice, or `None` if the + /// slice is empty. + /// + /// Depending on the implementation, complexity is expected to be `O(1)`. + fn choose(&self, rng: &mut R) -> Option<&Self::Item> + where R: Rng + ?Sized; + + /// Returns a mutable reference to one random element of the slice, or + /// `None` if the slice is empty. + /// + /// Depending on the implementation, complexity is expected to be `O(1)`. + fn choose_mut(&mut self, rng: &mut R) -> Option<&mut Self::Item> + where R: Rng + ?Sized; + + /// Produces an iterator that chooses `amount` elements from the slice at + /// random without repeating any. + /// + /// In case this API is not sufficiently flexible, use `sample_indices` then + /// apply the indices to the slice. + /// + /// Although the elements are selected randomly, the order of returned + /// elements is neither stable nor fully random. If random ordering is + /// desired, either use `partial_shuffle` or use this method and shuffle + /// the result. If stable order is desired, use `sample_indices`, sort the + /// result, then apply to the slice. + /// + /// Complexity is expected to be the same as `sample_indices`. + #[cfg(feature = "alloc")] + fn choose_multiple(&self, rng: &mut R, amount: usize) -> Vec<&Self::Item> + where R: Rng + ?Sized; + + /// Shuffle a slice in place. + /// + /// Depending on the implementation, complexity is expected to be `O(1)`. + fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized; + + /// Shuffle a slice in place, but exit early. + /// + /// Returns two mutable slices from the source slice. The first contains + /// `amount` elements randomly permuted. The second has the remaining + /// elements that are not fully shuffled. + /// + /// This is an efficient method to select `amount` elements at random from + /// the slice, provided the slice may be mutated. + /// + /// If you only need to choose elements randomly and `amount > self.len()/2` + /// then you may improve performance by taking + /// `amount = values.len() - amount` and using only the second slice. + /// + /// If `amount` is greater than the number of elements in the slice, this + /// will perform a full shuffle. + /// + /// Complexity is expected to be `O(m)` where `m = amount`. + fn partial_shuffle(&mut self, rng: &mut R, amount: usize) + -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized; +} + +/// Extension trait on iterators, providing random sampling methods. +pub trait IteratorRandom: Iterator + Sized { + /// Choose one element at random from the iterator. + /// + /// Returns `None` if and only if the iterator is empty. + /// + /// Complexity is `O(n)`, where `n` is the length of the iterator. + /// This likely consumes multiple random numbers, but the exact number + /// is unspecified. + fn choose(self, rng: &mut R) -> Option + where R: Rng + ?Sized + { + unimplemented!() + } + + /// Collects `amount` values at random from the iterator into a supplied + /// buffer. + /// + /// Returns the number of elements added to the buffer. This equals `amount` + /// unless the iterator contains insufficient elements, in which case this + /// equals the number of elements available. + /// + /// Complexity is TODO + fn choose_multiple_fill(self, rng: &mut R, amount: usize) -> usize + where R: Rng + ?Sized + { + unimplemented!() + } + + /// Collects `amount` values at random from the iterator into a vector. + /// + /// This is a convenience wrapper around `choose_multiple_fill`. + /// + /// The length of the returned vector equals `amount` unless the iterator + /// contains insufficient elements, in which case it equals the number of + /// elements available. + /// + /// Complexity is TODO + #[cfg(feature = "alloc")] + fn choose_multiple(self, rng: &mut R, amount: usize) -> Vec + where R: Rng + ?Sized + { + // Note: I think this must use unsafe to create an uninitialised buffer, then restrict length + unimplemented!() + } +} + + +impl SliceRandom for [T] { + type Item = T; + + fn choose(&self, rng: &mut R) -> Option<&Self::Item> + where R: Rng + ?Sized + { + unimplemented!() + } + + fn choose_mut(&mut self, rng: &mut R) -> Option<&mut Self::Item> + where R: Rng + ?Sized + { + unimplemented!() + } + + #[cfg(feature = "alloc")] + fn choose_multiple(&self, rng: &mut R, amount: usize) -> Vec<&Self::Item> + where R: Rng + ?Sized + { + unimplemented!() + } + + fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized + { + unimplemented!() + } + + fn partial_shuffle(&mut self, rng: &mut R, amount: usize) + -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized + { + unimplemented!() + } +} + +// ——— +// TODO: remove below methods once implemented above +// ——— + /// Randomly sample `amount` elements from a finite iterator. /// /// The following can be returned: From f64ce3ae7c7194a1d63202185eae418499e09c66 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Sun, 27 May 2018 12:22:15 +0100 Subject: [PATCH 2/7] Move choose, choose_mut and shuffle from Rng to SliceExt --- src/lib.rs | 102 ++++++++------------------------------------- src/rngs/thread.rs | 4 -- src/seq.rs | 85 +++++++++++++++++++++++++++++++++---- 3 files changed, 96 insertions(+), 95 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 3d4e7b315b8..51e06bc5fff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -588,64 +588,35 @@ pub trait Rng: RngCore { /// Return a random element from `values`. /// - /// Return `None` if `values` is empty. - /// - /// # Example - /// - /// ``` - /// use rand::{thread_rng, Rng}; - /// - /// let choices = [1, 2, 4, 8, 16, 32]; - /// let mut rng = thread_rng(); - /// println!("{:?}", rng.choose(&choices)); - /// assert_eq!(rng.choose(&choices[..0]), None); - /// ``` + /// Deprecated: use [`SliceRandom::choose`] instead. + /// + /// [`SliceRandom::choose`]: seq/trait.SliceRandom.html#method.choose + #[deprecated(since="0.6.0", note="use SliceRandom::choose instead")] fn choose<'a, T>(&mut self, values: &'a [T]) -> Option<&'a T> { - if values.is_empty() { - None - } else { - Some(&values[self.gen_range(0, values.len())]) - } + use seq::SliceRandom; + values.choose(self) } /// Return a mutable pointer to a random element from `values`. /// - /// Return `None` if `values` is empty. + /// Deprecated: use [`SliceRandom::choose_mut`] instead. + /// + /// [`SliceRandom::choose_mut`]: seq/trait.SliceRandom.html#method.choose_mut + #[deprecated(since="0.6.0", note="use SliceRandom::choose_mut instead")] fn choose_mut<'a, T>(&mut self, values: &'a mut [T]) -> Option<&'a mut T> { - if values.is_empty() { - None - } else { - let len = values.len(); - Some(&mut values[self.gen_range(0, len)]) - } + use seq::SliceRandom; + values.choose_mut(self) } /// Shuffle a mutable slice in place. /// - /// This applies Durstenfeld's algorithm for the [Fisher–Yates shuffle]( - /// https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm) - /// which produces an unbiased permutation. - /// - /// # Example - /// - /// ``` - /// use rand::{thread_rng, Rng}; - /// - /// let mut rng = thread_rng(); - /// let mut y = [1, 2, 3]; - /// rng.shuffle(&mut y); - /// println!("{:?}", y); - /// rng.shuffle(&mut y); - /// println!("{:?}", y); - /// ``` + /// Deprecated: use [`SliceRandom::shuffle`] instead. + /// + /// [`SliceRandom::shuffle`]: seq/trait.SliceRandom.html#method.shuffle + #[deprecated(since="0.6.0", note="use SliceRandom::shuffle instead")] fn shuffle(&mut self, values: &mut [T]) { - let mut i = values.len(); - while i >= 2 { - // invariant: elements with index >= i have been locked in place. - i -= 1; - // lock element i in place. - values.swap(i, self.gen_range(0, i + 1)); - } + use seq::SliceRandom; + values.shuffle(self) } } @@ -999,35 +970,6 @@ mod test { } } - #[test] - fn test_choose() { - let mut r = rng(107); - assert_eq!(r.choose(&[1, 1, 1]).map(|&x|x), Some(1)); - - let v: &[isize] = &[]; - assert_eq!(r.choose(v), None); - } - - #[test] - fn test_shuffle() { - let mut r = rng(108); - let empty: &mut [isize] = &mut []; - r.shuffle(empty); - let mut one = [1]; - r.shuffle(&mut one); - let b: &[_] = &[1]; - assert_eq!(one, b); - - let mut two = [1, 2]; - r.shuffle(&mut two); - assert!(two == [1, 2] || two == [2, 1]); - - let mut x = [1, 1, 1]; - r.shuffle(&mut x); - let b: &[_] = &[1, 1, 1]; - assert_eq!(x, b); - } - #[test] fn test_rng_trait_object() { use distributions::{Distribution, Standard}; @@ -1035,10 +977,6 @@ mod test { let mut r = &mut rng as &mut RngCore; r.next_u32(); r.gen::(); - let mut v = [1, 1, 1]; - r.shuffle(&mut v); - let b: &[_] = &[1, 1, 1]; - assert_eq!(v, b); assert_eq!(r.gen_range(0, 1), 0); let _c: u8 = Standard.sample(&mut r); } @@ -1051,10 +989,6 @@ mod test { let mut r = Box::new(rng) as Box; r.next_u32(); r.gen::(); - let mut v = [1, 1, 1]; - r.shuffle(&mut v); - let b: &[_] = &[1, 1, 1]; - assert_eq!(v, b); assert_eq!(r.gen_range(0, 1), 0); let _c: u8 = Standard.sample(&mut r); } diff --git a/src/rngs/thread.rs b/src/rngs/thread.rs index 863b79d31c7..c6611db92ff 100644 --- a/src/rngs/thread.rs +++ b/src/rngs/thread.rs @@ -132,10 +132,6 @@ mod test { use Rng; let mut r = ::thread_rng(); r.gen::(); - let mut v = [1, 1, 1]; - r.shuffle(&mut v); - let b: &[_] = &[1, 1, 1]; - assert_eq!(v, b); assert_eq!(r.gen_range(0, 1), 0); } } diff --git a/src/seq.rs b/src/seq.rs index 6454cb58675..39ff5a68a45 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -33,6 +33,18 @@ pub trait SliceRandom { /// slice is empty. /// /// Depending on the implementation, complexity is expected to be `O(1)`. + /// + /// # Example + /// + /// ``` + /// use rand::thread_rng; + /// use rand::seq::SliceRandom; + /// + /// let choices = [1, 2, 4, 8, 16, 32]; + /// let mut rng = thread_rng(); + /// println!("{:?}", choices.choose(&mut rng)); + /// assert_eq!(choices[..0].choose(&mut rng), None); + /// ``` fn choose(&self, rng: &mut R) -> Option<&Self::Item> where R: Rng + ?Sized; @@ -60,9 +72,22 @@ pub trait SliceRandom { fn choose_multiple(&self, rng: &mut R, amount: usize) -> Vec<&Self::Item> where R: Rng + ?Sized; - /// Shuffle a slice in place. + /// Shuffle a mutable slice in place. /// /// Depending on the implementation, complexity is expected to be `O(1)`. + /// + /// # Example + /// + /// ``` + /// use rand::thread_rng; + /// use rand::seq::SliceRandom; + /// + /// let mut rng = thread_rng(); + /// let mut y = [1, 2, 3, 4, 5]; + /// println!("Unshuffled: {:?}", y); + /// y.shuffle(&mut rng); + /// println!("Shuffled: {:?}", y); + /// ``` fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized; /// Shuffle a slice in place, but exit early. @@ -140,13 +165,22 @@ impl SliceRandom for [T] { fn choose(&self, rng: &mut R) -> Option<&Self::Item> where R: Rng + ?Sized { - unimplemented!() + if self.is_empty() { + None + } else { + Some(&self[rng.gen_range(0, self.len())]) + } } fn choose_mut(&mut self, rng: &mut R) -> Option<&mut Self::Item> where R: Rng + ?Sized { - unimplemented!() + if self.is_empty() { + None + } else { + let len = self.len(); + Some(&mut self[rng.gen_range(0, len)]) + } } #[cfg(feature = "alloc")] @@ -158,7 +192,10 @@ impl SliceRandom for [T] { fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized { - unimplemented!() + for i in (1..self.len()).rev() { + // invariant: elements with index > i have been locked in place. + self.swap(i, rng.gen_range(0, i + 1)); + } } fn partial_shuffle(&mut self, rng: &mut R, amount: usize) @@ -381,15 +418,46 @@ fn sample_indices_cache( out } -#[cfg(all(test, feature = "alloc"))] +#[cfg(test)] mod test { use super::*; - use {Rng, SeedableRng}; - use prng::XorShiftRng; + use Rng; + #[cfg(feature = "alloc")] use SeedableRng; + #[cfg(feature = "alloc")] use prng::XorShiftRng; #[cfg(all(feature="alloc", not(feature="std")))] use alloc::Vec; #[test] + fn test_choose() { + let mut r = ::test::rng(107); + assert_eq!([1, 1, 1].choose(&mut r).map(|&x|x), Some(1)); + + let v: &[isize] = &[]; + assert_eq!(v.choose(&mut r), None); + } + + #[test] + fn test_shuffle() { + let mut r = ::test::rng(108); + let empty: &mut [isize] = &mut []; + empty.shuffle(&mut r); + let mut one = [1]; + one.shuffle(&mut r); + let b: &[_] = &[1]; + assert_eq!(one, b); + + let mut two = [1, 2]; + two.shuffle(&mut r); + assert!(two == [1, 2] || two == [2, 1]); + + let mut x = [1, 1, 1]; + x.shuffle(&mut r); + let b: &[_] = &[1, 1, 1]; + assert_eq!(x, b); + } + + #[test] + #[cfg(feature = "alloc")] fn test_sample_iter() { let min_val = 1; let max_val = 100; @@ -408,7 +476,9 @@ mod test { **e >= min_val && **e <= max_val })); } + #[test] + #[cfg(feature = "alloc")] fn test_sample_slice_boundaries() { let empty: &[u8] = &[]; @@ -453,6 +523,7 @@ mod test { } #[test] + #[cfg(feature = "alloc")] fn test_sample_slice() { let xor_rng = XorShiftRng::from_seed; From 545895b84d7f70756bd48193eb614c5c4b6a27be Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Sun, 27 May 2018 14:57:50 +0100 Subject: [PATCH 3/7] Implement new choose_multiple functions --- examples/monty-hall.rs | 4 +- src/seq.rs | 218 +++++++++++++++++++++++++++-------------- 2 files changed, 144 insertions(+), 78 deletions(-) diff --git a/examples/monty-hall.rs b/examples/monty-hall.rs index 3750f8fabe2..faf795cb873 100644 --- a/examples/monty-hall.rs +++ b/examples/monty-hall.rs @@ -63,8 +63,8 @@ fn simulate(random_door: &Uniform, rng: &mut R) // Returns the door the game host opens given our choice and knowledge of // where the car is. The game host will never open the door with the car. fn game_host_open(car: u32, choice: u32, rng: &mut R) -> u32 { - let choices = free_doors(&[car, choice]); - rand::seq::sample_slice(rng, &choices, 1)[0] + use rand::seq::SliceRandom; + *free_doors(&[car, choice]).choose(rng).unwrap() } // Returns the door we switch to, given our current choice and diff --git a/src/seq.rs b/src/seq.rs index 39ff5a68a45..58ee67c5bf3 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -12,15 +12,17 @@ //! //! TODO: module doc +#[cfg(feature="alloc")] use core::ops::Index; + +#[cfg(feature="std")] use std::vec; +#[cfg(all(feature="alloc", not(feature="std")))] use alloc::{vec, Vec}; // BTreeMap is not as fast in tests, but better than nothing. #[cfg(feature="std")] use std::collections::HashMap; #[cfg(all(feature="alloc", not(feature="std")))] use alloc::btree_map::BTreeMap; -#[cfg(all(feature="alloc", not(feature="std")))] use alloc::Vec; use super::Rng; - /// Extension trait on slices, providing random mutation and sampling methods. /// /// An implementation is provided for slices. This may also be implementable for @@ -68,8 +70,25 @@ pub trait SliceRandom { /// result, then apply to the slice. /// /// Complexity is expected to be the same as `sample_indices`. + /// + /// # Example + /// ``` + /// use rand::seq::SliceRandom; + /// + /// let mut rng = &mut rand::thread_rng(); + /// let sample = "Hello, audience!".as_bytes(); + /// + /// // collect the results into a vector: + /// let v: Vec = sample.choose_multiple(&mut rng, 3).cloned().collect(); + /// + /// // store in a buffer: + /// let mut buf = [0u8; 5]; + /// for (b, slot) in sample.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { + /// *slot = *b; + /// } + /// ``` #[cfg(feature = "alloc")] - fn choose_multiple(&self, rng: &mut R, amount: usize) -> Vec<&Self::Item> + fn choose_multiple(&self, rng: &mut R, amount: usize) -> SliceChooseIter where R: Rng + ?Sized; /// Shuffle a mutable slice in place. @@ -128,33 +147,78 @@ pub trait IteratorRandom: Iterator + Sized { /// Collects `amount` values at random from the iterator into a supplied /// buffer. - /// + /// + /// Although the elements are selected randomly, the order of elements in + /// the buffer is neither stable nor fully random. If random ordering is + /// desired, shuffle the result. + /// /// Returns the number of elements added to the buffer. This equals `amount` /// unless the iterator contains insufficient elements, in which case this /// equals the number of elements available. /// - /// Complexity is TODO - fn choose_multiple_fill(self, rng: &mut R, amount: usize) -> usize - where R: Rng + ?Sized + /// Complexity is `O(n)` where `n` is the length of the iterator. + fn choose_multiple_fill(mut self, rng: &mut R, buf: &mut [Self::Item]) + -> usize where R: Rng + ?Sized { - unimplemented!() + let amount = buf.len(); + let mut len = 0; + while len < amount { + if let Some(elem) = self.next() { + buf[len] = elem; + len += 1; + } else { + // Iterator exhausted; stop early + return len; + } + } + + // Continue, since the iterator was not exhausted + for (i, elem) in self.enumerate() { + let k = rng.gen_range(0, i + 1 + amount); + if let Some(slot) = buf.get_mut(k) { + *slot = elem; + } + } + len } /// Collects `amount` values at random from the iterator into a vector. /// - /// This is a convenience wrapper around `choose_multiple_fill`. + /// This is equivalent to `choose_multiple_fill` except for the result type. /// + /// Although the elements are selected randomly, the order of elements in + /// the buffer is neither stable nor fully random. If random ordering is + /// desired, shuffle the result. + /// /// The length of the returned vector equals `amount` unless the iterator /// contains insufficient elements, in which case it equals the number of /// elements available. /// - /// Complexity is TODO + /// Complexity is `O(n)` where `n` is the length of the iterator. #[cfg(feature = "alloc")] - fn choose_multiple(self, rng: &mut R, amount: usize) -> Vec + fn choose_multiple(mut self, rng: &mut R, amount: usize) -> Vec where R: Rng + ?Sized { - // Note: I think this must use unsafe to create an uninitialised buffer, then restrict length - unimplemented!() + let mut reservoir = Vec::with_capacity(amount); + reservoir.extend(self.by_ref().take(amount)); + + // Continue unless the iterator was exhausted + // + // note: this prevents iterators that "restart" from causing problems. + // If the iterator stops once, then so do we. + if reservoir.len() == amount { + for (i, elem) in self.enumerate() { + let k = rng.gen_range(0, i + 1 + amount); + if let Some(slot) = reservoir.get_mut(k) { + *slot = elem; + } + } + } else { + // Don't hang onto extra memory. There is a corner case where + // `amount` was much less than `self.len()`. + reservoir.shrink_to_fit(); + } + reservoir } } @@ -184,10 +248,15 @@ impl SliceRandom for [T] { } #[cfg(feature = "alloc")] - fn choose_multiple(&self, rng: &mut R, amount: usize) -> Vec<&Self::Item> + fn choose_multiple(&self, rng: &mut R, amount: usize) -> SliceChooseIter where R: Rng + ?Sized { - unimplemented!() + let amount = ::core::cmp::min(amount, self.len()); + SliceChooseIter { + slice: self, + _phantom: Default::default(), + indices: sample_indices(rng, self.len(), amount).into_iter(), + } } fn shuffle(&mut self, rng: &mut R) where R: Rng + ?Sized @@ -205,56 +274,61 @@ impl SliceRandom for [T] { } } -// ——— -// TODO: remove below methods once implemented above -// ——— +impl IteratorRandom for I where I: Iterator + Sized {} + + +/// Iterator over multiple choices, as returned by [`SliceRandom::choose_multiple]( +/// trait.SliceRandom.html#method.choose_multiple). +#[cfg(feature = "alloc")] +#[derive(Debug)] +pub struct SliceChooseIter<'a, S: ?Sized + 'a, T: 'a> { + slice: &'a S, + _phantom: ::core::marker::PhantomData, + indices: vec::IntoIter, +} + +#[cfg(feature = "alloc")] +impl<'a, S: Index + ?Sized + 'a, T: 'a> Iterator for SliceChooseIter<'a, S, T> { + type Item = &'a T; + + fn next(&mut self) -> Option { + // TODO: investigate using SliceIndex::get_unchecked when stable + self.indices.next().map(|i| &(*self.slice)[i]) + } + + fn size_hint(&self) -> (usize, Option) { + (self.indices.len(), Some(self.indices.len())) + } +} + +#[cfg(feature = "alloc")] +impl<'a, S: Index + ?Sized + 'a, T: 'a> ExactSizeIterator + for SliceChooseIter<'a, S, T> +{ + fn len(&self) -> usize { + self.indices.len() + } +} + /// Randomly sample `amount` elements from a finite iterator. /// -/// The following can be returned: -/// -/// - `Ok`: `Vec` of `amount` non-repeating randomly sampled elements. The order is not random. -/// - `Err`: `Vec` of all the elements from `iterable` in sequential order. This happens when the -/// length of `iterable` was less than `amount`. This is considered an error since exactly -/// `amount` elements is typically expected. -/// -/// This implementation uses `O(len(iterable))` time and `O(amount)` memory. -/// -/// # Example -/// -/// ``` -/// use rand::{thread_rng, seq}; -/// -/// let mut rng = thread_rng(); -/// let sample = seq::sample_iter(&mut rng, 1..100, 5).unwrap(); -/// println!("{:?}", sample); -/// ``` +/// Deprecated: use [`IteratorRandom::choose_multiple`] instead. +/// +/// [`IteratorRandom::choose_multiple`]: trait.IteratorRandom.html#method.choose_multiple #[cfg(feature = "alloc")] +#[deprecated(since="0.6.0", note="use IteratorRandom::choose_multiple instead")] pub fn sample_iter(rng: &mut R, iterable: I, amount: usize) -> Result, Vec> where I: IntoIterator, R: Rng + ?Sized, { - let mut iter = iterable.into_iter(); - let mut reservoir = Vec::with_capacity(amount); - reservoir.extend(iter.by_ref().take(amount)); - - // Continue unless the iterator was exhausted - // - // note: this prevents iterators that "restart" from causing problems. - // If the iterator stops once, then so do we. - if reservoir.len() == amount { - for (i, elem) in iter.enumerate() { - let k = rng.gen_range(0, i + 1 + amount); - if let Some(spot) = reservoir.get_mut(k) { - *spot = elem; - } - } - Ok(reservoir) + use seq::IteratorRandom; + let iter = iterable.into_iter(); + let result = iter.choose_multiple(rng, amount); + if result.len() == amount { + Ok(result) } else { - // Don't hang onto extra memory. There is a corner case where - // `amount` was much less than `len(iterable)`. - reservoir.shrink_to_fit(); - Err(reservoir) + Err(result) } } @@ -266,16 +340,11 @@ pub fn sample_iter(rng: &mut R, iterable: I, amount: usize) -> Result slice.len()` /// -/// # Example -/// -/// ``` -/// use rand::{thread_rng, seq}; -/// -/// let mut rng = thread_rng(); -/// let values = vec![5, 6, 1, 3, 4, 6, 7]; -/// println!("{:?}", seq::sample_slice(&mut rng, &values, 3)); -/// ``` +/// Deprecated: use [`SliceRandom::choose_multiple`] instead. +/// +/// [`SliceRandom::choose_multiple`]: trait.SliceRandom.html#method.choose_multiple #[cfg(feature = "alloc")] +#[deprecated(since="0.6.0", note="use SliceRandom::choose_multiple instead")] pub fn sample_slice(rng: &mut R, slice: &[T], amount: usize) -> Vec where R: Rng + ?Sized, T: Clone @@ -295,16 +364,11 @@ pub fn sample_slice(rng: &mut R, slice: &[T], amount: usize) -> Vec /// /// Panics if `amount > slice.len()` /// -/// # Example -/// -/// ``` -/// use rand::{thread_rng, seq}; -/// -/// let mut rng = thread_rng(); -/// let values = vec![5, 6, 1, 3, 4, 6, 7]; -/// println!("{:?}", seq::sample_slice_ref(&mut rng, &values, 3)); -/// ``` +/// Deprecated: use [`SliceRandom::choose_multiple`] instead. +/// +/// [`SliceRandom::choose_multiple`]: trait.SliceRandom.html#method.choose_multiple #[cfg(feature = "alloc")] +#[deprecated(since="0.6.0", note="use SliceRandom::choose_multiple instead")] pub fn sample_slice_ref<'a, R, T>(rng: &mut R, slice: &'a [T], amount: usize) -> Vec<&'a T> where R: Rng + ?Sized { @@ -464,8 +528,8 @@ mod test { let mut r = ::test::rng(401); let vals = (min_val..max_val).collect::>(); - let small_sample = sample_iter(&mut r, vals.iter(), 5).unwrap(); - let large_sample = sample_iter(&mut r, vals.iter(), vals.len() + 5).unwrap_err(); + let small_sample = vals.iter().choose_multiple(&mut r, 5); + let large_sample = vals.iter().choose_multiple(&mut r, vals.len() + 5); assert_eq!(small_sample.len(), 5); assert_eq!(large_sample.len(), vals.len()); @@ -479,6 +543,7 @@ mod test { #[test] #[cfg(feature = "alloc")] + #[allow(deprecated)] fn test_sample_slice_boundaries() { let empty: &[u8] = &[]; @@ -524,6 +589,7 @@ mod test { #[test] #[cfg(feature = "alloc")] + #[allow(deprecated)] fn test_sample_slice() { let xor_rng = XorShiftRng::from_seed; From 0de02f18e0b7a7ae1983c66c04c6acd159fa92e9 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Sun, 27 May 2018 15:03:03 +0100 Subject: [PATCH 4/7] Implement IteratorRandom::choose --- src/seq.rs | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index 58ee67c5bf3..3cf1d7b7d46 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -139,10 +139,24 @@ pub trait IteratorRandom: Iterator + Sized { /// Complexity is `O(n)`, where `n` is the length of the iterator. /// This likely consumes multiple random numbers, but the exact number /// is unspecified. - fn choose(self, rng: &mut R) -> Option + fn choose(mut self, rng: &mut R) -> Option where R: Rng + ?Sized { - unimplemented!() + if let Some(elem) = self.next() { + let mut result = elem; + + // Continue until the iterator is exhausted + for (i, elem) in self.enumerate() { + let denom = (i + 2) as f64; // accurate to 2^53 elements + if rng.gen_bool(1.0 / denom) { + result = elem; + } + } + + Some(result) + } else { + None + } } /// Collects `amount` values at random from the iterator into a supplied @@ -494,7 +508,14 @@ mod test { #[test] fn test_choose() { let mut r = ::test::rng(107); - assert_eq!([1, 1, 1].choose(&mut r).map(|&x|x), Some(1)); + assert_eq!([1, 1, 1].choose(&mut r), Some(&1)); + + let mut v = [2]; + v.choose_mut(&mut r).map(|x| *x = 5); + assert_eq!(v[0], 5); + + let v = [3, 3, 3, 3]; + assert_eq!(v.iter().choose(&mut r), Some(&3)); let v: &[isize] = &[]; assert_eq!(v.choose(&mut r), None); From b20944da9a0dbb00fddc47a9651f24bac2987b67 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Sun, 27 May 2018 15:24:23 +0100 Subject: [PATCH 5/7] Implement SliceRandom::partial_shuffle --- src/seq.rs | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/seq.rs b/src/seq.rs index 3cf1d7b7d46..50c50861e31 100644 --- a/src/seq.rs +++ b/src/seq.rs @@ -284,7 +284,20 @@ impl SliceRandom for [T] { fn partial_shuffle(&mut self, rng: &mut R, amount: usize) -> (&mut [Self::Item], &mut [Self::Item]) where R: Rng + ?Sized { - unimplemented!() + // This applies Durstenfeld's algorithm for the + // [Fisher–Yates shuffle](https://en.wikipedia.org/wiki/Fisher%E2%80%93Yates_shuffle#The_modern_algorithm) + // for an unbiased permutation, but exits early after choosing `amount` + // elements. + + let len = self.len(); + let end = if amount >= len { 0 } else { len - amount }; + + for i in (end..len).rev() { + // invariant: elements with index > i have been locked in place. + self.swap(i, rng.gen_range(0, i + 1)); + } + let r = self.split_at_mut(end); + (r.1, r.0) } } @@ -499,8 +512,7 @@ fn sample_indices_cache( #[cfg(test)] mod test { use super::*; - use Rng; - #[cfg(feature = "alloc")] use SeedableRng; + #[cfg(feature = "alloc")] use {Rng, SeedableRng}; #[cfg(feature = "alloc")] use prng::XorShiftRng; #[cfg(all(feature="alloc", not(feature="std")))] use alloc::Vec; @@ -540,6 +552,22 @@ mod test { let b: &[_] = &[1, 1, 1]; assert_eq!(x, b); } + + #[test] + fn test_partial_shuffle() { + let mut r = ::test::rng(118); + + let mut empty: [u32; 0] = []; + let res = empty.partial_shuffle(&mut r, 10); + assert_eq!((res.0.len(), res.1.len()), (0, 0)); + + let mut v = [1, 2, 3, 4, 5]; + let res = v.partial_shuffle(&mut r, 2); + assert_eq!((res.0.len(), res.1.len()), (2, 3)); + assert!(res.0[0] != res.0[1]); + // First elements are only modified if selected, so at least one isn't modified: + assert!(res.1[0] == 1 || res.1[1] == 2 || res.1[2] == 3); + } #[test] #[cfg(feature = "alloc")] From c083d580f6b7c91b1501ad11cf3b1101bd7ecf88 Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Sat, 9 Jun 2018 10:01:09 +0100 Subject: [PATCH 6/7] Move seq benches to new file --- benches/misc.rs | 54 ------------------------------------------ benches/seq.rs | 62 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 54 deletions(-) create mode 100644 benches/seq.rs diff --git a/benches/misc.rs b/benches/misc.rs index 4e9cbda37ae..93a5c506fdc 100644 --- a/benches/misc.rs +++ b/benches/misc.rs @@ -8,7 +8,6 @@ const RAND_BENCH_N: u64 = 1000; use test::Bencher; use rand::prelude::*; -use rand::seq::*; #[bench] fn misc_gen_bool_const(b: &mut Bencher) { @@ -108,59 +107,6 @@ sample_binomial!(misc_binomial_100, 100, 0.99); sample_binomial!(misc_binomial_1000, 1000, 0.01); sample_binomial!(misc_binomial_1e12, 1000_000_000_000, 0.2); -#[bench] -fn misc_shuffle_100(b: &mut Bencher) { - let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); - let x : &mut [usize] = &mut [1; 100]; - b.iter(|| { - rng.shuffle(x); - x[0] - }) -} - -#[bench] -fn misc_sample_iter_10_of_100(b: &mut Bencher) { - let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); - let x : &[usize] = &[1; 100]; - b.iter(|| { - sample_iter(&mut rng, x, 10).unwrap_or_else(|e| e) - }) -} - -#[bench] -fn misc_sample_slice_10_of_100(b: &mut Bencher) { - let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); - let x : &[usize] = &[1; 100]; - b.iter(|| { - sample_slice(&mut rng, x, 10) - }) -} - -#[bench] -fn misc_sample_slice_ref_10_of_100(b: &mut Bencher) { - let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); - let x : &[usize] = &[1; 100]; - b.iter(|| { - sample_slice_ref(&mut rng, x, 10) - }) -} - -macro_rules! sample_indices { - ($name:ident, $amount:expr, $length:expr) => { - #[bench] - fn $name(b: &mut Bencher) { - let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); - b.iter(|| { - sample_indices(&mut rng, $length, $amount) - }) - } - } -} - -sample_indices!(misc_sample_indices_10_of_1k, 10, 1000); -sample_indices!(misc_sample_indices_50_of_1k, 50, 1000); -sample_indices!(misc_sample_indices_100_of_1k, 100, 1000); - #[bench] fn gen_1k_iter_repeat(b: &mut Bencher) { use std::iter; diff --git a/benches/seq.rs b/benches/seq.rs new file mode 100644 index 00000000000..ebcfd68c30f --- /dev/null +++ b/benches/seq.rs @@ -0,0 +1,62 @@ +#![feature(test)] + +extern crate test; +extern crate rand; + +use test::Bencher; + +use rand::prelude::*; +use rand::seq::*; + +#[bench] +fn misc_shuffle_100(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + let x : &mut [usize] = &mut [1; 100]; + b.iter(|| { + rng.shuffle(x); + x[0] + }) +} + +#[bench] +fn misc_sample_iter_10_of_100(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + let x : &[usize] = &[1; 100]; + b.iter(|| { + sample_iter(&mut rng, x, 10).unwrap_or_else(|e| e) + }) +} + +#[bench] +fn misc_sample_slice_10_of_100(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + let x : &[usize] = &[1; 100]; + b.iter(|| { + sample_slice(&mut rng, x, 10) + }) +} + +#[bench] +fn misc_sample_slice_ref_10_of_100(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + let x : &[usize] = &[1; 100]; + b.iter(|| { + sample_slice_ref(&mut rng, x, 10) + }) +} + +macro_rules! sample_indices { + ($name:ident, $amount:expr, $length:expr) => { + #[bench] + fn $name(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + b.iter(|| { + sample_indices(&mut rng, $length, $amount) + }) + } + } +} + +sample_indices!(misc_sample_indices_10_of_1k, 10, 1000); +sample_indices!(misc_sample_indices_50_of_1k, 50, 1000); +sample_indices!(misc_sample_indices_100_of_1k, 100, 1000); From 1b118c23350ccb0fbbd9a86ec148adf93b0526da Mon Sep 17 00:00:00 2001 From: Diggory Hardy Date: Sat, 9 Jun 2018 10:09:07 +0100 Subject: [PATCH 7/7] Revise seq benches --- benches/seq.rs | 54 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 43 insertions(+), 11 deletions(-) diff --git a/benches/seq.rs b/benches/seq.rs index ebcfd68c30f..260e2334a41 100644 --- a/benches/seq.rs +++ b/benches/seq.rs @@ -9,39 +9,71 @@ use rand::prelude::*; use rand::seq::*; #[bench] -fn misc_shuffle_100(b: &mut Bencher) { +fn seq_shuffle_100(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); let x : &mut [usize] = &mut [1; 100]; b.iter(|| { - rng.shuffle(x); + x.shuffle(&mut rng); x[0] }) } #[bench] -fn misc_sample_iter_10_of_100(b: &mut Bencher) { +fn seq_slice_choose_1_of_1000(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + let x : &[usize] = &[1; 1000]; + b.iter(|| { + x.choose(&mut rng) + }) +} + +#[bench] +fn seq_slice_choose_multiple_1_of_1000(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + let x : &[usize] = &[1; 1000]; + b.iter(|| { + x.choose_multiple(&mut rng, 1).cloned().next() + }) +} + +#[bench] +fn seq_slice_choose_multiple_10_of_100(b: &mut Bencher) { + let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); + let x : &[usize] = &[1; 100]; + let mut buf = [0; 10]; + b.iter(|| { + for (v, slot) in x.choose_multiple(&mut rng, buf.len()).zip(buf.iter_mut()) { + *slot = *v; + } + buf + }) +} + +#[bench] +fn seq_iter_choose_from_100(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); let x : &[usize] = &[1; 100]; b.iter(|| { - sample_iter(&mut rng, x, 10).unwrap_or_else(|e| e) + x.iter().cloned().choose(&mut rng) }) } #[bench] -fn misc_sample_slice_10_of_100(b: &mut Bencher) { +fn seq_iter_choose_multiple_10_of_100(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); let x : &[usize] = &[1; 100]; b.iter(|| { - sample_slice(&mut rng, x, 10) + x.iter().cloned().choose_multiple(&mut rng, 10) }) } #[bench] -fn misc_sample_slice_ref_10_of_100(b: &mut Bencher) { +fn seq_iter_choose_multiple_fill_10_of_100(b: &mut Bencher) { let mut rng = SmallRng::from_rng(thread_rng()).unwrap(); let x : &[usize] = &[1; 100]; + let mut buf = [0; 10]; b.iter(|| { - sample_slice_ref(&mut rng, x, 10) + x.iter().cloned().choose_multiple_fill(&mut rng, &mut buf) }) } @@ -57,6 +89,6 @@ macro_rules! sample_indices { } } -sample_indices!(misc_sample_indices_10_of_1k, 10, 1000); -sample_indices!(misc_sample_indices_50_of_1k, 50, 1000); -sample_indices!(misc_sample_indices_100_of_1k, 100, 1000); +sample_indices!(seq_sample_indices_10_of_1k, 10, 1000); +sample_indices!(seq_sample_indices_50_of_1k, 50, 1000); +sample_indices!(seq_sample_indices_100_of_1k, 100, 1000);