diff --git a/rand_core/src/impls.rs b/rand_core/src/impls.rs index cffca3e882..0d2c98a2d1 100644 --- a/rand_core/src/impls.rs +++ b/rand_core/src/impls.rs @@ -21,6 +21,7 @@ use core::intrinsics::transmute; use core::slice; +use core::cmp::min; use Rng; /// Implement `next_u64` via `next_u32`, little-endian order. @@ -93,6 +94,82 @@ macro_rules! impl_uint_from_fill { }); } +macro_rules! fill_via_chunks { + ($src:expr, $dest:expr, $N:expr) => ({ + let chunk_size_u8 = min($src.len() * $N, $dest.len()); + let chunk_size = (chunk_size_u8 + $N - 1) / $N; + + // Convert to little-endian: + for ref mut x in $src[0..chunk_size].iter_mut() { + **x = (*x).to_le(); + } + + let bytes = unsafe { slice::from_raw_parts($src.as_ptr() as *const u8, + $src.len() * $N) }; + + let dest_chunk = &mut $dest[0..chunk_size_u8]; + dest_chunk.copy_from_slice(&bytes[0..chunk_size_u8]); + + (chunk_size, chunk_size_u8) + }); +} + +/// Implement `fill_bytes` by reading chunks from the output buffer of a block +/// based RNG. +/// +/// The return values are `(consumed_u32, filled_u8)`. +/// +/// `filled_u8` is the number of filled bytes in `dest`, which may be less than +/// the length of `dest`. +/// `consumed_u32` is the number of words consumed from `src`, which is the same +/// as `filled_u8 / 4` rounded up. +/// +/// Note that on big-endian systems values in the output buffer `src` are +/// mutated. `src[0..consumed_u32]` get converted to little-endian before +/// copying. 
+/// +/// # Example +/// (from `IsaacRng`) +/// +/// ```rust,ignore +/// fn fill_bytes(&mut self, dest: &mut [u8]) { +/// let mut read_len = 0; +/// while read_len < dest.len() { +/// if self.index >= self.rsl.len() { +/// self.isaac(); +/// } +/// +/// let (consumed_u32, filled_u8) = +/// impls::fill_via_u32_chunks(&mut self.rsl[self.index..], +/// &mut dest[read_len..]); +/// +/// self.index += consumed_u32; +/// read_len += filled_u8; +/// } +/// } +/// ``` +pub fn fill_via_u32_chunks(src: &mut [u32], dest: &mut [u8]) -> (usize, usize) { + fill_via_chunks!(src, dest, 4) +} + +/// Implement `fill_bytes` by reading chunks from the output buffer of a block +/// based RNG. +/// +/// The return values are `(consumed_u64, filled_u8)`. +/// `filled_u8` is the number of filled bytes in `dest`, which may be less than +/// the length of `dest`. +/// `consumed_u64` is the number of words consumed from `src`, which is the same +/// as `filled_u8 / 8` rounded up. +/// +/// Note that on big-endian systems values in the output buffer `src` are +/// mutated. `src[0..consumed_u64]` get converted to little-endian before +/// copying. +/// +/// See `fill_via_u32_chunks` for an example. +pub fn fill_via_u64_chunks(src: &mut [u64], dest: &mut [u8]) -> (usize, usize) { + fill_via_chunks!(src, dest, 8) +} + /// Implement `next_u32` via `fill_bytes`, little-endian order. pub fn next_u32_via_fill<R: Rng+?Sized>(rng: &mut R) -> u32 { impl_uint_from_fill!(rng, u32, 4) diff --git a/src/prng/chacha.rs b/src/prng/chacha.rs index a9ae8996b0..49e8653b27 100644 --- a/src/prng/chacha.rs +++ b/src/prng/chacha.rs @@ -10,13 +10,10 @@ //! The ChaCha random number generator. 
-use core::num::Wrapping as w; use core::fmt; +use rand_core::impls; use {Rng, CryptoRng, SeedFromRng, SeedableRng, Error}; -#[allow(bad_style)] -type w32 = w<u32>; - const KEY_WORDS : usize = 8; // 8 words for the 256-bit key const STATE_WORDS : usize = 16; const CHACHA_ROUNDS: u32 = 20; // Cryptographically secure from 8 upwards as of this writing @@ -32,9 +29,9 @@ const CHACHA_ROUNDS: u32 = 20; // Cryptographically secure from 8 upwards as of /// Salsa20*](http://cr.yp.to/chacha.html) #[derive(Clone)] pub struct ChaChaRng { - buffer: [w32; STATE_WORDS], // Internal buffer of output - state: [w32; STATE_WORDS], // Initial state - index: usize, // Index into state + buffer: [u32; STATE_WORDS], // Internal buffer of output + state: [u32; STATE_WORDS], // Initial state + index: usize, // Index into state } // Custom Debug implementation that does not expose the internal state @@ -46,10 +43,10 @@ impl fmt::Debug for ChaChaRng { macro_rules! quarter_round{ ($a: expr, $b: expr, $c: expr, $d: expr) => {{ - $a = $a + $b; $d = $d ^ $a; $d = w($d.0.rotate_left(16)); - $c = $c + $d; $b = $b ^ $c; $b = w($b.0.rotate_left(12)); - $a = $a + $b; $d = $d ^ $a; $d = w($d.0.rotate_left( 8)); - $c = $c + $d; $b = $b ^ $c; $b = w($b.0.rotate_left( 7)); + $a = $a.wrapping_add($b); $d ^= $a; $d = $d.rotate_left(16); + $c = $c.wrapping_add($d); $b ^= $c; $b = $b.rotate_left(12); + $a = $a.wrapping_add($b); $d ^= $a; $d = $d.rotate_left( 8); + $c = $c.wrapping_add($d); $b ^= $c; $b = $b.rotate_left( 7); }} } @@ -69,15 +66,15 @@ macro_rules! 
double_round{ } #[inline] -fn core(output: &mut [w32; STATE_WORDS], input: &[w32; STATE_WORDS]) { - *output = *input; +fn core(new: &mut [u32; STATE_WORDS], input: &[u32; STATE_WORDS]) { + *new = *input; for _ in 0..CHACHA_ROUNDS / 2 { - double_round!(output); + double_round!(new); } for i in 0..STATE_WORDS { - output[i] = output[i] + input[i]; + new[i] = new[i].wrapping_add(input[i]); } } @@ -104,8 +101,8 @@ impl ChaChaRng { /// - 2419978656 pub fn new_unseeded() -> ChaChaRng { let mut rng = ChaChaRng { - buffer: [w(0); STATE_WORDS], - state: [w(0); STATE_WORDS], + buffer: [0; STATE_WORDS], + state: [0; STATE_WORDS], index: STATE_WORDS }; rng.init(&[0; KEY_WORDS]); @@ -133,10 +130,10 @@ impl ChaChaRng { /// println!("{:?}", ra.next_u32()); /// ``` pub fn set_counter(&mut self, counter_low: u64, counter_high: u64) { - self.state[12] = w((counter_low >> 0) as u32); - self.state[13] = w((counter_low >> 32) as u32); - self.state[14] = w((counter_high >> 0) as u32); - self.state[15] = w((counter_high >> 32) as u32); + self.state[12] = (counter_low >> 0) as u32; + self.state[13] = (counter_low >> 32) as u32; + self.state[14] = (counter_high >> 0) as u32; + self.state[15] = (counter_high >> 32) as u32; self.index = STATE_WORDS; // force recomputation } @@ -159,19 +156,19 @@ impl ChaChaRng { /// [1]: Daniel J. Bernstein. 
[*Extending the Salsa20 /// nonce.*](http://cr.yp.to/papers.html#xsalsa) fn init(&mut self, key: &[u32; KEY_WORDS]) { - self.state[0] = w(0x61707865); - self.state[1] = w(0x3320646E); - self.state[2] = w(0x79622D32); - self.state[3] = w(0x6B206574); + self.state[0] = 0x61707865; + self.state[1] = 0x3320646E; + self.state[2] = 0x79622D32; + self.state[3] = 0x6B206574; for i in 0..KEY_WORDS { - self.state[4+i] = w(key[i]); + self.state[4+i] = key[i]; } - self.state[12] = w(0); - self.state[13] = w(0); - self.state[14] = w(0); - self.state[15] = w(0); + self.state[12] = 0; + self.state[13] = 0; + self.state[14] = 0; + self.state[15] = 0; self.index = STATE_WORDS; } @@ -181,69 +178,54 @@ impl ChaChaRng { core(&mut self.buffer, &self.state); self.index = 0; // update 128-bit counter - self.state[12] = self.state[12] + w(1); - if self.state[12] != w(0) { return }; - self.state[13] = self.state[13] + w(1); - if self.state[13] != w(0) { return }; - self.state[14] = self.state[14] + w(1); - if self.state[14] != w(0) { return }; - self.state[15] = self.state[15] + w(1); + self.state[12] = self.state[12].wrapping_add(1); + if self.state[12] != 0 { return }; + self.state[13] = self.state[13].wrapping_add(1); + if self.state[13] != 0 { return }; + self.state[14] = self.state[14].wrapping_add(1); + if self.state[14] != 0 { return }; + self.state[15] = self.state[15].wrapping_add(1); } } impl Rng for ChaChaRng { #[inline] fn next_u32(&mut self) -> u32 { - if self.index == STATE_WORDS { + // Using a local variable for `index`, and checking the size avoids a + // bounds check later on. 
+ let mut index = self.index as usize; + if index >= STATE_WORDS { self.update(); + index = 0; } - let value = self.buffer[self.index % STATE_WORDS]; + let value = self.buffer[index]; self.index += 1; - value.0 + value } - + fn next_u64(&mut self) -> u64 { - ::rand_core::impls::next_u64_via_u32(self) + impls::next_u64_via_u32(self) } + #[cfg(feature = "i128_support")] fn next_u128(&mut self) -> u128 { - ::rand_core::impls::next_u128_via_u64(self) + impls::next_u128_via_u64(self) } - - // Custom implementation allowing larger reads from buffer is about 8% - // faster than default implementation in my tests + fn fill_bytes(&mut self, dest: &mut [u8]) { - use core::cmp::min; - use core::intrinsics::{transmute, copy_nonoverlapping}; - - let mut left = dest; - while left.len() >= 4 { - if self.index == STATE_WORDS { + let mut read_len = 0; + while read_len < dest.len() { + if self.index >= self.buffer.len() { self.update(); } - - let words = min(left.len() / 4, STATE_WORDS - self.index); - let (l, r) = {left}.split_at_mut(4 * words); - left = r; - - // convert to LE: - for ref mut x in self.buffer[self.index..self.index+words].iter_mut() { - **x = w((*x).0.to_le()); - } - - unsafe{ copy_nonoverlapping( - &self.buffer[self.index].0 as *const u32 as *const u8, - l.as_mut_ptr(), - 4 * words) }; - self.index += words; - } - let n = left.len(); - if n > 0 { - let chunk: [u8; 4] = unsafe { - transmute(self.next_u32().to_le()) - }; - left.copy_from_slice(&chunk[..n]); + + let (consumed_u32, filled_u8) = + impls::fill_via_u32_chunks(&mut self.buffer[self.index..], + &mut dest[read_len..]); + + self.index += consumed_u32; + read_len += filled_u8; } } @@ -271,8 +253,8 @@ impl<'a> SeedableRng<&'a [u32]> for ChaChaRng { /// words are used, the remaining are set to zero. 
fn from_seed(seed: &'a [u32]) -> ChaChaRng { let mut rng = ChaChaRng { - buffer: [w(0); STATE_WORDS], - state: [w(0); STATE_WORDS], + buffer: [0; STATE_WORDS], + state: [0; STATE_WORDS], index: STATE_WORDS }; rng.init(&[0u32; KEY_WORDS]); @@ -280,7 +262,7 @@ impl<'a> SeedableRng<&'a [u32]> for ChaChaRng { { let key = &mut rng.state[4 .. 4+KEY_WORDS]; for (k, s) in key.iter_mut().zip(seed.iter()) { - *k = w(*s); + *k = *s; } } rng diff --git a/src/prng/isaac.rs b/src/prng/isaac.rs index 82944144cb..47dc8247f7 100644 --- a/src/prng/isaac.rs +++ b/src/prng/isaac.rs @@ -15,6 +15,8 @@ use core::iter::repeat; use core::num::Wrapping as w; use core::fmt; +use rand_core::impls; + use {Rng, SeedFromRng, SeedableRng, Error}; #[allow(non_camel_case_types)] @@ -87,12 +89,12 @@ const RAND_SIZE: usize = 1 << RAND_SIZE_LEN; /// [3]: Jean-Philippe Aumasson, [*On the pseudo-random generator ISAAC*] /// (http://eprint.iacr.org/2006/438) pub struct IsaacRng { - rsl: [w32; RAND_SIZE], + rsl: [u32; RAND_SIZE], mem: [w32; RAND_SIZE], a: w32, b: w32, c: w32, - cnt: u32, + index: u32, } // Cannot be derived because [u32; 256] does not implement Clone @@ -105,7 +107,7 @@ impl Clone for IsaacRng { a: self.a, b: self.b, c: self.c, - cnt: self.cnt, + index: self.index, } } } @@ -149,6 +151,9 @@ impl IsaacRng { /// - We maintain one index `i` and add `m` or `m2` as base (m2 for the /// `s[i+128 mod 256]`), relying on the optimizer to turn it into pointer /// arithmetic. + /// - We fill `rsl` backwards. The reference implementation reads values + /// from `rsl` in reverse. We read them in the normal direction, to make + /// `fill_bytes` a memcopy. To maintain compatibility we fill in reverse. 
fn isaac(&mut self) { self.c += w(1); // abbreviations @@ -156,13 +161,13 @@ impl IsaacRng { let mut b = self.b + self.c; const MIDPOINT: usize = RAND_SIZE / 2; - #[inline(always)] + #[inline] fn ind(mem:&[w32; RAND_SIZE], v: w32, amount: usize) -> w32 { let index = (v >> amount).0 as usize % RAND_SIZE; mem[index] } - #[inline(always)] + #[inline] fn rngstep(ctx: &mut IsaacRng, mix: w32, a: &mut w32, @@ -175,7 +180,7 @@ impl IsaacRng { let y = *a + *b + ind(&ctx.mem, x, 2); ctx.mem[base + m] = y; *b = x + ind(&ctx.mem, y, 2 + RAND_SIZE_LEN); - ctx.rsl[base + m] = *b; + ctx.rsl[RAND_SIZE - 1 - base - m] = (*b).0; } let mut m = 0; @@ -198,44 +203,50 @@ impl IsaacRng { self.a = a; self.b = b; - self.cnt = RAND_SIZE as u32; + self.index = 0; } } impl Rng for IsaacRng { #[inline] fn next_u32(&mut self) -> u32 { - if self.cnt == 0 { - // make some more numbers + // Using a local variable for `index`, and checking the size avoids a + // bounds check later on. + let mut index = self.index as usize; + if index >= RAND_SIZE { self.isaac(); + index = 0; } - self.cnt -= 1; - - // self.cnt is at most RAND_SIZE, but that is before the - // subtraction above. We want to index without bounds - // checking, but this could lead to incorrect code if someone - // misrefactors, so we check, sometimes. - // - // (Changes here should be reflected in Isaac64Rng.next_u64.) - debug_assert!((self.cnt as usize) < RAND_SIZE); - - // (the % is cheaply telling the optimiser that we're always - // in bounds, without unsafe. NB. this is a power of two, so - // it optimises to a bitwise mask). 
- self.rsl[self.cnt as usize % RAND_SIZE].0 + + let value = self.rsl[index]; + self.index += 1; + value } + #[inline] fn next_u64(&mut self) -> u64 { - ::rand_core::impls::next_u64_via_u32(self) + impls::next_u64_via_u32(self) } #[cfg(feature = "i128_support")] fn next_u128(&mut self) -> u128 { - ::rand_core::impls::next_u128_via_u64(self) + impls::next_u128_via_u64(self) } fn fill_bytes(&mut self, dest: &mut [u8]) { - ::rand_core::impls::fill_bytes_via_u32(self, dest); + let mut read_len = 0; + while read_len < dest.len() { + if self.index as usize >= RAND_SIZE { + self.isaac(); + } + + let (consumed_u32, filled_u8) = + impls::fill_via_u32_chunks(&mut self.rsl[(self.index as usize)..], + &mut dest[read_len..]); + + self.index += consumed_u32 as u32; + read_len += filled_u8; + } } fn try_fill(&mut self, dest: &mut [u8]) -> Result<(), Error> { @@ -300,12 +311,12 @@ fn init(mut mem: [w32; RAND_SIZE], rounds: u32) -> IsaacRng { } let mut rng = IsaacRng { - rsl: [w(0); RAND_SIZE], + rsl: [0; RAND_SIZE], mem: mem, a: w(0), b: w(0), c: w(0), - cnt: 0, + index: 0, }; // Prepare the first set of results diff --git a/src/prng/isaac64.rs b/src/prng/isaac64.rs index 2c84927c2d..3c34b7fde5 100644 --- a/src/prng/isaac64.rs +++ b/src/prng/isaac64.rs @@ -15,6 +15,8 @@ use core::iter::repeat; use core::num::Wrapping as w; use core::fmt; +use rand_core::impls; + use {Rng, SeedFromRng, SeedableRng, Error}; #[allow(non_camel_case_types)] @@ -71,12 +73,13 @@ const RAND_SIZE: usize = 1 << RAND_SIZE_LEN; /// [1]: Bob Jenkins, [*ISAAC and RC4*] /// (http://burtleburtle.net/bob/rand/isaac.html) pub struct Isaac64Rng { - rsl: [w64; RAND_SIZE], + rsl: [u64; RAND_SIZE], mem: [w64; RAND_SIZE], a: w64, b: w64, c: w64, - cnt: u32, + index: u32, + half_used: bool, // true if only half of the previous result is used } // Cannot be derived because [u64; 256] does not implement Clone @@ -89,7 +92,8 @@ impl Clone for Isaac64Rng { a: self.a, b: self.b, c: self.c, - cnt: self.cnt, + index: self.index, 
+ half_used: self.half_used, } } } @@ -132,6 +136,9 @@ impl Isaac64Rng { /// - We maintain one index `i` and add `m` or `m2` as base (m2 for the /// `s[i+128 mod 256]`), relying on the optimizer to turn it into pointer /// arithmetic. + /// - We fill `rsl` backwards. The reference implementation reads values + /// from `rsl` in reverse. We read them in the normal direction, to make + /// `fill_bytes` a memcopy. To maintain compatibility we fill in reverse. fn isaac64(&mut self) { self.c += w(1); // abbreviations @@ -139,13 +146,13 @@ impl Isaac64Rng { let mut b = self.b + self.c; const MIDPOINT: usize = RAND_SIZE / 2; - #[inline(always)] + #[inline] fn ind(mem:&[w64; RAND_SIZE], v: w64, amount: usize) -> w64 { let index = (v >> amount).0 as usize % RAND_SIZE; mem[index] } - #[inline(always)] + #[inline] fn rngstep(ctx: &mut Isaac64Rng, mix: w64, a: &mut w64, @@ -158,7 +165,7 @@ impl Isaac64Rng { let y = *a + *b + ind(&ctx.mem, x, 3); ctx.mem[base + m] = y; *b = x + ind(&ctx.mem, y, 3 + RAND_SIZE_LEN); - ctx.rsl[base + m] = *b; + ctx.rsl[RAND_SIZE - 1 - base - m] = (*b).0; } let mut m = 0; @@ -181,45 +188,71 @@ impl Isaac64Rng { self.a = a; self.b = b; - self.cnt = RAND_SIZE as u32; + self.index = 0; + self.half_used = false; } } impl Rng for Isaac64Rng { #[inline] fn next_u32(&mut self) -> u32 { - self.next_u64() as u32 + // Using a local variable for `index`, and checking the size avoids a + // bounds check later on. + let mut index = self.index as usize * 2 - self.half_used as usize; + if index >= RAND_SIZE * 2 { + self.isaac64(); + index = 0; + } + + self.half_used = !self.half_used; + self.index += self.half_used as u32; + + // Index as if this is a u32 slice. 
+ let rsl = unsafe { &*(&mut self.rsl as *mut [u64; RAND_SIZE] + as *mut [u32; RAND_SIZE * 2]) }; + + if cfg!(target_endian = "little") { + rsl[index] + } else { + rsl[index ^ 1] + } } #[inline] fn next_u64(&mut self) -> u64 { - if self.cnt == 0 { - // make some more numbers + let mut index = self.index as usize; + if index >= RAND_SIZE { self.isaac64(); + index = 0; } - self.cnt -= 1; - - // self.cnt is at most RAND_SIZE, but that is before the - // subtraction above. We want to index without bounds - // checking, but this could lead to incorrect code if someone - // misrefactors, so we check, sometimes. - // - // (Changes here should be reflected in IsaacRng.next_u32.) - debug_assert!((self.cnt as usize) < RAND_SIZE); - - // (the % is cheaply telling the optimiser that we're always - // in bounds, without unsafe. NB. this is a power of two, so - // it optimises to a bitwise mask). - self.rsl[self.cnt as usize % RAND_SIZE].0 + + let value = self.rsl[index]; + self.index += 1; + self.half_used = false; + value } #[cfg(feature = "i128_support")] fn next_u128(&mut self) -> u128 { - ::rand_core::impls::next_u128_via_u64(self) + impls::next_u128_via_u64(self) } fn fill_bytes(&mut self, dest: &mut [u8]) { - ::rand_core::impls::fill_bytes_via_u64(self, dest); + let mut read_len = 0; + // Drop any pending half word: the copy below consumes whole u64s, so a stale flag would make `next_u32` replay bytes already written to `dest`. 
+ self.half_used = false; + while read_len < dest.len() { + if self.index as usize >= RAND_SIZE { + self.isaac64(); + } + + let (consumed_u64, filled_u8) = + impls::fill_via_u64_chunks(&mut self.rsl[self.index as usize..], + &mut dest[read_len..]); + + self.index += consumed_u64 as u32; + read_len += filled_u8; + } } fn try_fill(&mut self, dest: &mut [u8]) -> Result<(), Error> { @@ -259,12 +292,13 @@ fn init(mut mem: [w64; RAND_SIZE], rounds: u32) -> Isaac64Rng { } let mut rng = Isaac64Rng { - rsl: [w(0); RAND_SIZE], + rsl: [0; RAND_SIZE], mem: mem, a: w(0), b: w(0), c: w(0), - cnt: 0, + index: 0, + half_used: false, }; // Prepare the first set of results @@ -370,29 +404,39 @@ mod test { 596345674630742204, 
9947027391921273664, 11788097613744130851, 10391409374914919106)); } - + #[test] fn test_isaac64_true_values_32() { let seed: &[_] = &[1, 23, 456, 7890, 12345]; let mut rng1 = Isaac64Rng::from_seed(seed); - let v = (0..10).map(|_| rng1.next_u32()).collect::<Vec<_>>(); + let v = (0..12).map(|_| rng1.next_u32()).collect::<Vec<_>>(); // Subset of above values, as an LE u32 sequence - // TODO: switch to this sequence? -// assert_eq!(v, -// [141028748, 127386717, -// 1058730652, 3347555894, -// 851491469, 4039984500, -// 2692730210, 288449107, -// 646103879, 2782923823]); - // Subset of above values, using only low-half of each u64 assert_eq!(v, - [141028748, 1058730652, - 851491469, 2692730210, - 646103879, 4195642895, - 2836348583, 1312677241, - 999139615, 253604626]); + [141028748, 127386717, + 1058730652, 3347555894, + 851491469, 4039984500, + 2692730210, 288449107, + 646103879, 2782923823, + 4195642895, 3252674613]); + } + + #[test] + fn test_isaac64_true_values_mixed() { + let seed: &[_] = &[1, 23, 456, 7890, 12345]; + let mut rng = Isaac64Rng::from_seed(seed); + // Test alternating between `next_u64` and `next_u32` works as expected. + // Values are the same as `test_isaac64_true_values` and + // `test_isaac64_true_values_32`. + assert_eq!(rng.next_u64(), 547121783600835980); + assert_eq!(rng.next_u32(), 1058730652); + assert_eq!(rng.next_u32(), 3347555894); + assert_eq!(rng.next_u64(), 17351601304698403469); + assert_eq!(rng.next_u32(), 2692730210); + // Skip one u32 + assert_eq!(rng.next_u64(), 11952566807690396487); + assert_eq!(rng.next_u32(), 4195642895); } - + #[test] fn test_isaac64_true_bytes() { let seed: &[_] = &[1, 23, 456, 7890, 12345]; @@ -406,7 +450,7 @@ mod test { 141, 186, 192, 50, 116, 69, 205, 240, 98, 205, 127, 160, 83, 98, 49, 17]); } - + #[test] fn test_isaac_new_uninitialized() { // Compare the results from initializing `IsaacRng` with