|
| 1 | +// Source: https://gist.github.com/orlp/32f5d1b631ab092608b1 |
| 2 | +/* |
| 3 | + * Modified: |
| 4 | + * |
| 5 | + * - keysetup length increased to 10 words |
| 6 | + * - seed function uses unique values for keysetup[8] and [9] |
| 7 | + * - added binary_checkpoint |
| 8 | + * - added include guard |
| 9 | + */ |
| 10 | + |
| 11 | +/* |
| 12 | + Copyright (c) 2015 Orson Peters <orsonpeters@gmail.com> |
| 13 | + |
| 14 | + This software is provided 'as-is', without any express or implied warranty. In no event will the |
| 15 | + authors be held liable for any damages arising from the use of this software. |
| 16 | + |
| 17 | + Permission is granted to anyone to use this software for any purpose, including commercial |
| 18 | + applications, and to alter it and redistribute it freely, subject to the following restrictions: |
| 19 | + |
| 20 | + 1. The origin of this software must not be misrepresented; you must not claim that you wrote the |
| 21 | + original software. If you use this software in a product, an acknowledgment in the product |
| 22 | + documentation would be appreciated but is not required. |
| 23 | + |
| 24 | + 2. Altered source versions must be plainly marked as such, and must not be misrepresented as |
| 25 | + being the original software. |
| 26 | + |
| 27 | + 3. This notice may not be removed or altered from any source distribution. |
| 28 | +*/ |
| 29 | + |
| 30 | +#ifndef OPETERS_CHACHA |
| 31 | +#define OPETERS_CHACHA |
| 32 | + |
#include <cstddef>
#include <cstdint>
#include <istream>
#include <limits>
#include <ostream>
#include <stdexcept>
| 35 | + |
/**
 * Random number engine built on the ChaCha block function.
 *
 * R is the round count: chacha_core() loops while i < R in steps of two,
 * performing one double round per iteration. Each generated block yields
 * sixteen 32-bit outputs.
 *
 * State:
 *  - block:    the sixteen output words of the most recently generated block.
 *  - keysetup: ten words; [0..7] fill input words 4..11 and [8..9] fill input
 *              words 14..15 of the block function.
 *  - ctr:      number of 32-bit words consumed so far; ctr / 16 is the block
 *              counter and ctr % 16 the position inside the current block.
 */
template<size_t R>
class ChaCha {
public:
    using result_type = uint32_t;

    /// Seeds the engine from a 64-bit seed and an optional 64-bit stream id.
    explicit ChaCha(uint64_t seedval, uint64_t stream = 0);
    /// Seeds the engine with ten words obtained from seq.generate().
    template<class Sseq> explicit ChaCha(Sseq& seq);

    /// Resets the counter and rebuilds the key setup from seedval / stream.
    void seed(uint64_t seedval, uint64_t stream = 0);
    /// Resets the counter and fills the key setup from seq.generate().
    template<class Sseq> void seed(Sseq& seq);

    /// Returns the next 32-bit output word.
    uint32_t operator()();
    /// Advances the word counter by n.
    void discard(unsigned long long n);

    template<size_t R_> friend bool operator==(const ChaCha<R_>& lhs, const ChaCha<R_>& rhs);
    template<size_t R_> friend bool operator!=(const ChaCha<R_>& lhs, const ChaCha<R_>& rhs);

    // The stream operators are defined at namespace scope with the round count
    // as their first template parameter; the friend declarations must use the
    // same parameter list, otherwise those definitions cannot reach the
    // private state.
    template<size_t R_, typename CharT, typename Traits>
    friend std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os, const ChaCha<R_>& rng);

    template<size_t R_, typename CharT, typename Traits>
    friend std::basic_istream<CharT, Traits>& operator>>(std::basic_istream<CharT, Traits>& is, ChaCha<R_>& rng);

    // Incomplete checkpointing: only the word counter is stored, so the engine
    // must already have been seeded with the same key setup before restoring.

    /// Writes the raw bytes of the word counter to stream.
    void binary_checkpoint(std::ostream& stream) {
        stream.write(reinterpret_cast<const char*>(&ctr), sizeof(ctr));
    }

    /// Reads the word counter back from stream.
    /// Throws std::runtime_error if the counter cannot be read completely; the
    /// engine is left untouched in that case.
    void binary_checkpoint(std::istream& stream) {
        uint64_t restored = 0;
        stream.read(reinterpret_cast<char*>(&restored), sizeof(restored));
        if (!stream || stream.gcount() != static_cast<std::streamsize>(sizeof(restored)))
            throw std::runtime_error("ChaCha - binary_checkpoint: stream read error");
        ctr = restored;
        // Mid-block position: operator() will not regenerate the block itself,
        // so rebuild it here.
        if ((ctr % 16) != 0) generate_block();
    }

    static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
    static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }

private:
    /// Fills block with the output words for block counter ctr / 16.
    void generate_block();
    /// Applies the ChaCha rounds to block in place.
    void chacha_core();

    alignas(16) uint32_t block[16];  // 16-byte aligned for the aligned SSE2 loads/stores
    uint32_t keysetup[10];
    uint64_t ctr;
};
| 81 | + |
| 82 | + |
| 83 | +template<size_t R> |
| 84 | +inline ChaCha<R>::ChaCha(uint64_t seedval, uint64_t stream) { |
| 85 | + seed(seedval, stream); |
| 86 | +} |
| 87 | + |
| 88 | +template<size_t R> |
| 89 | +template<class Sseq> |
| 90 | +inline ChaCha<R>::ChaCha(Sseq& seq) { |
| 91 | + seed(seq); |
| 92 | +} |
| 93 | + |
| 94 | +template<size_t R> |
| 95 | +inline void ChaCha<R>::seed(uint64_t seedval, uint64_t stream) { |
| 96 | + ctr = 0; |
| 97 | + keysetup[0] = seedval & 0xffffffffu; |
| 98 | + keysetup[1] = seedval >> 32; |
| 99 | + keysetup[2] = keysetup[3] = 0xdeadbeef; // Could use 128-bit seed. |
| 100 | + keysetup[4] = stream & 0xffffffffu; |
| 101 | + keysetup[5] = stream >> 32; |
| 102 | + keysetup[6] = keysetup[7] = 0xdeadbeef; // Could use 128-bit stream. |
| 103 | + // Use an IV unique to this application |
| 104 | + keysetup[8] = 0xac4fd2ff; |
| 105 | + keysetup[9] = 0x1b48daba; |
| 106 | +} |
| 107 | + |
| 108 | +template<size_t R> |
| 109 | +template<class Sseq> |
| 110 | +inline void ChaCha<R>::seed(Sseq& seq) { |
| 111 | + ctr = 0; |
| 112 | + seq.generate(keysetup, keysetup + 10); |
| 113 | +} |
| 114 | + |
| 115 | + |
| 116 | +template<size_t R> |
| 117 | +inline uint32_t ChaCha<R>::operator()() { |
| 118 | + int idx = ctr % 16; |
| 119 | + if (idx == 0) generate_block(); |
| 120 | + ++ctr; |
| 121 | + |
| 122 | + return block[idx]; |
| 123 | +} |
| 124 | + |
| 125 | +template<size_t R> |
| 126 | +inline void ChaCha<R>::discard(unsigned long long n) { |
| 127 | + int idx = ctr % 16; |
| 128 | + ctr += n; |
| 129 | + if (idx + n >= 16 && ctr % 16 != 0) generate_block(); |
| 130 | +} |
| 131 | + |
| 132 | +template<size_t R> |
| 133 | +inline void ChaCha<R>::generate_block() { |
| 134 | + uint32_t constants[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574}; |
| 135 | + |
| 136 | + uint32_t input[16]; |
| 137 | + for (int i = 0; i < 4; ++i) input[i] = constants[i]; |
| 138 | + for (int i = 0; i < 8; ++i) input[4 + i] = keysetup[i]; |
| 139 | + input[12] = (ctr / 16) & 0xffffffffu; |
| 140 | + input[13] = (ctr / 16) >> 32; |
| 141 | + input[14] = keysetup[8]; |
| 142 | + input[15] = keysetup[9]; |
| 143 | + |
| 144 | + for (int i = 0; i < 16; ++i) block[i] = input[i]; |
| 145 | + chacha_core(); |
| 146 | + for (int i = 0; i < 16; ++i) block[i] += input[i]; |
| 147 | +} |
| 148 | + |
| 149 | +#ifdef __SSE2__ |
| 150 | +#include "emmintrin.h" |
| 151 | + |
| 152 | +// Get an efficient _mm_roti_epi32 based on enabled features. |
// Provide _mm_roti_epi32(r, c): rotate every 32-bit lane of r left by the
// constant c, using the cheapest form the enabled instruction sets allow.
#if defined(__XOP__)
    // XOP supplies _mm_roti_epi32 itself. Current GCC and Clang reject a direct
    // include of <xopintrin.h>; it has to come in through <x86intrin.h>.
    #include <x86intrin.h>
#else
    // Generic rotate: the two shifted halves never overlap, so XOR combines them.
    #define CHACHA_MM_ROTL_SHIFT(r, c) \
        _mm_xor_si128(_mm_slli_epi32((r), (c)), _mm_srli_epi32((r), 32 - (c)))

    #if defined(__SSSE3__)
        #include <tmmintrin.h>
        // Rotations by a whole number of bytes are a single byte shuffle.
        #define _mm_roti_epi32(r, c) (                                    \
            ((c) == 8) ?                                                  \
                _mm_shuffle_epi8((r), _mm_set_epi8(14, 13, 12, 15,        \
                                                   10,  9,  8, 11,        \
                                                    6,  5,  4,  7,        \
                                                    2,  1,  0,  3))       \
            : ((c) == 16) ?                                               \
                _mm_shuffle_epi8((r), _mm_set_epi8(13, 12, 15, 14,        \
                                                    9,  8, 11, 10,        \
                                                    5,  4,  7,  6,        \
                                                    1,  0,  3,  2))       \
            : ((c) == 24) ?                                               \
                _mm_shuffle_epi8((r), _mm_set_epi8(12, 15, 14, 13,        \
                                                    8, 11, 10,  9,        \
                                                    4,  7,  6,  5,        \
                                                    0,  3,  2,  1))       \
            :                                                             \
                CHACHA_MM_ROTL_SHIFT((r), (c))                            \
        )
    #else
        #define _mm_roti_epi32(r, c) CHACHA_MM_ROTL_SHIFT((r), (c))
    #endif
#endif
| 183 | + |
| 184 | +template<size_t R> |
| 185 | +inline void ChaCha<R>::chacha_core() { |
| 186 | + // ROTVn rotates the elements in the given vector n places to the left. |
| 187 | + #define CHACHA_ROTV1(x) _mm_shuffle_epi32((__m128i) x, 0x39) |
| 188 | + #define CHACHA_ROTV2(x) _mm_shuffle_epi32((__m128i) x, 0x4e) |
| 189 | + #define CHACHA_ROTV3(x) _mm_shuffle_epi32((__m128i) x, 0x93) |
| 190 | + |
| 191 | + __m128i a = _mm_load_si128((__m128i*) (block)); |
| 192 | + __m128i b = _mm_load_si128((__m128i*) (block + 4)); |
| 193 | + __m128i c = _mm_load_si128((__m128i*) (block + 8)); |
| 194 | + __m128i d = _mm_load_si128((__m128i*) (block + 12)); |
| 195 | + |
| 196 | + for (int i = 0; i < R; i += 2) { |
| 197 | + a = _mm_add_epi32(a, b); |
| 198 | + d = _mm_xor_si128(d, a); |
| 199 | + d = _mm_roti_epi32(d, 16); |
| 200 | + c = _mm_add_epi32(c, d); |
| 201 | + b = _mm_xor_si128(b, c); |
| 202 | + b = _mm_roti_epi32(b, 12); |
| 203 | + a = _mm_add_epi32(a, b); |
| 204 | + d = _mm_xor_si128(d, a); |
| 205 | + d = _mm_roti_epi32(d, 8); |
| 206 | + c = _mm_add_epi32(c, d); |
| 207 | + b = _mm_xor_si128(b, c); |
| 208 | + b = _mm_roti_epi32(b, 7); |
| 209 | + |
| 210 | + b = CHACHA_ROTV1(b); |
| 211 | + c = CHACHA_ROTV2(c); |
| 212 | + d = CHACHA_ROTV3(d); |
| 213 | + |
| 214 | + a = _mm_add_epi32(a, b); |
| 215 | + d = _mm_xor_si128(d, a); |
| 216 | + d = _mm_roti_epi32(d, 16); |
| 217 | + c = _mm_add_epi32(c, d); |
| 218 | + b = _mm_xor_si128(b, c); |
| 219 | + b = _mm_roti_epi32(b, 12); |
| 220 | + a = _mm_add_epi32(a, b); |
| 221 | + d = _mm_xor_si128(d, a); |
| 222 | + d = _mm_roti_epi32(d, 8); |
| 223 | + c = _mm_add_epi32(c, d); |
| 224 | + b = _mm_xor_si128(b, c); |
| 225 | + b = _mm_roti_epi32(b, 7); |
| 226 | + |
| 227 | + b = CHACHA_ROTV3(b); |
| 228 | + c = CHACHA_ROTV2(c); |
| 229 | + d = CHACHA_ROTV1(d); |
| 230 | + } |
| 231 | + |
| 232 | + _mm_store_si128((__m128i*) (block), a); |
| 233 | + _mm_store_si128((__m128i*) (block + 4), b); |
| 234 | + _mm_store_si128((__m128i*) (block + 8), c); |
| 235 | + _mm_store_si128((__m128i*) (block + 12), d); |
| 236 | + |
| 237 | + #undef CHACHA_ROTV3 |
| 238 | + #undef CHACHA_ROTV2 |
| 239 | + #undef CHACHA_ROTV1 |
| 240 | +} |
| 241 | +#else |
| 242 | +template<size_t R> |
| 243 | +inline void ChaCha<R>::chacha_core() { |
| 244 | + #define CHACHA_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) |
| 245 | + |
| 246 | + #define CHACHA_QUARTERROUND(x, a, b, c, d) \ |
| 247 | + x[a] = x[a] + x[b]; x[d] ^= x[a]; x[d] = CHACHA_ROTL32(x[d], 16); \ |
| 248 | + x[c] = x[c] + x[d]; x[b] ^= x[c]; x[b] = CHACHA_ROTL32(x[b], 12); \ |
| 249 | + x[a] = x[a] + x[b]; x[d] ^= x[a]; x[d] = CHACHA_ROTL32(x[d], 8); \ |
| 250 | + x[c] = x[c] + x[d]; x[b] ^= x[c]; x[b] = CHACHA_ROTL32(x[b], 7) |
| 251 | + |
| 252 | + for (int i = 0; i < R; i += 2) { |
| 253 | + CHACHA_QUARTERROUND(block, 0, 4, 8, 12); |
| 254 | + CHACHA_QUARTERROUND(block, 1, 5, 9, 13); |
| 255 | + CHACHA_QUARTERROUND(block, 2, 6, 10, 14); |
| 256 | + CHACHA_QUARTERROUND(block, 3, 7, 11, 15); |
| 257 | + CHACHA_QUARTERROUND(block, 0, 5, 10, 15); |
| 258 | + CHACHA_QUARTERROUND(block, 1, 6, 11, 12); |
| 259 | + CHACHA_QUARTERROUND(block, 2, 7, 8, 13); |
| 260 | + CHACHA_QUARTERROUND(block, 3, 4, 9, 14); |
| 261 | + } |
| 262 | + |
| 263 | + #undef CHACHA_QUARTERROUND |
| 264 | + #undef CHACHA_ROTL32 |
| 265 | +} |
| 266 | +#endif |
| 267 | + |
| 268 | + |
| 269 | +// Implement <random> interface. |
| 270 | +template<size_t R> |
| 271 | +inline bool operator==(const ChaCha<R>& lhs, const ChaCha<R>& rhs) { |
| 272 | + for (int i = 0; i < 10; ++i) { |
| 273 | + if (lhs.keysetup[i] != rhs.keysetup[i]) return false; |
| 274 | + } |
| 275 | + |
| 276 | + return lhs.ctr == rhs.ctr; |
| 277 | +} |
| 278 | + |
| 279 | +template<size_t R> |
| 280 | +inline bool operator!=(const ChaCha<R>& lhs, const ChaCha<R>& rhs) { return !(lhs == rhs); } |
| 281 | + |
| 282 | +template<size_t R, typename CharT, typename Traits> |
| 283 | +inline std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os, const ChaCha<R>& rng) { |
| 284 | + typedef typename std::basic_ostream<CharT, Traits>::ios_base ios_base; |
| 285 | + |
| 286 | + // Save old state. |
| 287 | + auto flags = os.flags(); |
| 288 | + auto fill = os.fill(); |
| 289 | + |
| 290 | + // Set flags and fill to space. |
| 291 | + auto space = os.widen(' '); |
| 292 | + os.flags(ios_base::dec | ios_base::fixed | ios_base::left); |
| 293 | + os.fill(space); |
| 294 | + |
| 295 | + // Serialize. |
| 296 | + for (int i = 0; i < 10; ++i) os << rng.keysetup[i] << space; |
| 297 | + os << rng.ctr; |
| 298 | + |
| 299 | + // Sestore old state. |
| 300 | + os.flags(flags); |
| 301 | + os.fill(fill); |
| 302 | + |
| 303 | + return os; |
| 304 | +} |
| 305 | + |
| 306 | +template<size_t R, typename CharT, typename Traits> |
| 307 | +inline std::basic_istream<CharT, Traits>& operator>>(std::basic_istream<CharT, Traits>& is, ChaCha<R>& rng) { |
| 308 | + typedef typename std::basic_istream<CharT, Traits> ::ios_base ios_base; |
| 309 | + |
| 310 | + // Save old flags and set ours. |
| 311 | + auto flags = is.flags(); |
| 312 | + is.flags(ios_base::dec); |
| 313 | + |
| 314 | + // Deserialize. |
| 315 | + for (int i = 0; i < 10; ++i) is >> rng.keysetup[i]; |
| 316 | + is >> rng.ctr; |
| 317 | + |
| 318 | + // Restore old flags. |
| 319 | + is.flags(flags); |
| 320 | + |
| 321 | + return is; |
| 322 | +} |
| 323 | + |
| 324 | +#endif |
0 commit comments