Skip to content

Commit d5502b8

Browse files
authored
Merge pull request #255 from dhardy/parallel
Parallel
2 parents 884bfe7 + 67c312d commit d5502b8

File tree

238 files changed

+96818
-100090
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

238 files changed

+96818
-100090
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ util/SchemaTranslator/*.class
1111
.sconsign.dblite
1212
build*/
1313
lib
14+
.clangd

.kateproject

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"name": "OpenMalaria"
3+
, "files": [ { "git": 1 } ]
4+
}

CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ include_directories (SYSTEM
147147
${GSL_INCLUDE_DIRS}
148148
${Z_INCLUDE_DIRS}
149149
${Boost_INCLUDE_DIRS}
150+
${CMAKE_SOURCE_DIR}/contrib
151+
${CMAKE_SOURCE_DIR}/contrib/pcg-cpp/include
150152
)
151153
include_directories (
152154
${CMAKE_SOURCE_DIR}/model ${CMAKE_BINARY_DIR}

contrib/chacha.h

+324
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
// Source: https://gist.github.com/orlp/32f5d1b631ab092608b1
2+
/*
3+
* Modified:
4+
*
5+
* - keysetup length increased to 10 words
6+
* - seed function uses unique values for keysetup[8] and [9]
7+
* - added binary_checkpoint
8+
* - added include guard
9+
*/
10+
11+
/*
12+
Copyright (c) 2015 Orson Peters <orsonpeters@gmail.com>
13+
14+
This software is provided 'as-is', without any express or implied warranty. In no event will the
15+
authors be held liable for any damages arising from the use of this software.
16+
17+
Permission is granted to anyone to use this software for any purpose, including commercial
18+
applications, and to alter it and redistribute it freely, subject to the following restrictions:
19+
20+
1. The origin of this software must not be misrepresented; you must not claim that you wrote the
21+
original software. If you use this software in a product, an acknowledgment in the product
22+
documentation would be appreciated but is not required.
23+
24+
2. Altered source versions must be plainly marked as such, and must not be misrepresented as
25+
being the original software.
26+
27+
3. This notice may not be removed or altered from any source distribution.
28+
*/
29+
30+
#ifndef OPETERS_CHACHA
31+
#define OPETERS_CHACHA
32+
33+
#include <cstddef>
#include <cstdint>
#include <istream>
#include <limits>
#include <ostream>
#include <stdexcept>
35+
36+
/**
 * Pseudo-random number engine built on the ChaCha block function.
 *
 * R is the number of ChaCha rounds (the core loop runs R/2 double-round
 * iterations). Output is produced 16 words at a time into `block`; `ctr`
 * counts the 32-bit words consumed so far, so `ctr / 16` is the index of the
 * block being read and `ctr % 16` the position inside it.
 */
template<size_t R>
class ChaCha {
public:
    typedef uint32_t result_type;

    /// Construct and seed from a 64-bit seed and a 64-bit stream id.
    explicit ChaCha(uint64_t seedval, uint64_t stream = 0);
    /// Construct and seed from a seed-sequence-like object (must offer generate()).
    template<class Sseq> explicit ChaCha(Sseq& seq);

    /// Reset the counter and derive the key material from seedval / stream.
    void seed(uint64_t seedval, uint64_t stream = 0);
    /// Reset the counter and fill all ten key-setup words from seq.generate().
    template<class Sseq> void seed(Sseq& seq);

    /// Return the next 32-bit output word.
    uint32_t operator()();
    /// Advance the engine by n outputs without returning them.
    void discard(unsigned long long n);

    template<size_t R_> friend bool operator==(const ChaCha<R_>& lhs, const ChaCha<R_>& rhs);
    template<size_t R_> friend bool operator!=(const ChaCha<R_>& lhs, const ChaCha<R_>& rhs);

    // The stream operators are defined below as templates over
    // <size_t, CharT, Traits>; the friend declarations must use the same
    // template parameter list, otherwise they declare different (never
    // defined) functions and the real definitions have no access to the
    // private state.
    template<size_t R_, typename CharT, typename Traits>
    friend std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os, const ChaCha<R_>& rng);

    template<size_t R_, typename CharT, typename Traits>
    friend std::basic_istream<CharT, Traits>& operator>>(std::basic_istream<CharT, Traits>& is, ChaCha<R_>& rng);

    // Incomplete checkpointing: assumes RNG was already seeded.
    // Only the counter is written; the key-setup words are not.
    void binary_checkpoint(std::ostream& stream) {
        stream.write(reinterpret_cast<char*>(&ctr), sizeof(ctr));
    }
    // Restores the counter written by the overload above.
    // Throws std::runtime_error if the counter cannot be read in full.
    void binary_checkpoint(std::istream& stream) {
        stream.read(reinterpret_cast<char*>(&ctr), sizeof(ctr));
        if (!stream || stream.gcount() != static_cast<std::streamsize>(sizeof(ctr)))
            throw std::runtime_error("ChaCha - binary_checkpoint: stream read error");
        // Mid-block position: operator() will not regenerate the block by
        // itself, so rebuild the cached block now.
        if ((ctr % 16) != 0) generate_block();
    }

    static constexpr uint32_t min() { return std::numeric_limits<uint32_t>::min(); }
    static constexpr uint32_t max() { return std::numeric_limits<uint32_t>::max(); }

private:
    /// Fill `block` with the 16 output words for block index ctr / 16.
    void generate_block();
    /// Apply the R-round ChaCha permutation to `block` in place.
    void chacha_core();

    alignas(16) uint32_t block[16];  // cached output words (16-byte aligned for SSE loads)
    uint32_t keysetup[10];           // words 0-7: key; words 8-9: last two input words
    uint64_t ctr;                    // number of 32-bit outputs consumed so far
};
81+
82+
83+
template<size_t R>
84+
inline ChaCha<R>::ChaCha(uint64_t seedval, uint64_t stream) {
85+
seed(seedval, stream);
86+
}
87+
88+
template<size_t R>
89+
template<class Sseq>
90+
inline ChaCha<R>::ChaCha(Sseq& seq) {
91+
seed(seq);
92+
}
93+
94+
template<size_t R>
95+
inline void ChaCha<R>::seed(uint64_t seedval, uint64_t stream) {
96+
ctr = 0;
97+
keysetup[0] = seedval & 0xffffffffu;
98+
keysetup[1] = seedval >> 32;
99+
keysetup[2] = keysetup[3] = 0xdeadbeef; // Could use 128-bit seed.
100+
keysetup[4] = stream & 0xffffffffu;
101+
keysetup[5] = stream >> 32;
102+
keysetup[6] = keysetup[7] = 0xdeadbeef; // Could use 128-bit stream.
103+
// Use an IV unique to this application
104+
keysetup[8] = 0xac4fd2ff;
105+
keysetup[9] = 0x1b48daba;
106+
}
107+
108+
template<size_t R>
109+
template<class Sseq>
110+
inline void ChaCha<R>::seed(Sseq& seq) {
111+
ctr = 0;
112+
seq.generate(keysetup, keysetup + 10);
113+
}
114+
115+
116+
template<size_t R>
117+
inline uint32_t ChaCha<R>::operator()() {
118+
int idx = ctr % 16;
119+
if (idx == 0) generate_block();
120+
++ctr;
121+
122+
return block[idx];
123+
}
124+
125+
template<size_t R>
126+
inline void ChaCha<R>::discard(unsigned long long n) {
127+
int idx = ctr % 16;
128+
ctr += n;
129+
if (idx + n >= 16 && ctr % 16 != 0) generate_block();
130+
}
131+
132+
template<size_t R>
133+
inline void ChaCha<R>::generate_block() {
134+
uint32_t constants[4] = {0x61707865, 0x3320646e, 0x79622d32, 0x6b206574};
135+
136+
uint32_t input[16];
137+
for (int i = 0; i < 4; ++i) input[i] = constants[i];
138+
for (int i = 0; i < 8; ++i) input[4 + i] = keysetup[i];
139+
input[12] = (ctr / 16) & 0xffffffffu;
140+
input[13] = (ctr / 16) >> 32;
141+
input[14] = keysetup[8];
142+
input[15] = keysetup[9];
143+
144+
for (int i = 0; i < 16; ++i) block[i] = input[i];
145+
chacha_core();
146+
for (int i = 0; i < 16; ++i) block[i] += input[i];
147+
}
148+
149+
#ifdef __SSE2__
150+
#include "emmintrin.h"
151+
152+
// Get an efficient _mm_roti_epi32 based on enabled features.
// _mm_roti_epi32(r, c) rotates every 32-bit lane of r left by c bits.
//  - __XOP__ defined: nothing is defined here; <xopintrin.h> is included
//    instead (presumably it supplies _mm_roti_epi32 itself -- confirm).
//  - __SSSE3__ defined: rotations by 8, 16 or 24 bits use one byte shuffle
//    (_mm_shuffle_epi8); any other count uses the shift/xor form below.
//  - otherwise: a left shift by c xor-ed with a right shift by 32-c.
153+
#if !defined(__XOP__)
154+
#if defined(__SSSE3__)
155+
#include <tmmintrin.h>
156+
#define _mm_roti_epi32(r, c) ( \
157+
((c) == 8) ? \
158+
_mm_shuffle_epi8((r), _mm_set_epi8(14, 13, 12, 15, \
159+
10, 9, 8, 11, \
160+
6, 5, 4, 7, \
161+
2, 1, 0, 3)) \
162+
: ((c) == 16) ? \
163+
_mm_shuffle_epi8((r), _mm_set_epi8(13, 12, 15, 14, \
164+
9, 8, 11, 10, \
165+
5, 4, 7, 6, \
166+
1, 0, 3, 2)) \
167+
: ((c) == 24) ? \
168+
_mm_shuffle_epi8((r), _mm_set_epi8(12, 15, 14, 13, \
169+
8, 11, 10, 9, \
170+
4, 7, 6, 5, \
171+
0, 3, 2, 1)) \
172+
: \
173+
_mm_xor_si128(_mm_slli_epi32((r), (c)), \
174+
_mm_srli_epi32((r), 32-(c))) \
175+
)
176+
#else
177+
#define _mm_roti_epi32(r, c) _mm_xor_si128(_mm_slli_epi32((r), (c)), \
178+
_mm_srli_epi32((r), 32-(c)))
179+
#endif
180+
#else
181+
#include <xopintrin.h>
182+
#endif
183+
184+
template<size_t R>
185+
inline void ChaCha<R>::chacha_core() {
186+
// ROTVn rotates the elements in the given vector n places to the left.
187+
#define CHACHA_ROTV1(x) _mm_shuffle_epi32((__m128i) x, 0x39)
188+
#define CHACHA_ROTV2(x) _mm_shuffle_epi32((__m128i) x, 0x4e)
189+
#define CHACHA_ROTV3(x) _mm_shuffle_epi32((__m128i) x, 0x93)
190+
191+
__m128i a = _mm_load_si128((__m128i*) (block));
192+
__m128i b = _mm_load_si128((__m128i*) (block + 4));
193+
__m128i c = _mm_load_si128((__m128i*) (block + 8));
194+
__m128i d = _mm_load_si128((__m128i*) (block + 12));
195+
196+
for (int i = 0; i < R; i += 2) {
197+
a = _mm_add_epi32(a, b);
198+
d = _mm_xor_si128(d, a);
199+
d = _mm_roti_epi32(d, 16);
200+
c = _mm_add_epi32(c, d);
201+
b = _mm_xor_si128(b, c);
202+
b = _mm_roti_epi32(b, 12);
203+
a = _mm_add_epi32(a, b);
204+
d = _mm_xor_si128(d, a);
205+
d = _mm_roti_epi32(d, 8);
206+
c = _mm_add_epi32(c, d);
207+
b = _mm_xor_si128(b, c);
208+
b = _mm_roti_epi32(b, 7);
209+
210+
b = CHACHA_ROTV1(b);
211+
c = CHACHA_ROTV2(c);
212+
d = CHACHA_ROTV3(d);
213+
214+
a = _mm_add_epi32(a, b);
215+
d = _mm_xor_si128(d, a);
216+
d = _mm_roti_epi32(d, 16);
217+
c = _mm_add_epi32(c, d);
218+
b = _mm_xor_si128(b, c);
219+
b = _mm_roti_epi32(b, 12);
220+
a = _mm_add_epi32(a, b);
221+
d = _mm_xor_si128(d, a);
222+
d = _mm_roti_epi32(d, 8);
223+
c = _mm_add_epi32(c, d);
224+
b = _mm_xor_si128(b, c);
225+
b = _mm_roti_epi32(b, 7);
226+
227+
b = CHACHA_ROTV3(b);
228+
c = CHACHA_ROTV2(c);
229+
d = CHACHA_ROTV1(d);
230+
}
231+
232+
_mm_store_si128((__m128i*) (block), a);
233+
_mm_store_si128((__m128i*) (block + 4), b);
234+
_mm_store_si128((__m128i*) (block + 8), c);
235+
_mm_store_si128((__m128i*) (block + 12), d);
236+
237+
#undef CHACHA_ROTV3
238+
#undef CHACHA_ROTV2
239+
#undef CHACHA_ROTV1
240+
}
241+
#else
242+
template<size_t R>
243+
inline void ChaCha<R>::chacha_core() {
244+
#define CHACHA_ROTL32(x, n) (((x) << (n)) | ((x) >> (32 - (n))))
245+
246+
#define CHACHA_QUARTERROUND(x, a, b, c, d) \
247+
x[a] = x[a] + x[b]; x[d] ^= x[a]; x[d] = CHACHA_ROTL32(x[d], 16); \
248+
x[c] = x[c] + x[d]; x[b] ^= x[c]; x[b] = CHACHA_ROTL32(x[b], 12); \
249+
x[a] = x[a] + x[b]; x[d] ^= x[a]; x[d] = CHACHA_ROTL32(x[d], 8); \
250+
x[c] = x[c] + x[d]; x[b] ^= x[c]; x[b] = CHACHA_ROTL32(x[b], 7)
251+
252+
for (int i = 0; i < R; i += 2) {
253+
CHACHA_QUARTERROUND(block, 0, 4, 8, 12);
254+
CHACHA_QUARTERROUND(block, 1, 5, 9, 13);
255+
CHACHA_QUARTERROUND(block, 2, 6, 10, 14);
256+
CHACHA_QUARTERROUND(block, 3, 7, 11, 15);
257+
CHACHA_QUARTERROUND(block, 0, 5, 10, 15);
258+
CHACHA_QUARTERROUND(block, 1, 6, 11, 12);
259+
CHACHA_QUARTERROUND(block, 2, 7, 8, 13);
260+
CHACHA_QUARTERROUND(block, 3, 4, 9, 14);
261+
}
262+
263+
#undef CHACHA_QUARTERROUND
264+
#undef CHACHA_ROTL32
265+
}
266+
#endif
267+
268+
269+
// Implement <random> interface.
270+
template<size_t R>
271+
inline bool operator==(const ChaCha<R>& lhs, const ChaCha<R>& rhs) {
272+
for (int i = 0; i < 10; ++i) {
273+
if (lhs.keysetup[i] != rhs.keysetup[i]) return false;
274+
}
275+
276+
return lhs.ctr == rhs.ctr;
277+
}
278+
279+
template<size_t R>
280+
inline bool operator!=(const ChaCha<R>& lhs, const ChaCha<R>& rhs) { return !(lhs == rhs); }
281+
282+
template<size_t R, typename CharT, typename Traits>
283+
inline std::basic_ostream<CharT, Traits>& operator<<(std::basic_ostream<CharT, Traits>& os, const ChaCha<R>& rng) {
284+
typedef typename std::basic_ostream<CharT, Traits>::ios_base ios_base;
285+
286+
// Save old state.
287+
auto flags = os.flags();
288+
auto fill = os.fill();
289+
290+
// Set flags and fill to space.
291+
auto space = os.widen(' ');
292+
os.flags(ios_base::dec | ios_base::fixed | ios_base::left);
293+
os.fill(space);
294+
295+
// Serialize.
296+
for (int i = 0; i < 10; ++i) os << rng.keysetup[i] << space;
297+
os << rng.ctr;
298+
299+
// Sestore old state.
300+
os.flags(flags);
301+
os.fill(fill);
302+
303+
return os;
304+
}
305+
306+
template<size_t R, typename CharT, typename Traits>
307+
inline std::basic_istream<CharT, Traits>& operator>>(std::basic_istream<CharT, Traits>& is, ChaCha<R>& rng) {
308+
typedef typename std::basic_istream<CharT, Traits> ::ios_base ios_base;
309+
310+
// Save old flags and set ours.
311+
auto flags = is.flags();
312+
is.flags(ios_base::dec);
313+
314+
// Deserialize.
315+
for (int i = 0; i < 10; ++i) is >> rng.keysetup[i];
316+
is >> rng.ctr;
317+
318+
// Restore old flags.
319+
is.flags(flags);
320+
321+
return is;
322+
}
323+
324+
#endif

0 commit comments

Comments
 (0)