Skip to content

Commit e8d6370

Browse files
First working SIMD version with basic AVX2 support (runtime use is decided at compile time)
1 parent 0df53bb commit e8d6370

File tree

8 files changed

+789
-5
lines changed

8 files changed

+789
-5
lines changed

Makefile.am

+3-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,9 @@ bin_md5_birthdaysearch_SOURCES=\
8686
src/md5birthdaysearch/main.cpp \
8787
src/md5birthdaysearch/main.hpp \
8888
src/md5birthdaysearch/storage.cpp \
89-
src/md5birthdaysearch/storage.hpp
89+
src/md5birthdaysearch/storage.hpp \
90+
src/md5birthdaysearch/simd_avx256.cpp
91+
9092
if HAVE_CUDA
9193
bin_md5_birthdaysearch_SOURCES+=\
9294
src/md5birthdaysearch/cuda_md5.cu

configure.ac

+7
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ AC_TYPE_UINT8_T
3535
AS_IF([test "x$cross_compiling" != "xyes" && test "x$usedefaultcxxflags" = "xyes" ],
3636
[AX_CHECK_COMPILE_FLAG([-march=native], [CXXFLAGS="$CXXFLAGS -march=native"], [])])
3737

38+
# suppress the flood of deprecation warnings caused by the old Boost version
39+
AX_CHECK_COMPILE_FLAG([-Wno-deprecated-declarations], [CXXFLAGS="$CXXFLAGS -Wno-deprecated-declarations"], [])
40+
AX_CHECK_COMPILE_FLAG([-Wno-deprecated], [CXXFLAGS="$CXXFLAGS -Wno-deprecated"], [])
41+
3842
AX_PTHREAD()
3943
LIBS="$PTHREAD_LIBS $LIBS"
4044
CXXFLAGS="$CXXFLAGS $PTHREAD_CFLAGS"
@@ -53,6 +57,9 @@ CPPFLAGS="$BOOST_CPPFLAGS $CPPFLAGS"
5357
LDFLAGS="$BOOST_LDFLAGS $LDFLAGS"
5458
LIBS="$BOOST_FILESYSTEM_LIB $BOOST_IOSTREAMS_LIB $BOOST_PROGRAM_OPTIONS_LIB $BOOST_SERIALIZATION_LIB $BOOST_SYSTEM_LIB $BOOST_THREAD_LIB $LIBS"
5559

60+
AX_EXT
61+
CXXFLAGS="$CXXFLAGS $SIMD_FLAGS"
62+
5663
AX_CUDA
5764

5865
AC_CONFIG_FILES([

lib/hashclash/simd/simd_avx256.h

+110
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
/***
2+
* Copyright 2017 Marc Stevens <marc@marc-stevens.nl>, Dan Shumow (danshu@microsoft.com)
3+
* Distributed under the MIT Software License.
4+
* See accompanying file LICENSE.txt or copy at
5+
* https://opensource.org/licenses/MIT
6+
***/
7+
8+
/*
9+
* this header defines SIMD MACROS for avx256 intrinsics
10+
* used to generate avx256 code from generic SIMD code (sha1_simd.cinc, ubc_check_simd.cinc)
11+
*/
12+
13+
#ifndef SIMD_AVX256_HEADER
14+
#define SIMD_AVX256_HEADER
15+
16+
#ifdef HASHCLASH_HAVE_AVX2
17+
/* requires AVX2 not just AVX */
18+
#define SIMD_VERSION avx256
19+
#define SIMD_VECSIZE 8
20+
21+
#ifdef __GNUC__
22+
23+
/* GCC-compatible compilers (TODO: this branch is currently identical to the Visual Studio branch below) */
24+
#include <immintrin.h>
25+
26+
#define SIMD_WORD __m256i
27+
28+
#define SIMD_ZERO _mm256_setzero_si256()
29+
#define SIMD_WTOV(l) _mm256_set1_epi32(l)
30+
#define SIMD_ADD_VV(l,r) _mm256_add_epi32(l,r)
31+
#define SIMD_ADD_VW(l,r) _mm256_add_epi32(l, _mm256_set1_epi32(r))
32+
#define SIMD_SUB_VV(l,r) _mm256_sub_epi32(l,r)
33+
#define SIMD_SUB_VW(l,r) _mm256_sub_epi32(l, _mm256_set1_epi32(r))
34+
#define SIMD_AND_VV(l,r) _mm256_and_si256(l,r)
35+
#define SIMD_AND_VW(l,r) _mm256_and_si256(l, _mm256_set1_epi32(r))
36+
#define SIMD_ANDNOT_VV(l,r) _mm256_andnot_si256(l,r)
37+
#define SIMD_ANDNOT_VW(l,r) _mm256_andnot_si256(l, _mm256_set1_epi32(r))
38+
#define SIMD_ANDNOT_WV(l,r) _mm256_andnot_si256(_mm256_set1_epi32(l), r)
39+
#define SIMD_OR_VV(l,r) _mm256_or_si256(l,r)
40+
#define SIMD_OR_VW(l,r) _mm256_or_si256(l, _mm256_set1_epi32(r))
41+
#define SIMD_XOR_VV(l,r) _mm256_xor_si256(l,r)
42+
#define SIMD_XOR_VW(l,r) _mm256_xor_si256(l, _mm256_set1_epi32(r))
43+
/*#define SIMD_NOT_V(l) _mm256_andnot_si256(l,l)*/ /* left disabled: andnot(l,l) computes (~l)&l, which is always zero, not ~l */
44+
#define SIMD_SHL_V(l,i) _mm256_slli_epi32(l,i)
45+
#define SIMD_SHR_V(l,i) _mm256_srli_epi32(l,i)
46+
/*#define SIMD_ROL_V(l,i) _mm256_rol_epi32(l,i)*/
47+
/*#define SIMD_ROR_V(l,i) _mm256_ror_epi32(l,i)*/
48+
49+
#define SIMD_EQ_VV(a,b) _mm256_cmpeq_epi32(a,b)
50+
#define SIMD_MIN_VV(a,b) _mm256_min_epu32(a,b)
51+
/* Bitwise select with scalar word operands: (m & a) | (~m & b), where a and b are
 * broadcast to every 32-bit lane. Note that m is expanded twice. */
#define SIMD_SEL_VWW(m,a,b) SIMD_OR_VV( SIMD_AND_VW(m,a), SIMD_ANDNOT_VW(m,b) )
52+
/* Bitwise select with vector operands: (m & a) | (~m & b).
 * Note that m is expanded twice. */
#define SIMD_SEL_VVV(m,a,b) SIMD_OR_VV( SIMD_AND_VV(m,a), SIMD_ANDNOT_VV(m,b) )
53+
54+
#define SIMD_CLEANUP
55+
56+
#else /* __GNUC__ */
57+
58+
/* VISUAL STUDIO */
59+
60+
#include <immintrin.h>
61+
62+
#define SIMD_WORD __m256i
63+
64+
#define SIMD_ZERO _mm256_setzero_si256()
65+
#define SIMD_WTOV(l) _mm256_set1_epi32(l)
66+
#define SIMD_ADD_VV(l,r) _mm256_add_epi32(l,r)
67+
#define SIMD_ADD_VW(l,r) _mm256_add_epi32(l, _mm256_set1_epi32(r))
68+
#define SIMD_SUB_VV(l,r) _mm256_sub_epi32(l,r)
69+
#define SIMD_SUB_VW(l,r) _mm256_sub_epi32(l, _mm256_set1_epi32(r))
70+
#define SIMD_AND_VV(l,r) _mm256_and_si256(l,r)
71+
#define SIMD_AND_VW(l,r) _mm256_and_si256(l, _mm256_set1_epi32(r))
72+
#define SIMD_ANDNOT_VV(l,r) _mm256_andnot_si256(l,r)
73+
#define SIMD_ANDNOT_VW(l,r) _mm256_andnot_si256(l, _mm256_set1_epi32(r))
74+
#define SIMD_ANDNOT_WV(l,r) _mm256_andnot_si256(_mm256_set1_epi32(l), r)
75+
#define SIMD_OR_VV(l,r) _mm256_or_si256(l,r)
76+
#define SIMD_OR_VW(l,r) _mm256_or_si256(l, _mm256_set1_epi32(r))
77+
#define SIMD_XOR_VV(l,r) _mm256_xor_si256(l,r)
78+
#define SIMD_XOR_VW(l,r) _mm256_xor_si256(l, _mm256_set1_epi32(r))
79+
/*#define SIMD_NOT_V(l) _mm256_andnot_si256(l,l)*/ /* left disabled: andnot(l,l) computes (~l)&l, which is always zero, not ~l */
80+
#define SIMD_SHL_V(l,i) _mm256_slli_epi32(l,i)
81+
#define SIMD_SHR_V(l,i) _mm256_srli_epi32(l,i)
82+
/*#define SIMD_ROL_V(l,i) _mm256_rol_epi32(l,i)*/
83+
/*#define SIMD_ROR_V(l,i) _mm256_ror_epi32(l,i)*/
84+
85+
#define SIMD_EQ_VV(a,b) _mm256_cmpeq_epi32(a,b)
86+
#define SIMD_MIN_VV(a,b) _mm256_min_epu32(a,b)
87+
/* Bitwise select with scalar word operands: (m & a) | (~m & b), where a and b are
 * broadcast to every 32-bit lane. Note that m is expanded twice. */
#define SIMD_SEL_VWW(m,a,b) SIMD_OR_VV( SIMD_AND_VW(m,a), SIMD_ANDNOT_VW(m,b) )
88+
/* Bitwise select with vector operands: (m & a) | (~m & b).
 * Note that m is expanded twice. */
#define SIMD_SEL_VVV(m,a,b) SIMD_OR_VV( SIMD_AND_VV(m,a), SIMD_ANDNOT_VV(m,b) )
89+
90+
#define SIMD_CLEANUP
91+
92+
#endif /* __GNUC__ */
93+
94+
95+
/* generic fallback definitions for SIMD operations that were not defined above */
96+
97+
#ifndef SIMD_NOT_V
98+
/* Fallback bitwise NOT: XOR l with an all-ones 32-bit word broadcast to every lane. */
#define SIMD_NOT_V(l) SIMD_XOR_VW(l,0xFFFFFFFF)
99+
#endif
100+
101+
#ifndef SIMD_NEG_V
102+
/* Fallback negation: subtract l from the zero vector (0 - l). */
#define SIMD_NEG_V(l) SIMD_SUB_VV(SIMD_ZERO,l)
103+
#endif
104+
105+
#ifndef SIMD_ROL_V
106+
/* Fallback rotate-left by i bits, composed from the shift and OR macros:
 * (l << i) | (l >> (32 - i)).
 * The count is parenthesised so that an expression argument such as a+b expands
 * to 32-(a+b) instead of 32-a+b. Both l and i are expanded twice. */
#define SIMD_ROL_V(l,i) SIMD_OR_VV(SIMD_SHL_V(l,i),SIMD_SHR_V(l,32-(i)))
107+
/* Fallback rotate-right by i bits, composed from the shift and OR macros:
 * (l >> i) | (l << (32 - i)).
 * The count is parenthesised so that an expression argument such as a+b expands
 * to 32-(a+b) instead of 32-a+b. Both l and i are expanded twice. */
#define SIMD_ROR_V(l,i) SIMD_OR_VV(SIMD_SHR_V(l,i),SIMD_SHL_V(l,32-(i)))
108+
#endif
109+
#endif /* HASHCLASH_HAVE_AVX2 */
110+
#endif /* SIMD_AVX256_HEADER */

0 commit comments

Comments
 (0)