Skip to content

Commit 43419ad

Browse files
addaleax authored and danielleadams committed
deps: fix zlib compilation for CPUs without SIMD features
Fix the compile flags so that zlib can run on CPUs that do not have SSSE3/SSE4.2/etc. Do not compile zlib with flags that indicate that those features are available, and instead enable them selectively for functions that use them. There are probably better ways to do this, e.g. through gyp file modifications as suggested in the issue. However, this patch should do just fine until that happens. Fixes: #32553 PR-URL: #32627 Reviewed-By: Gireesh Punathil <gpunathi@in.ibm.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Tobias Nießen <tniessen@tnie.de> Reviewed-By: James M Snell <jasnell@gmail.com> PR-URL: #45387 Reviewed-By: Rafael Gonzaga <rafael.nunu@hotmail.com>
1 parent 978cfad commit 43419ad

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

deps/zlib/adler32_simd.c

+4
Original file line number | Diff line number | Diff line change
@@ -50,9 +50,13 @@
5050
#define NMAX 5552
5151

5252
#if defined(ADLER32_SIMD_SSSE3)
53+
#ifndef __GNUC__
54+
#define __attribute__()
55+
#endif
5356

5457
#include <tmmintrin.h>
5558

59+
__attribute__((target("ssse3")))
5660
uint32_t ZLIB_INTERNAL adler32_simd_( /* SSSE3 */
5761
uint32_t adler,
5862
const unsigned char *buf,

deps/zlib/crc32_simd.c

+4
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,9 @@
88
#include "crc32_simd.h"
99

1010
#if defined(CRC32_SIMD_SSE42_PCLMUL)
11+
#ifndef __GNUC__
12+
#define __attribute__()
13+
#endif
1114

1215
/*
1316
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
@@ -21,6 +24,7 @@
2124
#include <smmintrin.h>
2225
#include <wmmintrin.h>
2326

27+
__attribute__((target("sse4.2,pclmul")))
2428
uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */
2529
const unsigned char *buf,
2630
z_size_t len,

deps/zlib/crc_folding.c

+12
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,10 @@
2525
#include <immintrin.h>
2626
#include <wmmintrin.h>
2727

28+
#ifndef __GNUC__
29+
#define __attribute__()
30+
#endif
31+
2832
#define CRC_LOAD(s) \
2933
do { \
3034
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);\
@@ -41,6 +45,7 @@
4145
_mm_storeu_si128((__m128i *)s->crc0 + 4, xmm_crc_part);\
4246
} while (0);
4347

48+
__attribute__((target("sse4.2,pclmul")))
4449
ZLIB_INTERNAL void crc_fold_init(deflate_state *const s)
4550
{
4651
CRC_LOAD(s)
@@ -55,6 +60,7 @@ ZLIB_INTERNAL void crc_fold_init(deflate_state *const s)
5560
s->strm->adler = 0;
5661
}
5762

63+
__attribute__((target("sse4.2,pclmul")))
5864
local void fold_1(deflate_state *const s,
5965
__m128i *xmm_crc0, __m128i *xmm_crc1,
6066
__m128i *xmm_crc2, __m128i *xmm_crc3)
@@ -81,6 +87,7 @@ local void fold_1(deflate_state *const s,
8187
*xmm_crc3 = _mm_castps_si128(ps_res);
8288
}
8389

90+
__attribute__((target("sse4.2,pclmul")))
8491
local void fold_2(deflate_state *const s,
8592
__m128i *xmm_crc0, __m128i *xmm_crc1,
8693
__m128i *xmm_crc2, __m128i *xmm_crc3)
@@ -115,6 +122,7 @@ local void fold_2(deflate_state *const s,
115122
*xmm_crc3 = _mm_castps_si128(ps_res31);
116123
}
117124

125+
__attribute__((target("sse4.2,pclmul")))
118126
local void fold_3(deflate_state *const s,
119127
__m128i *xmm_crc0, __m128i *xmm_crc1,
120128
__m128i *xmm_crc2, __m128i *xmm_crc3)
@@ -155,6 +163,7 @@ local void fold_3(deflate_state *const s,
155163
*xmm_crc3 = _mm_castps_si128(ps_res32);
156164
}
157165

166+
__attribute__((target("sse4.2,pclmul")))
158167
local void fold_4(deflate_state *const s,
159168
__m128i *xmm_crc0, __m128i *xmm_crc1,
160169
__m128i *xmm_crc2, __m128i *xmm_crc3)
@@ -221,6 +230,7 @@ local const unsigned zalign(32) pshufb_shf_table[60] = {
221230
0x0201008f,0x06050403,0x0a090807,0x0e0d0c0b /* shl 1 (16 -15)/shr15*/
222231
};
223232

233+
__attribute__((target("sse4.2,pclmul")))
224234
local void partial_fold(deflate_state *const s, const size_t len,
225235
__m128i *xmm_crc0, __m128i *xmm_crc1,
226236
__m128i *xmm_crc2, __m128i *xmm_crc3,
@@ -271,6 +281,7 @@ local void partial_fold(deflate_state *const s, const size_t len,
271281
*xmm_crc3 = _mm_castps_si128(ps_res);
272282
}
273283

284+
__attribute__((target("sse4.2,pclmul")))
274285
ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s,
275286
unsigned char *dst, const unsigned char *src, long len)
276287
{
@@ -427,6 +438,7 @@ local const unsigned zalign(16) crc_mask2[4] = {
427438
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
428439
};
429440

441+
__attribute__((target("sse4.2,pclmul")))
430442
unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s)
431443
{
432444
const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask);

0 commit comments

Comments
 (0)