Skip to content

Commit 4874db7

Browse files
addaleax authored and targos committed
deps: fix zlib compilation for CPUs without SIMD features
Fix the compile flags so that zlib can run on CPUs that do not have SSSE3/SSE4.2/etc. Do not compile zlib with flags that indicate that those features are available, and instead enable them selectively for functions that use them. There are probably better ways to do this, e.g. through gyp file modifications as suggested in the issue. However, this patch should do just fine until that happens. Fixes: #32553 PR-URL: #32627 Reviewed-By: Gireesh Punathil <gpunathi@in.ibm.com> Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Tobias Nießen <tniessen@tnie.de> Reviewed-By: James M Snell <jasnell@gmail.com>
1 parent bdb2df7 commit 4874db7

File tree

4 files changed

+20
-14
lines changed

4 files changed

+20
-14
lines changed

deps/zlib/adler32_simd.c

+4
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,13 @@
5050
#define NMAX 5552
5151

5252
#if defined(ADLER32_SIMD_SSSE3)
53+
#ifndef __GNUC__
54+
#define __attribute__()
55+
#endif
5356

5457
#include <tmmintrin.h>
5558

59+
__attribute__((target("ssse3")))
5660
uint32_t ZLIB_INTERNAL adler32_simd_( /* SSSE3 */
5761
uint32_t adler,
5862
const unsigned char *buf,

deps/zlib/crc32_simd.c

+4
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
#include "crc32_simd.h"
99

1010
#if defined(CRC32_SIMD_SSE42_PCLMUL)
11+
#ifndef __GNUC__
12+
#define __attribute__()
13+
#endif
1114

1215
/*
1316
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
@@ -21,6 +24,7 @@
2124
#include <smmintrin.h>
2225
#include <wmmintrin.h>
2326

27+
__attribute__((target("sse4.2,pclmul")))
2428
uint32_t ZLIB_INTERNAL crc32_sse42_simd_( /* SSE4.2+PCLMUL */
2529
const unsigned char *buf,
2630
z_size_t len,

deps/zlib/crc_folding.c

+12
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@
2323
#include <immintrin.h>
2424
#include <wmmintrin.h>
2525

26+
#ifndef __GNUC__
27+
#define __attribute__()
28+
#endif
29+
2630
#define CRC_LOAD(s) \
2731
do { \
2832
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);\
@@ -39,6 +43,7 @@
3943
_mm_storeu_si128((__m128i *)s->crc0 + 4, xmm_crc_part);\
4044
} while (0);
4145

46+
__attribute__((target("sse4.2,pclmul")))
4247
ZLIB_INTERNAL void crc_fold_init(deflate_state *const s)
4348
{
4449
CRC_LOAD(s)
@@ -53,6 +58,7 @@ ZLIB_INTERNAL void crc_fold_init(deflate_state *const s)
5358
s->strm->adler = 0;
5459
}
5560

61+
__attribute__((target("sse4.2,pclmul")))
5662
local void fold_1(deflate_state *const s,
5763
__m128i *xmm_crc0, __m128i *xmm_crc1,
5864
__m128i *xmm_crc2, __m128i *xmm_crc3)
@@ -79,6 +85,7 @@ local void fold_1(deflate_state *const s,
7985
*xmm_crc3 = _mm_castps_si128(ps_res);
8086
}
8187

88+
__attribute__((target("sse4.2,pclmul")))
8289
local void fold_2(deflate_state *const s,
8390
__m128i *xmm_crc0, __m128i *xmm_crc1,
8491
__m128i *xmm_crc2, __m128i *xmm_crc3)
@@ -113,6 +120,7 @@ local void fold_2(deflate_state *const s,
113120
*xmm_crc3 = _mm_castps_si128(ps_res31);
114121
}
115122

123+
__attribute__((target("sse4.2,pclmul")))
116124
local void fold_3(deflate_state *const s,
117125
__m128i *xmm_crc0, __m128i *xmm_crc1,
118126
__m128i *xmm_crc2, __m128i *xmm_crc3)
@@ -153,6 +161,7 @@ local void fold_3(deflate_state *const s,
153161
*xmm_crc3 = _mm_castps_si128(ps_res32);
154162
}
155163

164+
__attribute__((target("sse4.2,pclmul")))
156165
local void fold_4(deflate_state *const s,
157166
__m128i *xmm_crc0, __m128i *xmm_crc1,
158167
__m128i *xmm_crc2, __m128i *xmm_crc3)
@@ -219,6 +228,7 @@ local const unsigned zalign(32) pshufb_shf_table[60] = {
219228
0x0201008f,0x06050403,0x0a090807,0x0e0d0c0b /* shl 1 (16 -15)/shr15*/
220229
};
221230

231+
__attribute__((target("sse4.2,pclmul")))
222232
local void partial_fold(deflate_state *const s, const size_t len,
223233
__m128i *xmm_crc0, __m128i *xmm_crc1,
224234
__m128i *xmm_crc2, __m128i *xmm_crc3,
@@ -269,6 +279,7 @@ local void partial_fold(deflate_state *const s, const size_t len,
269279
*xmm_crc3 = _mm_castps_si128(ps_res);
270280
}
271281

282+
__attribute__((target("sse4.2,pclmul")))
272283
ZLIB_INTERNAL void crc_fold_copy(deflate_state *const s,
273284
unsigned char *dst, const unsigned char *src, long len)
274285
{
@@ -425,6 +436,7 @@ local const unsigned zalign(16) crc_mask2[4] = {
425436
0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
426437
};
427438

439+
__attribute__((target("sse4.2,pclmul")))
428440
unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s)
429441
{
430442
const __m128i xmm_mask = _mm_load_si128((__m128i *)crc_mask);

deps/zlib/zlib.gyp

-14
Original file line numberDiff line numberDiff line change
@@ -91,20 +91,6 @@
9191
'x86.c',
9292
],
9393
'conditions': [
94-
['OS!="win" or llvm_version!="0.0"', {
95-
'cflags': [
96-
'-mssse3',
97-
'-msse4.2',
98-
'-mpclmul',
99-
],
100-
'xcode_settings': {
101-
'OTHER_CFLAGS': [
102-
'-mssse3',
103-
'-msse4.2',
104-
'-mpclmul',
105-
],
106-
},
107-
}],
10894
['target_arch=="x64"', {
10995
'defines': [ 'INFLATE_CHUNK_READ_64LE' ],
11096
}],

0 commit comments

Comments
 (0)