Skip to content

Commit 8c31ea0

Browse files
nodejs-github-bot and Ceres6
authored and committed on Aug 14, 2023
deps: update zlib to 337322d
PR-URL: nodejs#48218
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
1 parent 35abf87 commit 8c31ea0

26 files changed

+1273
-28
lines changed
 

‎deps/zlib/BUILD.gn

+4
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,10 @@ if (build_with_chromium) {
515515

516516
data = [ "google/test/data/" ]
517517

518+
if (is_ios) {
519+
bundle_deps = [ "google:zlib_pak_bundle_data" ]
520+
}
521+
518522
deps = [
519523
":zlib",
520524
"google:compression_utils",

‎deps/zlib/CMakeLists.txt

+8-2
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ set(CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS ON)
33

44
project(zlib C)
55

6-
set(VERSION "1.2.13")
6+
set(VERSION "1.2.13.1")
77

88
set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
99
set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
@@ -22,6 +22,7 @@ check_include_file(stdint.h HAVE_STDINT_H)
2222
check_include_file(stddef.h HAVE_STDDEF_H)
2323

2424
option(ENABLE_SIMD_OPTIMIZATIONS "Enable all SIMD optimizations" OFF)
25+
option(ENABLE_SIMD_AVX512 "Enable SIMD AXV512 optimizations" OFF)
2526

2627
# TODO(cavalcantii): add support for other OSes (e.g. Android, fuchsia, osx)
2728
# and architectures (e.g. Arm).
@@ -30,8 +31,13 @@ if (ENABLE_SIMD_OPTIMIZATIONS)
3031
add_definitions(-DADLER32_SIMD_SSSE3)
3132
add_definitions(-DINFLATE_CHUNK_READ_64LE)
3233
add_definitions(-DCRC32_SIMD_SSE42_PCLMUL)
34+
if (ENABLE_SIMD_AVX512)
35+
add_definitions(-DCRC32_SIMD_AVX512_PCLMUL)
36+
add_compile_options(-mvpclmulqdq -msse2 -mavx512f -mpclmul)
37+
else()
38+
add_compile_options(-msse4.2 -mpclmul)
39+
endif()
3340
add_definitions(-DDEFLATE_SLIDE_HASH_SSE2)
34-
add_compile_options(-msse4.2 -mpclmul)
3541
# Required by CPU features detection code.
3642
add_definitions(-DX86_NOT_WINDOWS)
3743
# Apparently some environments (e.g. CentOS) require to explicitly link

‎deps/zlib/contrib/optimizations/inflate.c

+2
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,8 @@ int value;
257257
struct inflate_state FAR *state;
258258

259259
if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
260+
if (bits == 0)
261+
return Z_OK;
260262
state = (struct inflate_state FAR *)strm->state;
261263
if (bits < 0) {
262264
state->hold = 0;

‎deps/zlib/contrib/tests/fuzzers/inflate_with_header_fuzzer.cc

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212

1313
#include <fuzzer/FuzzedDataProvider.h>
1414

15-
#include "third_party/zlib/zlib.h"
15+
#include "zlib.h"
1616

1717
// Fuzzer builds often have NDEBUG set, so roll our own assert macro.
1818
#define ASSERT(cond) \

‎deps/zlib/cpu_features.c

+9
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ int ZLIB_INTERNAL arm_cpu_enable_pmull = 0;
3131
int ZLIB_INTERNAL x86_cpu_enable_sse2 = 0;
3232
int ZLIB_INTERNAL x86_cpu_enable_ssse3 = 0;
3333
int ZLIB_INTERNAL x86_cpu_enable_simd = 0;
34+
int ZLIB_INTERNAL x86_cpu_enable_avx512 = 0;
3435

3536
#ifndef CPU_NO_SIMD
3637

@@ -138,6 +139,10 @@ static void _cpu_check_features(void)
138139
/* On x86 we simply use a instruction to check the CPU features.
139140
* (i.e. CPUID).
140141
*/
142+
#ifdef CRC32_SIMD_AVX512_PCLMUL
143+
#include <immintrin.h>
144+
#include <xsaveintrin.h>
145+
#endif
141146
static void _cpu_check_features(void)
142147
{
143148
int x86_cpu_has_sse2;
@@ -164,6 +169,10 @@ static void _cpu_check_features(void)
164169
x86_cpu_enable_simd = x86_cpu_has_sse2 &&
165170
x86_cpu_has_sse42 &&
166171
x86_cpu_has_pclmulqdq;
172+
173+
#ifdef CRC32_SIMD_AVX512_PCLMUL
174+
x86_cpu_enable_avx512 = _xgetbv(0) & 0x00000040;
175+
#endif
167176
}
168177
#endif
169178
#endif

‎deps/zlib/cpu_features.h

+1
Original file line numberDiff line numberDiff line change
@@ -14,5 +14,6 @@ extern int arm_cpu_enable_pmull;
1414
extern int x86_cpu_enable_sse2;
1515
extern int x86_cpu_enable_ssse3;
1616
extern int x86_cpu_enable_simd;
17+
extern int x86_cpu_enable_avx512;
1718

1819
void cpu_check_features(void);

‎deps/zlib/crc32.c

+13-1
Original file line numberDiff line numberDiff line change
@@ -773,7 +773,19 @@ unsigned long ZEXPORT crc32_z(crc, buf, len)
773773
}
774774

775775
#endif
776-
#if defined(CRC32_SIMD_SSE42_PCLMUL)
776+
#if defined(CRC32_SIMD_AVX512_PCLMUL)
777+
if (x86_cpu_enable_avx512 && len >= Z_CRC32_AVX512_MINIMUM_LENGTH) {
778+
/* crc32 64-byte chunks */
779+
z_size_t chunk_size = len & ~Z_CRC32_AVX512_CHUNKSIZE_MASK;
780+
crc = ~crc32_avx512_simd_(buf, chunk_size, ~(uint32_t)crc);
781+
/* check remaining data */
782+
len -= chunk_size;
783+
if (!len)
784+
return crc;
785+
/* Fall into the default crc32 for the remaining data. */
786+
buf += chunk_size;
787+
}
788+
#elif defined(CRC32_SIMD_SSE42_PCLMUL)
777789
if (x86_cpu_enable_simd && len >= Z_CRC32_SSE42_MINIMUM_LENGTH) {
778790
/* crc32 16-byte chunks */
779791
z_size_t chunk_size = len & ~Z_CRC32_SSE42_CHUNKSIZE_MASK;

‎deps/zlib/crc32_simd.c

+194-4
Original file line numberDiff line numberDiff line change
@@ -6,17 +6,207 @@
66
*/
77

88
#include "crc32_simd.h"
9-
10-
#if defined(CRC32_SIMD_SSE42_PCLMUL)
9+
#if defined(CRC32_SIMD_AVX512_PCLMUL)
1110

1211
/*
13-
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
14-
* length must be at least 64, and a multiple of 16. Based on:
12+
* crc32_avx512_simd_(): compute the crc32 of the buffer, where the buffer
13+
* length must be at least 256, and a multiple of 64. Based on:
1514
*
1615
* "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction"
1716
* V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0
1817
*/
1918

19+
#include <emmintrin.h>
20+
#include <smmintrin.h>
21+
#include <wmmintrin.h>
22+
#include <immintrin.h>
23+
24+
/*
 * Compute the CRC-32 of buf[0..len-1], folded 256 bytes at a time with
 * AVX-512 carry-less multiplies. Contract (enforced by the caller in
 * crc32.c via Z_CRC32_AVX512_MINIMUM_LENGTH / Z_CRC32_AVX512_CHUNKSIZE_MASK):
 * len >= 256 and len is a multiple of 64. `crc` is the pre-inverted running
 * CRC; the caller applies the final bit inversion.
 */
uint32_t ZLIB_INTERNAL crc32_avx512_simd_(  /* AVX512+PCLMUL */
    const unsigned char *buf,
    z_size_t len,
    uint32_t crc)
{
    /*
     * Definitions of the bit-reflected domain constants k1,k2,k3,k4
     * are similar to those given at the end of the paper, and remaining
     * constants and CRC32+Barrett polynomials remain unchanged.
     *
     * Replace the index of x from 128 to 512. As follows:
     * k1 = ( x ^ ( 512 * 4 + 32 ) mod P(x) << 32 )' << 1 = 0x011542778a
     * k2 = ( x ^ ( 512 * 4 - 32 ) mod P(x) << 32 )' << 1 = 0x01322d1430
     * k3 = ( x ^ ( 512 + 32 ) mod P(x) << 32 )' << 1 = 0x0154442bd4
     * k4 = ( x ^ ( 512 - 32 ) mod P(x) << 32 )' << 1 = 0x01c6e41596
     */
    /* Each 64-bit (k_lo, k_hi) pair is replicated across all four 128-bit
     * lanes so one _mm512_clmulepi64_epi128 folds four lanes at once. */
    static const uint64_t zalign(64) k1k2[] = { 0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430,
                                                0x011542778a, 0x01322d1430 };
    static const uint64_t zalign(64) k3k4[] = { 0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596,
                                                0x0154442bd4, 0x01c6e41596 };
    static const uint64_t zalign(16) k5k6[] = { 0x01751997d0, 0x00ccaa009e };
    static const uint64_t zalign(16) k7k8[] = { 0x0163cd6124, 0x0000000000 };
    static const uint64_t zalign(16) poly[] = { 0x01db710641, 0x01f7011641 };
    __m512i x0, x1, x2, x3, x4, x5, x6, x7, x8, y5, y6, y7, y8;
    __m128i a0, a1, a2, a3;

    /*
     * There's at least one block of 256.
     */
    x1 = _mm512_loadu_si512((__m512i *)(buf + 0x00));
    x2 = _mm512_loadu_si512((__m512i *)(buf + 0x40));
    x3 = _mm512_loadu_si512((__m512i *)(buf + 0x80));
    x4 = _mm512_loadu_si512((__m512i *)(buf + 0xC0));

    /* XOR the incoming CRC into the low 32 bits of the first vector. */
    x1 = _mm512_xor_si512(x1, _mm512_castsi128_si512(_mm_cvtsi32_si128(crc)));

    x0 = _mm512_load_si512((__m512i *)k1k2);

    buf += 256;
    len -= 256;

    /*
     * Parallel fold blocks of 256, if any.
     */
    while (len >= 256)
    {
        /* Low-half products of the 256-byte fold (imm 0x00 = lo x lo). */
        x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
        x6 = _mm512_clmulepi64_epi128(x2, x0, 0x00);
        x7 = _mm512_clmulepi64_epi128(x3, x0, 0x00);
        x8 = _mm512_clmulepi64_epi128(x4, x0, 0x00);

        /* High-half products (imm 0x11 = hi x hi). */
        x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
        x2 = _mm512_clmulepi64_epi128(x2, x0, 0x11);
        x3 = _mm512_clmulepi64_epi128(x3, x0, 0x11);
        x4 = _mm512_clmulepi64_epi128(x4, x0, 0x11);

        y5 = _mm512_loadu_si512((__m512i *)(buf + 0x00));
        y6 = _mm512_loadu_si512((__m512i *)(buf + 0x40));
        y7 = _mm512_loadu_si512((__m512i *)(buf + 0x80));
        y8 = _mm512_loadu_si512((__m512i *)(buf + 0xC0));

        x1 = _mm512_xor_si512(x1, x5);
        x2 = _mm512_xor_si512(x2, x6);
        x3 = _mm512_xor_si512(x3, x7);
        x4 = _mm512_xor_si512(x4, x8);

        x1 = _mm512_xor_si512(x1, y5);
        x2 = _mm512_xor_si512(x2, y6);
        x3 = _mm512_xor_si512(x3, y7);
        x4 = _mm512_xor_si512(x4, y8);

        buf += 256;
        len -= 256;
    }

    /*
     * Fold into 512-bits: collapse the four accumulators x1..x4 into x1.
     */
    x0 = _mm512_load_si512((__m512i *)k3k4);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x2);
    x1 = _mm512_xor_si512(x1, x5);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x3);
    x1 = _mm512_xor_si512(x1, x5);

    x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
    x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
    x1 = _mm512_xor_si512(x1, x4);
    x1 = _mm512_xor_si512(x1, x5);

    /*
     * Single fold blocks of 64, if any (tail left over from the 256-byte
     * loop; len is a multiple of 64 by contract, so nothing remains after).
     */
    while (len >= 64)
    {
        x2 = _mm512_loadu_si512((__m512i *)buf);

        x5 = _mm512_clmulepi64_epi128(x1, x0, 0x00);
        x1 = _mm512_clmulepi64_epi128(x1, x0, 0x11);
        x1 = _mm512_xor_si512(x1, x2);
        x1 = _mm512_xor_si512(x1, x5);

        buf += 64;
        len -= 64;
    }

    /*
     * Fold 512-bits to 384-bits: peel off the 128-bit lanes of x1 one at a
     * time, folding each into the running 128-bit accumulator a1.
     */
    a0 = _mm_load_si128((__m128i *)k5k6);

    a1 = _mm512_extracti32x4_epi32(x1, 0);
    a2 = _mm512_extracti32x4_epi32(x1, 1);

    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);

    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 384-bits to 256-bits.
     */
    a2 = _mm512_extracti32x4_epi32(x1, 2);
    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 256-bits to 128-bits.
     */
    a2 = _mm512_extracti32x4_epi32(x1, 3);
    a3 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x11);
    a1 = _mm_xor_si128(a1, a3);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Fold 128-bits to 64-bits.
     */
    a2 = _mm_clmulepi64_si128(a1, a0, 0x10);
    a3 = _mm_setr_epi32(~0, 0, ~0, 0);   /* mask selecting the low dword of each qword */
    a1 = _mm_srli_si128(a1, 8);
    a1 = _mm_xor_si128(a1, a2);

    a0 = _mm_loadl_epi64((__m128i*)k7k8);
    a2 = _mm_srli_si128(a1, 4);
    a1 = _mm_and_si128(a1, a3);
    a1 = _mm_clmulepi64_si128(a1, a0, 0x00);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Barrett reduce to 32-bits.
     */
    a0 = _mm_load_si128((__m128i*)poly);

    a2 = _mm_and_si128(a1, a3);
    a2 = _mm_clmulepi64_si128(a2, a0, 0x10);
    a2 = _mm_and_si128(a2, a3);
    a2 = _mm_clmulepi64_si128(a2, a0, 0x00);
    a1 = _mm_xor_si128(a1, a2);

    /*
     * Return the crc32 (still pre-inverted; caller applies the final ~).
     */
    return _mm_extract_epi32(a1, 1);
}
202+
203+
#elif defined(CRC32_SIMD_SSE42_PCLMUL)
204+
205+
/*
206+
* crc32_sse42_simd_(): compute the crc32 of the buffer, where the buffer
207+
* length must be at least 64, and a multiple of 16.
208+
*/
209+
20210
#include <emmintrin.h>
21211
#include <smmintrin.h>
22212
#include <wmmintrin.h>

‎deps/zlib/crc32_simd.h

+6
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,18 @@ uint32_t ZLIB_INTERNAL crc32_sse42_simd_(const unsigned char* buf,
1919
z_size_t len,
2020
uint32_t crc);
2121

22+
uint32_t ZLIB_INTERNAL crc32_avx512_simd_(const unsigned char* buf,
23+
z_size_t len,
24+
uint32_t crc);
25+
2226
/*
2327
* crc32_sse42_simd_ buffer size constraints: see the use in zlib/crc32.c
2428
* for computing the crc32 of an arbitrary length buffer.
2529
*/
2630
#define Z_CRC32_SSE42_MINIMUM_LENGTH 64
2731
#define Z_CRC32_SSE42_CHUNKSIZE_MASK 15
32+
#define Z_CRC32_AVX512_MINIMUM_LENGTH 256
33+
#define Z_CRC32_AVX512_CHUNKSIZE_MASK 63
2834

2935
/*
3036
* CRC32 checksums using ARMv8-a crypto instructions.

‎deps/zlib/crc_folding.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,10 @@ unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s)
435435
unsigned crc;
436436
__m128i x_tmp0, x_tmp1, x_tmp2, crc_fold;
437437

438-
CRC_LOAD(s)
438+
__m128i xmm_crc0 = _mm_loadu_si128((__m128i *)s->crc0 + 0);
439+
__m128i xmm_crc1 = _mm_loadu_si128((__m128i *)s->crc0 + 1);
440+
__m128i xmm_crc2 = _mm_loadu_si128((__m128i *)s->crc0 + 2);
441+
__m128i xmm_crc3 = _mm_loadu_si128((__m128i *)s->crc0 + 3);
439442

440443
/*
441444
* k1
@@ -491,7 +494,6 @@ unsigned ZLIB_INTERNAL crc_fold_512to32(deflate_state *const s)
491494

492495
crc = _mm_extract_epi32(xmm_crc3, 2);
493496
return ~crc;
494-
CRC_SAVE(s)
495497
}
496498

497499
#endif /* CRC32_SIMD_SSE42_PCLMUL */

‎deps/zlib/deflate.c

+3-2
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
#endif
6666

6767
const char deflate_copyright[] =
68-
" deflate 1.2.13 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
68+
" deflate 1.2.13.1 Copyright 1995-2022 Jean-loup Gailly and Mark Adler ";
6969
/*
7070
If you use the zlib library in a product, an acknowledgment is welcome
7171
in the documentation of your product. If for some reason you cannot
@@ -774,7 +774,8 @@ uLong ZEXPORT deflateBound(strm, sourceLen)
774774

775775
/* if not default parameters, return one of the conservative bounds */
776776
if (s->w_bits != 15 || s->hash_bits != 8 + 7)
777-
return (s->w_bits <= s->hash_bits ? fixedlen : storelen) + wraplen;
777+
return (s->w_bits <= s->hash_bits && s->level ? fixedlen : storelen) +
778+
wraplen;
778779

779780
/* default settings: return tight bound for that case -- ~0.03% overhead
780781
plus a small constant */

0 commit comments

Comments
 (0)
Please sign in to comment.