Skip to content

Commit 61efb2a

Browse files
committed
Add ZSTD_d_maxBlockSize parameter
Reduces decoder memory usage when blocks are guaranteed to be smaller than the maximum allowed by the format. This is useful for streaming, when the compressor is configured with ZSTD_c_maxBlockSize. This PR saves 2 * (formatMaxBlockSize - paramMaxBlockSize) bytes when streaming. Once it is rebased on top of PR #3616, it will save 3 * (formatMaxBlockSize - paramMaxBlockSize) bytes.
1 parent ed31334 commit 61efb2a

7 files changed

+165
-6
lines changed

lib/decompress/zstd_decompress.c

+26-3
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ static void ZSTD_DCtx_resetParameters(ZSTD_DCtx* dctx)
245245
dctx->forceIgnoreChecksum = ZSTD_d_validateChecksum;
246246
dctx->refMultipleDDicts = ZSTD_rmd_refSingleDDict;
247247
dctx->disableHufAsm = 0;
248+
dctx->maxBlockSizeParam = 0;
248249
}
249250

250251
static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
@@ -972,6 +973,10 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
972973
ip += frameHeaderSize; remainingSrcSize -= frameHeaderSize;
973974
}
974975

976+
/* Shrink the blockSizeMax if enabled */
977+
if (dctx->maxBlockSizeParam != 0)
978+
dctx->fParams.blockSizeMax = MIN(dctx->fParams.blockSizeMax, (unsigned)dctx->maxBlockSizeParam);
979+
975980
/* Loop on each block */
976981
while (1) {
977982
BYTE* oBlockEnd = oend;
@@ -1823,6 +1828,10 @@ ZSTD_bounds ZSTD_dParam_getBounds(ZSTD_dParameter dParam)
18231828
bounds.lowerBound = 0;
18241829
bounds.upperBound = 1;
18251830
return bounds;
1831+
case ZSTD_d_maxBlockSize:
1832+
bounds.lowerBound = ZSTD_BLOCKSIZE_MAX_MIN;
1833+
bounds.upperBound = ZSTD_BLOCKSIZE_MAX;
1834+
return bounds;
18261835

18271836
default:;
18281837
}
@@ -1867,6 +1876,9 @@ size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParameter param, int* value
18671876
case ZSTD_d_disableHuffmanAssembly:
18681877
*value = (int)dctx->disableHufAsm;
18691878
return 0;
1879+
case ZSTD_d_maxBlockSize:
1880+
*value = dctx->maxBlockSizeParam;
1881+
return 0;
18701882
default:;
18711883
}
18721884
RETURN_ERROR(parameter_unsupported, "");
@@ -1904,6 +1916,10 @@ size_t ZSTD_DCtx_setParameter(ZSTD_DCtx* dctx, ZSTD_dParameter dParam, int value
19041916
CHECK_DBOUNDS(ZSTD_d_disableHuffmanAssembly, value);
19051917
dctx->disableHufAsm = value != 0;
19061918
return 0;
1919+
case ZSTD_d_maxBlockSize:
1920+
if (value != 0) CHECK_DBOUNDS(ZSTD_d_maxBlockSize, value);
1921+
dctx->maxBlockSizeParam = value;
1922+
return 0;
19071923
default:;
19081924
}
19091925
RETURN_ERROR(parameter_unsupported, "");
@@ -1932,9 +1948,9 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
19321948
return ZSTD_sizeof_DCtx(dctx);
19331949
}
19341950

1935-
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
1951+
static size_t ZSTD_decodingBufferSize_internal(unsigned long long windowSize, unsigned long long frameContentSize, size_t blockSizeMax)
19361952
{
1937-
size_t const blockSize = (size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
1953+
size_t const blockSize = MIN((size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX), blockSizeMax);
19381954
/* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
19391955
* ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
19401956
* the block at the beginning of the output buffer, and maintain a full window.
@@ -1950,6 +1966,11 @@ size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long
19501966
return minRBSize;
19511967
}
19521968

1969+
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
1970+
{
1971+
return ZSTD_decodingBufferSize_internal(windowSize, frameContentSize, ZSTD_BLOCKSIZE_MAX);
1972+
}
1973+
19531974
size_t ZSTD_estimateDStreamSize(size_t windowSize)
19541975
{
19551976
size_t const blockSize = MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
@@ -2188,11 +2209,13 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
21882209
zds->fParams.windowSize = MAX(zds->fParams.windowSize, 1U << ZSTD_WINDOWLOG_ABSOLUTEMIN);
21892210
RETURN_ERROR_IF(zds->fParams.windowSize > zds->maxWindowSize,
21902211
frameParameter_windowTooLarge, "");
2212+
if (zds->maxBlockSizeParam != 0)
2213+
zds->fParams.blockSizeMax = MIN(zds->fParams.blockSizeMax, (unsigned)zds->maxBlockSizeParam);
21912214

21922215
/* Adapt buffer sizes to frame header instructions */
21932216
{ size_t const neededInBuffSize = MAX(zds->fParams.blockSizeMax, 4 /* frame checksum */);
21942217
size_t const neededOutBuffSize = zds->outBufferMode == ZSTD_bm_buffered
2195-
? ZSTD_decodingBufferSize_min(zds->fParams.windowSize, zds->fParams.frameContentSize)
2218+
? ZSTD_decodingBufferSize_internal(zds->fParams.windowSize, zds->fParams.frameContentSize, zds->fParams.blockSizeMax)
21962219
: 0;
21972220

21982221
ZSTD_DCtx_updateOversizedDuration(zds, neededInBuffSize, neededOutBuffSize);

lib/decompress/zstd_decompress_internal.h

+1
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ struct ZSTD_DCtx_s
167167
ZSTD_DDictHashSet* ddictSet; /* Hash set for multiple ddicts */
168168
ZSTD_refMultipleDDicts_e refMultipleDDicts; /* User specified: if == 1, will allow references to multiple DDicts. Default == 0 (disabled) */
169169
int disableHufAsm;
170+
int maxBlockSizeParam;
170171

171172
/* streaming */
172173
ZSTD_dStreamStage streamStage;

lib/zstd.h

+19-1
Original file line numberDiff line numberDiff line change
@@ -618,14 +618,16 @@ typedef enum {
618618
* ZSTD_d_forceIgnoreChecksum
619619
* ZSTD_d_refMultipleDDicts
620620
* ZSTD_d_disableHuffmanAssembly
621+
* ZSTD_d_maxBlockSize
621622
* Because they are not stable, it's necessary to define ZSTD_STATIC_LINKING_ONLY to access them.
622623
* note : never ever use experimentalParam? names directly
623624
*/
624625
ZSTD_d_experimentalParam1=1000,
625626
ZSTD_d_experimentalParam2=1001,
626627
ZSTD_d_experimentalParam3=1002,
627628
ZSTD_d_experimentalParam4=1003,
628-
ZSTD_d_experimentalParam5=1004
629+
ZSTD_d_experimentalParam5=1004,
630+
ZSTD_d_experimentalParam6=1005
629631

630632
} ZSTD_dParameter;
631633

@@ -2430,6 +2432,22 @@ ZSTDLIB_STATIC_API size_t ZSTD_DCtx_getParameter(ZSTD_DCtx* dctx, ZSTD_dParamete
24302432
*/
24312433
#define ZSTD_d_disableHuffmanAssembly ZSTD_d_experimentalParam5
24322434

2435+
/* ZSTD_d_maxBlockSize
2436+
* Allowed values are between 1KB and ZSTD_BLOCKSIZE_MAX (128KB).
2437+
* The default is ZSTD_BLOCKSIZE_MAX, and setting to 0 will set to the default.
2438+
*
2439+
* Forces the decompressor to reject blocks whose content size is
2440+
* larger than the configured maxBlockSize. When maxBlockSize is
2441+
* larger than the windowSize, the windowSize is used instead.
2442+
* This saves memory on the decoder when you know all blocks are small.
2443+
*
2444+
* This option is typically used in conjunction with ZSTD_c_maxBlockSize.
2445+
*
2446+
* WARNING: This causes the decoder to reject otherwise valid frames
2447+
* that have block sizes larger than the configured maxBlockSize.
2448+
*/
2449+
#define ZSTD_d_maxBlockSize ZSTD_d_experimentalParam6
2450+
24332451

24342452
/*! ZSTD_DCtx_setFormat() :
24352453
* This function is REDUNDANT. Prefer ZSTD_DCtx_setParameter().

tests/fuzz/simple_round_trip.c

+3
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,9 @@ static size_t roundTripTest(void *result, size_t resultCapacity,
9090
FUZZ_ASSERT(XXH64(compressed, cSize, 0) == hash0);
9191
}
9292
}
93+
if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
94+
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, maxBlockSize));
95+
}
9396
dSize = ZSTD_decompressDCtx(dctx, result, resultCapacity, compressed, cSize);
9497
FUZZ_ZASSERT(dSize);
9598
FUZZ_ASSERT_MSG(dSize == srcSize, "Incorrect regenerated size");

tests/fuzz/stream_round_trip.c

+22-2
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ static size_t compress(uint8_t *dst, size_t capacity,
6363
size_t dstSize = 0;
6464
ZSTD_CCtx_reset(cctx, ZSTD_reset_session_only);
6565
FUZZ_setRandomParameters(cctx, srcSize, producer);
66+
int maxBlockSize;
67+
FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize));
6668

6769
while (srcSize > 0) {
6870
ZSTD_inBuffer in = makeInBuffer(&src, &srcSize, producer);
@@ -93,6 +95,8 @@ static size_t compress(uint8_t *dst, size_t capacity,
9395
if (FUZZ_dataProducer_uint32Range(producer, 0, 7) == 0) {
9496
size_t const remaining = in.size - in.pos;
9597
FUZZ_setRandomParameters(cctx, remaining, producer);
98+
/* Always use the same maxBlockSize */
99+
FUZZ_ZASSERT(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, maxBlockSize));
96100
}
97101
mode = -1;
98102
}
@@ -132,6 +136,23 @@ static size_t compress(uint8_t *dst, size_t capacity,
132136
return dstSize;
133137
}
134138

139+
/* decompress() :
 * Streaming-decompresses `src` into `dst` with ZSTD_decompressStream(),
 * using the `dctx` declared outside this function.
 * One value drawn from `producer` decides whether ZSTD_d_maxBlockSize mirrors
 * the ZSTD_c_maxBlockSize currently stored in `cctx` (also declared outside
 * this function) or is reset to 0.
 * Any zstd error, or a non-zero return from ZSTD_decompressStream(), trips the
 * FUZZ_* assertion macros.
 * @return : number of bytes written into `dst` (out.pos). */
static size_t decompress(void* dst, size_t dstCapacity, void const* src, size_t srcSize, FUZZ_dataProducer_t* producer)
{
    ZSTD_inBuffer in = {src, srcSize, 0};
    ZSTD_outBuffer out = {dst, dstCapacity, 0};
    int maxBlockSize;
    FUZZ_ZASSERT(ZSTD_CCtx_getParameter(cctx, ZSTD_c_maxBlockSize, &maxBlockSize));
    if (FUZZ_dataProducer_uint32Range(producer, 0, 1)) {
        FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, maxBlockSize));
    } else {
        /* `dctx` is not owned by this function: if it is reused across calls,
         * a limit installed by an earlier call would still be active and could
         * reject this frame. Setting 0 restores the default (no extra limit). */
        FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 0));
    }
    while (in.pos < in.size) {
        size_t const ret = ZSTD_decompressStream(dctx, &out, &in);
        FUZZ_ZASSERT(ret);
        /* anything other than 0 is treated as a failure here */
        FUZZ_ASSERT(ret == 0);
    }
    return out.pos;
}
155+
135156
int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
136157
{
137158
FUZZ_SEQ_PROD_SETUP();
@@ -163,8 +184,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
163184

164185
{
165186
size_t const cSize = compress(cBuf, neededBufSize, src, size, producer);
166-
size_t const rSize =
167-
ZSTD_decompressDCtx(dctx, rBuf, neededBufSize, cBuf, cSize);
187+
size_t const rSize = decompress(rBuf, neededBufSize, cBuf, cSize, producer);
168188
FUZZ_ZASSERT(rSize);
169189
FUZZ_ASSERT_MSG(rSize == size, "Incorrect regenerated size");
170190
FUZZ_ASSERT_MSG(!FUZZ_memcmp(src, rBuf, size), "Corruption!");

tests/fuzzer.c

+19
Original file line numberDiff line numberDiff line change
@@ -952,6 +952,25 @@ static int basicUnitTests(U32 const seed, double compressibility)
952952
ZSTD_freeCDict(cdict);
953953
ZSTD_freeCCtx(cctx);
954954
}
955+
956+
DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2K", testNb++);
957+
{
958+
ZSTD_CCtx* cctx = ZSTD_createCCtx();
959+
ZSTD_DCtx* dctx = ZSTD_createDCtx();
960+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
961+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 2048));
962+
CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 2048));
963+
964+
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBuffSize);
965+
CHECK_Z(cSize);
966+
CHECK_Z(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize));
967+
968+
CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 1024));
969+
CHECK(ZSTD_isError(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBuffSize, compressedBuffer, cSize)));
970+
971+
ZSTD_freeDCtx(dctx);
972+
ZSTD_freeCCtx(cctx);
973+
}
955974

956975
DISPLAYLEVEL(3, "test%3i : ldm fill dict out-of-bounds check", testNb++);
957976
{

tests/zstreamtest.c

+75
Original file line numberDiff line numberDiff line change
@@ -722,6 +722,67 @@ static int basicUnitTests(U32 seed, double compressibility, int bigTests)
722722
}
723723
DISPLAYLEVEL(3, "OK \n");
724724

725+
DISPLAYLEVEL(3, "test%3i : maxBlockSize = 2KB : ", testNb++);
726+
{
727+
ZSTD_DCtx* dctx = ZSTD_createDCtx();
728+
size_t singlePassSize, streamingSize, streaming2KSize;
729+
730+
{
731+
ZSTD_CCtx* cctx = ZSTD_createCCtx();
732+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_checksumFlag, 1));
733+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, 18));
734+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_contentSizeFlag, 0));
735+
CHECK_Z(ZSTD_CCtx_setParameter(cctx, ZSTD_c_maxBlockSize, 2048));
736+
cSize = ZSTD_compress2(cctx, compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize);
737+
CHECK_Z(cSize);
738+
ZSTD_freeCCtx(cctx);
739+
}
740+
741+
CHECK_Z(ZSTD_decompressDCtx(dctx, decodedBuffer, CNBufferSize, compressedBuffer, cSize));
742+
singlePassSize = ZSTD_sizeof_DCtx(dctx);
743+
CHECK_Z(singlePassSize);
744+
745+
inBuff.src = compressedBuffer;
746+
inBuff.size = cSize;
747+
748+
outBuff.dst = decodedBuffer;
749+
outBuff.size = decodedBufferSize;
750+
751+
CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 2048));
752+
inBuff.pos = 0;
753+
outBuff.pos = 0;
754+
{
755+
size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff);
756+
CHECK_Z(r);
757+
CHECK(r != 0, "Entire frame must be decompressed");
758+
}
759+
streaming2KSize = ZSTD_sizeof_DCtx(dctx);
760+
CHECK_Z(streaming2KSize);
761+
762+
CHECK_Z(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
763+
inBuff.pos = 0;
764+
outBuff.pos = 0;
765+
{
766+
size_t const r = ZSTD_decompressStream(dctx, &outBuff, &inBuff);
767+
CHECK_Z(r);
768+
CHECK(r != 0, "Entire frame must be decompressed");
769+
}
770+
streamingSize = ZSTD_sizeof_DCtx(dctx);
771+
CHECK_Z(streamingSize);
772+
773+
CHECK_Z(ZSTD_DCtx_setParameter(dctx, ZSTD_d_maxBlockSize, 1024));
774+
inBuff.pos = 0;
775+
outBuff.pos = 0;
776+
CHECK(!ZSTD_isError(ZSTD_decompressStream(dctx, &outBuff, &inBuff)), "decompression must fail");
777+
778+
CHECK(streamingSize < singlePassSize + (1 << 18) + 3 * ZSTD_BLOCKSIZE_MAX, "Streaming doesn't use the right amount of memory");
779+
CHECK(streamingSize != streaming2KSize + 3 * (ZSTD_BLOCKSIZE_MAX - 2048), "ZSTD_d_blockSizeMax didn't save the right amount of memory");
780+
DISPLAYLEVEL(3, "| %zu | %zu | %zu | ", singlePassSize, streaming2KSize, streamingSize);
781+
782+
ZSTD_freeDCtx(dctx);
783+
}
784+
DISPLAYLEVEL(3, "OK \n");
785+
725786
/* Decompression with ZSTD_d_stableOutBuffer */
726787
cSize = ZSTD_compress(compressedBuffer, compressedBufferSize, CNBuffer, CNBufferSize, 1);
727788
CHECK_Z(cSize);
@@ -2845,6 +2906,13 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
28452906
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_forceMaxWindow, FUZ_rand(&lseed) & 1, opaqueAPI) );
28462907
if (FUZ_rand(&lseed) & 1) CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_deterministicRefPrefix, FUZ_rand(&lseed) & 1, opaqueAPI) );
28472908

2909+
/* Set max block size parameters */
2910+
if (FUZ_rand(&lseed) & 1) {
2911+
int maxBlockSize = (int)(FUZ_rand(&lseed) % ZSTD_BLOCKSIZE_MAX);
2912+
maxBlockSize = MAX(1024, maxBlockSize);
2913+
CHECK_Z( setCCtxParameter(zc, cctxParams, ZSTD_c_maxBlockSize, maxBlockSize, opaqueAPI) );
2914+
}
2915+
28482916
/* Apply parameters */
28492917
if (opaqueAPI) {
28502918
DISPLAYLEVEL(5, "t%u: applying CCtxParams \n", testNb);
@@ -2976,6 +3044,13 @@ static int fuzzerTests_newAPI(U32 seed, int nbTests, int startTest,
29763044
if (FUZ_rand(&lseed) & 1) {
29773045
CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_disableHuffmanAssembly, FUZ_rand(&lseed) & 1));
29783046
}
3047+
if (FUZ_rand(&lseed) & 1) {
3048+
int maxBlockSize;
3049+
CHECK_Z(ZSTD_CCtx_getParameter(zc, ZSTD_c_maxBlockSize, &maxBlockSize));
3050+
CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_maxBlockSize, maxBlockSize));
3051+
} else {
3052+
CHECK_Z(ZSTD_DCtx_setParameter(zd, ZSTD_d_maxBlockSize, 0));
3053+
}
29793054
{ size_t decompressionResult = 1;
29803055
ZSTD_inBuffer inBuff = { cBuffer, cSize, 0 };
29813056
ZSTD_outBuffer outBuff= { dstBuffer, dstBufferSize, 0 };

0 commit comments

Comments
 (0)