Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce streaming decompression memory by (128KB - blockSizeMax) #3616

Merged
merged 1 commit into from
Apr 17, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 16 additions & 5 deletions lib/decompress/zstd_decompress.c
Original file line number Diff line number Diff line change
@@ -265,6 +265,7 @@ static void ZSTD_initDCtx_internal(ZSTD_DCtx* dctx)
#endif
dctx->noForwardProgress = 0;
dctx->oversizedDuration = 0;
dctx->isFrameDecompression = 1;
#if DYNAMIC_BMI2
dctx->bmi2 = ZSTD_cpuSupportsBmi2();
#endif
@@ -1003,7 +1004,8 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
switch(blockProperties.blockType)
{
case bt_compressed:
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, /* frame */ 1, not_streaming);
assert(dctx->isFrameDecompression == 1);
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oBlockEnd-op), ip, cBlockSize, not_streaming);
break;
case bt_raw :
/* Use oend instead of oBlockEnd because this function is safe to overlap. It uses memmove. */
@@ -1319,7 +1321,8 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
{
case bt_compressed:
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
assert(dctx->isFrameDecompression == 1);
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, is_streaming);
dctx->expected = 0; /* Streaming not supported */
break;
case bt_raw :
@@ -1548,6 +1551,7 @@ size_t ZSTD_decompressBegin(ZSTD_DCtx* dctx)
dctx->litEntropy = dctx->fseEntropy = 0;
dctx->dictID = 0;
dctx->bType = bt_reserved;
dctx->isFrameDecompression = 1;
ZSTD_STATIC_ASSERT(sizeof(dctx->entropy.rep) == sizeof(repStartValue));
ZSTD_memcpy(dctx->entropy.rep, repStartValue, sizeof(repStartValue)); /* initial repcodes */
dctx->LLTptr = dctx->entropy.LLTable;
@@ -1911,6 +1915,7 @@ size_t ZSTD_DCtx_reset(ZSTD_DCtx* dctx, ZSTD_ResetDirective reset)
|| (reset == ZSTD_reset_session_and_parameters) ) {
dctx->streamStage = zdss_init;
dctx->noForwardProgress = 0;
dctx->isFrameDecompression = 1;
}
if ( (reset == ZSTD_reset_parameters)
|| (reset == ZSTD_reset_session_and_parameters) ) {
@@ -1929,9 +1934,15 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)

size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
{
size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
/* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy*/
unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
size_t const blockSize = (size_t)MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
/* We need blockSize + WILDCOPY_OVERLENGTH worth of buffer so that if a block
* ends at windowSize + WILDCOPY_OVERLENGTH + 1 bytes, we can start writing
* the block at the beginning of the output buffer, and maintain a full window.
*
* We need another blockSize worth of buffer so that we can store split
* literals at the end of the block without overwriting the extDict window.
*/
unsigned long long const neededRBSize = windowSize + (blockSize * 2) + (WILDCOPY_OVERLENGTH * 2);
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
size_t const minRBSize = (size_t) neededSize;
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,
220 changes: 119 additions & 101 deletions lib/decompress/zstd_decompress_block.c

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion lib/decompress/zstd_decompress_block.h
Original file line number Diff line number Diff line change
@@ -47,7 +47,7 @@ typedef enum {
*/
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
void* dst, size_t dstCapacity,
const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
const void* src, size_t srcSize, const streaming_operation streaming);

/* ZSTD_buildFSETable() :
* generate FSE decoding table for one symbol (ll, ml or off)
1 change: 1 addition & 0 deletions lib/decompress/zstd_decompress_internal.h
Original file line number Diff line number Diff line change
@@ -153,6 +153,7 @@ struct ZSTD_DCtx_s
size_t litSize;
size_t rleSize;
size_t staticSize;
int isFrameDecompression;
#if DYNAMIC_BMI2 != 0
int bmi2; /* == 1 if the CPU supports BMI2 and 0 otherwise. CPU support is determined dynamically once per context lifetime. */
#endif
13 changes: 6 additions & 7 deletions tests/fullbench.c
Original file line number Diff line number Diff line change
@@ -141,15 +141,14 @@ static size_t local_ZSTD_decompress(const void* src, size_t srcSize,
static ZSTD_DCtx* g_zdc = NULL;

#ifndef ZSTD_DLL_IMPORT
/* Two-value flag type used as the last parameter of ZSTD_decodeLiteralsBlock()
 * declared just below.
 * NOTE(review): presumably a hand-copied mirror of a library-internal enum;
 * verify the values stay in sync with the library definition. */
typedef enum {
not_streaming = 0,
is_streaming = 1
} streaming_operation;
/* Hand-written declaration of a literals-block decoder that takes an explicit
 * streaming_operation argument. */
extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize, void* dst, size_t dstCapacity, const streaming_operation streaming);

/* Hand-written declaration of a literals-block decoder with the same leading
 * parameters but no streaming_operation argument.
 * NOTE(review): this excerpt appears to contain both the pre-change and the
 * post-change declarations of a diff; confirm which of the two externs (and
 * the typedef above) are actually present in the real file. */
extern size_t ZSTD_decodeLiteralsBlock_wrapper(ZSTD_DCtx* dctx,
const void* src, size_t srcSize,
void* dst, size_t dstCapacity);
/* local_ZSTD_decodeLiteralsBlock() :
 * Adapter that decodes a literals section using the global context g_zdc.
 * The input is read from buff2 with length g_cSize (not from src/srcSize),
 * and the output goes to dst with capacity dstSize.
 * @return : whatever the called literals decoder returns. */
static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
{
/* src and srcSize are genuinely unused; dst and dstSize are used below, so
 * their (void) casts are redundant but harmless. */
(void)src; (void)srcSize; (void)dst; (void)dstSize;
/* NOTE(review): two consecutive return statements. As written only the first
 * one executes and the wrapper call after it is unreachable. This looks like
 * diff residue (removed line followed by its replacement) — confirm which
 * single call the real file keeps. */
return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize, dst, dstSize, not_streaming);
return ZSTD_decodeLiteralsBlock_wrapper(g_zdc, buff2, g_cSize, dst, dstSize);
}

static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
@@ -606,7 +605,7 @@ static int benchMem(unsigned benchNb,
ip += ZSTD_blockHeaderSize; /* skip block header */
ZSTD_decompressBegin(g_zdc);
CONTROL(iend > ip);
ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip), dstBuff, dstBuffSize, not_streaming); /* skip literal segment */
ip += ZSTD_decodeLiteralsBlock_wrapper(g_zdc, ip, (size_t)(iend-ip), dstBuff, dstBuffSize); /* skip literal segment */
g_cSize = (size_t)(iend-ip);
memcpy(dstBuff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */