Skip to content

Commit 4b2b2d4

Browse files
authored
Update (google#749)
Update: * Bazel: fix MSVC configuration * C: common: extended documentation and helpers around distance codes * C: common: enable BROTLI_DCHECK in "debug" builds * C: common: fix implicit trailing zero in `kPrefixSuffix` * C: dec: fix possible bit reader discharge for "large-window" mode * C: dec: simplify distance decoding via lookup table * C: dec: reuse decoder state members memory via union with lookup table * C: dec: add decoder state diagram * C: enc: clarify access to static dictionary * C: enc: improve static dictionary hash * C: enc: add "stream offset" parameter for parallel encoding * C: enc: reorganize hasher; now Q2-Q3 require exactly 256KiB to avoid global TCMalloc lock * C: enc: fix rare access to uninitialized data in ring-buffer * C: enc: reorganize logging / checks in `write_bits.h` * Java: dec: add "large-window" support * Java: dec: improve speed * Java: dec: debug and 32-bit mode are now activated via system properties * Java: dec: demystify some state variables (use better names) * Dictionary generator: add single input mode * Java: dec: modernize tests * Bazel: js: pick working commit for closure rules
1 parent 9cd01c0 commit 4b2b2d4

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+4874
-2697
lines changed

BUILD

+3-5
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,9 @@ config_setting(
3939
visibility = ["//visibility:public"],
4040
)
4141

42-
config_setting(
43-
name = "msvc",
44-
values = {"compiler": "msvc-cl"},
45-
visibility = ["//visibility:public"],
46-
)
42+
load(":compiler_config_setting.bzl", "create_msvc_config")
43+
44+
create_msvc_config()
4745

4846
STRICT_C_OPTIONS = select({
4947
":msvc": [],

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ The basic commands to build, test and install brotli are:
3232
$ make
3333
$ make test
3434
$ make install
35-
35+
3636
By default, debug binaries are built. To generate "release" `Makefile` specify `--disable-debug` option to `configure-cmake`.
3737

3838
#### Bazel

c/common/constants.h

+120
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,17 @@
44
See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
55
*/
66

7+
/**
8+
* @file
9+
* Common constants used in decoder and encoder API.
10+
*/
11+
712
#ifndef BROTLI_COMMON_CONSTANTS_H_
813
#define BROTLI_COMMON_CONSTANTS_H_
914

15+
#include "./platform.h"
16+
#include <brotli/types.h>
17+
1018
/* Specification: 7.3. Encoding of the context map */
1119
#define BROTLI_CONTEXT_MAP_MAX_RLE 16
1220

@@ -29,12 +37,31 @@
2937
#define BROTLI_INITIAL_REPEATED_CODE_LENGTH 8
3038

3139
/* "Large Window Brotli" */
40+
41+
/**
42+
* The theoretical maximum number of distance bits specified for large window
43+
* brotli, for 64-bit encoders and decoders. Even when in practice 32-bit
44+
* encoders and decoders only support up to 30 max distance bits, the value is
45+
* set to 62 because it affects the large window brotli file format.
46+
* Specifically, it affects the encoding of the simple Huffman tree for distances,
47+
* see Specification RFC 7932 chapter 3.4.
48+
*/
3249
#define BROTLI_LARGE_MAX_DISTANCE_BITS 62U
3350
#define BROTLI_LARGE_MIN_WBITS 10
51+
/**
52+
* The maximum number of large-window brotli window bits supported by the encoder and decoder.
53+
* Large window brotli allows up to 62 bits, however the current encoder and
54+
* decoder, designed for 32-bit integers, only support up to 30 bits maximum.
55+
*/
3456
#define BROTLI_LARGE_MAX_WBITS 30
3557

3658
/* Specification: 4. Encoding of distances */
3759
#define BROTLI_NUM_DISTANCE_SHORT_CODES 16
60+
/**
61+
* Maximal number of "postfix" bits.
62+
*
63+
* Number of "postfix" bits is stored as 2 bits in meta-block header.
64+
*/
3865
#define BROTLI_MAX_NPOSTFIX 3
3966
#define BROTLI_MAX_NDIRECT 120
4067
#define BROTLI_MAX_DISTANCE_BITS 24U
@@ -45,7 +72,16 @@
4572
#define BROTLI_NUM_DISTANCE_SYMBOLS \
4673
BROTLI_DISTANCE_ALPHABET_SIZE( \
4774
BROTLI_MAX_NDIRECT, BROTLI_MAX_NPOSTFIX, BROTLI_LARGE_MAX_DISTANCE_BITS)
75+
76+
/* ((1 << 26) - 4) is the maximal distance that can be expressed in RFC 7932
77+
brotli stream using NPOSTFIX = 0 and NDIRECT = 0. With other NPOSTFIX and
78+
NDIRECT values distances up to ((1 << 29) + 88) could be expressed. */
4879
#define BROTLI_MAX_DISTANCE 0x3FFFFFC
80+
81+
/* ((1 << 31) - 4) is the safe distance limit. Using this number as a limit
82+
allows safe distance calculation without overflows, given the distance
83+
alphabet size is limited to the corresponding size
84+
(see kLargeWindowDistanceCodeLimits). */
4985
#define BROTLI_MAX_ALLOWED_DISTANCE 0x7FFFFFFC
5086

5187
/* 7.1. Context modes and context ID lookup for literals */
@@ -61,4 +97,88 @@
6197
#define BROTLI_WINDOW_GAP 16
6298
#define BROTLI_MAX_BACKWARD_LIMIT(W) (((size_t)1 << (W)) - BROTLI_WINDOW_GAP)
6399

100+
typedef struct BrotliDistanceCodeLimit {
101+
uint32_t max_alphabet_size;
102+
uint32_t max_distance;
103+
} BrotliDistanceCodeLimit;
104+
105+
/* This function calculates maximal size of distance alphabet, such that the
106+
distances greater than the given values can not be represented.
107+
108+
This limits are designed to support fast and safe 32-bit decoders.
109+
"32-bit" means that signed integer values up to ((1 << 31) - 1) could be
110+
safely expressed.
111+
112+
Brotli distance alphabet symbols do not represent consecutive distance
113+
ranges. Each distance alphabet symbol (excluding direct distances and short
114+
codes), represent interleaved (for NPOSTFIX > 0) range of distances.
115+
A "group" of consecutive (1 << NPOSTFIX) symbols represent non-interleaved
116+
range. Two consecutive groups require the same amount of "extra bits".
117+
118+
It is important that distance alphabet represents complete "groups".
119+
To avoid complex logic on encoder side about interleaved ranges
120+
it was decided to restrict both sides to complete distance code "groups".
121+
*/
122+
BROTLI_UNUSED_FUNCTION BrotliDistanceCodeLimit BrotliCalculateDistanceCodeLimit(
123+
uint32_t max_distance, uint32_t npostfix, uint32_t ndirect) {
124+
BrotliDistanceCodeLimit result;
125+
/* Marking this function as unused, because not all files
126+
including "constants.h" use it -> compiler warns about that. */
127+
BROTLI_UNUSED(&BrotliCalculateDistanceCodeLimit);
128+
if (max_distance <= ndirect) {
129+
/* This case never happens / exists only for the sake of completeness. */
130+
result.max_alphabet_size = max_distance + BROTLI_NUM_DISTANCE_SHORT_CODES;
131+
result.max_distance = max_distance;
132+
return result;
133+
} else {
134+
/* The first prohibited value. */
135+
uint32_t forbidden_distance = max_distance + 1;
136+
/* Subtract "directly" encoded region. */
137+
uint32_t offset = forbidden_distance - ndirect - 1;
138+
uint32_t ndistbits = 0;
139+
uint32_t tmp;
140+
uint32_t half;
141+
uint32_t group;
142+
/* Postfix for the last dcode in the group. */
143+
uint32_t postfix = (1u << npostfix) - 1;
144+
uint32_t extra;
145+
uint32_t start;
146+
/* Remove postfix and "head-start". */
147+
offset = (offset >> npostfix) + 4;
148+
/* Calculate the number of distance bits. */
149+
tmp = offset / 2;
150+
/* Poor-man's log2floor, to avoid extra dependencies. */
151+
while (tmp != 0) {ndistbits++; tmp = tmp >> 1;}
152+
/* One bit is covered with subrange addressing ("half"). */
153+
ndistbits--;
154+
/* Find subrange. */
155+
half = (offset >> ndistbits) & 1;
156+
/* Calculate the "group" part of dcode. */
157+
group = ((ndistbits - 1) << 1) | half;
158+
/* Calculated "group" covers the prohibited distance value. */
159+
if (group == 0) {
160+
/* This case is added for correctness; does not occur for limit > 128. */
161+
result.max_alphabet_size = ndirect + BROTLI_NUM_DISTANCE_SHORT_CODES;
162+
result.max_distance = ndirect;
163+
return result;
164+
}
165+
/* Decrement "group", so it is the last permitted "group". */
166+
group--;
167+
/* After group was decremented, ndistbits and half must be recalculated. */
168+
ndistbits = (group >> 1) + 1;
169+
/* The last available distance in the subrange has all extra bits set. */
170+
extra = (1u << ndistbits) - 1;
171+
/* Calculate region start. NB: ndistbits >= 1. */
172+
start = (1u << (ndistbits + 1)) - 4;
173+
/* Move to subregion. */
174+
start += (group & 1) << ndistbits;
175+
/* Calculate the alphabet size. */
176+
result.max_alphabet_size = ((group << npostfix) | postfix) + ndirect +
177+
BROTLI_NUM_DISTANCE_SHORT_CODES + 1;
178+
/* Calculate the maximal distance representable by alphabet. */
179+
result.max_distance = ((start + extra) << npostfix) + postfix + ndirect + 1;
180+
return result;
181+
}
182+
}
183+
64184
#endif /* BROTLI_COMMON_CONSTANTS_H_ */

c/common/platform.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -466,20 +466,20 @@ static BROTLI_INLINE void BROTLI_UNALIGNED_STORE64LE(void* p, uint64_t v) {
466466
#endif
467467

468468
#if defined(BROTLI_ENABLE_LOG)
469-
#define BROTLI_DCHECK(x) assert(x)
470469
#define BROTLI_LOG(x) printf x
471470
#else
472-
#define BROTLI_DCHECK(x)
473471
#define BROTLI_LOG(x)
474472
#endif
475473

476474
#if defined(BROTLI_DEBUG) || defined(BROTLI_ENABLE_LOG)
475+
#define BROTLI_DCHECK(x) assert(x)
477476
static BROTLI_INLINE void BrotliDump(const char* f, int l, const char* fn) {
478477
fprintf(stderr, "%s:%d (%s)\n", f, l, fn);
479478
fflush(stderr);
480479
}
481480
#define BROTLI_DUMP() BrotliDump(__FILE__, __LINE__, __FUNCTION__)
482481
#else
482+
#define BROTLI_DCHECK(x)
483483
#define BROTLI_DUMP() (void)(0)
484484
#endif
485485

c/common/transform.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,8 @@ static const char kPrefixSuffix[217] =
2424
/* 8x _0 _ _3 _8 _C _E _ _1 _7 _F */
2525
" not \3er \3al \4ful \4ive \5less \4es"
2626
/* Ax _5 _9 _D _2 _7 _D */
27-
"t \4ize \2\xc2\xa0\4ous \5 the \2e \0";
28-
/* Cx _2 _7___ ___ _A _F _5 _8 */
27+
"t \4ize \2\xc2\xa0\4ous \5 the \2e "; /* \0 - implicit trailing zero. */
28+
/* Cx _2 _7___ ___ _A _F _5 _8 */
2929

3030
static const uint16_t kPrefixSuffixMap[50] = {
3131
0x00, 0x02, 0x05, 0x0E, 0x13, 0x16, 0x18, 0x1E, 0x23, 0x25,

c/dec/bit_reader.c

+17
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,23 @@ BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br) {
4343
return BROTLI_TRUE;
4444
}
4545

46+
/* Reads |n_bits| (more than 24, at most 32) bits as two consecutive
   BrotliSafeReadBits requests: the low 16 bits first, then the remaining
   (|n_bits| - 16) high bits.

   On success stores the combined value into |val| and returns BROTLI_TRUE.
   If either request fails, the bit reader is rolled back to the state saved on
   entry and BROTLI_FALSE is returned; |val| is left untouched. */
BROTLI_BOOL BrotliSafeReadBits32Slow(BrotliBitReader* const br,
    uint32_t n_bits, uint32_t* val) {
  BrotliBitReaderState checkpoint;
  uint32_t lower;
  uint32_t upper;
  BROTLI_DCHECK(n_bits <= 32);
  BROTLI_DCHECK(n_bits > 24);
  /* Remember the reader position, so that a partial read can be undone. */
  BrotliBitReaderSaveState(br, &checkpoint);
  if (BrotliSafeReadBits(br, 16, &lower)) {
    if (BrotliSafeReadBits(br, n_bits - 16, &upper)) {
      *val = lower | (upper << 16);
      return BROTLI_TRUE;
    }
  }
  /* One of the two reads failed: undo whatever was consumed. */
  BrotliBitReaderRestoreState(br, &checkpoint);
  return BROTLI_FALSE;
}
62+
4663
#if defined(__cplusplus) || defined(c_plusplus)
4764
} /* extern "C" */
4865
#endif

c/dec/bit_reader.h

+51-4
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@ BROTLI_INTERNAL void BrotliInitBitReader(BrotliBitReader* const br);
6565
reading. */
6666
BROTLI_INTERNAL BROTLI_BOOL BrotliWarmupBitReader(BrotliBitReader* const br);
6767

68+
/* Fallback for BrotliSafeReadBits32. Extracted as noninlined method to unburden
69+
the main code-path. Never called for RFC brotli streams, required only for
70+
"large-window" mode and other extensions. */
71+
BROTLI_INTERNAL BROTLI_NOINLINE BROTLI_BOOL BrotliSafeReadBits32Slow(
72+
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val);
73+
6874
static BROTLI_INLINE void BrotliBitReaderSaveState(
6975
BrotliBitReader* const from, BrotliBitReaderState* to) {
7076
to->val_ = from->val_;
@@ -237,15 +243,17 @@ static BROTLI_INLINE void BrotliBitReaderUnload(BrotliBitReader* br) {
237243
static BROTLI_INLINE void BrotliTakeBits(
238244
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
239245
*val = (uint32_t)BrotliGetBitsUnmasked(br) & BitMask(n_bits);
240-
BROTLI_LOG(("[BrotliReadBits] %d %d %d val: %6x\n",
246+
BROTLI_LOG(("[BrotliTakeBits] %d %d %d val: %6x\n",
241247
(int)br->avail_in, (int)br->bit_pos_, (int)n_bits, (int)*val));
242248
BrotliDropBits(br, n_bits);
243249
}
244250

245251
/* Reads the specified number of bits from |br| and advances the bit pos.
246-
Assumes that there is enough input to perform BrotliFillBitWindow. */
247-
static BROTLI_INLINE uint32_t BrotliReadBits(
252+
Assumes that there is enough input to perform BrotliFillBitWindow.
253+
Up to 24 bits are allowed to be requested from this method. */
254+
static BROTLI_INLINE uint32_t BrotliReadBits24(
248255
BrotliBitReader* const br, uint32_t n_bits) {
256+
BROTLI_DCHECK(n_bits <= 24);
249257
if (BROTLI_64_BITS || (n_bits <= 16)) {
250258
uint32_t val;
251259
BrotliFillBitWindow(br, n_bits);
@@ -262,10 +270,32 @@ static BROTLI_INLINE uint32_t BrotliReadBits(
262270
}
263271
}
264272

273+
/* Same as BrotliReadBits24, but allows reading up to 32 bits. */
274+
static BROTLI_INLINE uint32_t BrotliReadBits32(
275+
BrotliBitReader* const br, uint32_t n_bits) {
276+
BROTLI_DCHECK(n_bits <= 32);
277+
if (BROTLI_64_BITS || (n_bits <= 16)) {
278+
uint32_t val;
279+
BrotliFillBitWindow(br, n_bits);
280+
BrotliTakeBits(br, n_bits, &val);
281+
return val;
282+
} else {
283+
uint32_t low_val;
284+
uint32_t high_val;
285+
BrotliFillBitWindow(br, 16);
286+
BrotliTakeBits(br, 16, &low_val);
287+
BrotliFillBitWindow(br, 16);
288+
BrotliTakeBits(br, n_bits - 16, &high_val);
289+
return low_val | (high_val << 16);
290+
}
291+
}
292+
265293
/* Tries to read the specified amount of bits. Returns BROTLI_FALSE, if there
266-
is not enough input. |n_bits| MUST be positive. */
294+
is not enough input. |n_bits| MUST be positive.
295+
Up to 24 bits are allowed to be requested from this method. */
267296
static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
268297
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
298+
BROTLI_DCHECK(n_bits <= 24);
269299
while (BrotliGetAvailableBits(br) < n_bits) {
270300
if (!BrotliPullByte(br)) {
271301
return BROTLI_FALSE;
@@ -275,6 +305,23 @@ static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits(
275305
return BROTLI_TRUE;
276306
}
277307

308+
/* Same as BrotliSafeReadBits, but allows reading up to 32 bits. */
309+
static BROTLI_INLINE BROTLI_BOOL BrotliSafeReadBits32(
310+
BrotliBitReader* const br, uint32_t n_bits, uint32_t* val) {
311+
BROTLI_DCHECK(n_bits <= 32);
312+
if (BROTLI_64_BITS || (n_bits <= 24)) {
313+
while (BrotliGetAvailableBits(br) < n_bits) {
314+
if (!BrotliPullByte(br)) {
315+
return BROTLI_FALSE;
316+
}
317+
}
318+
BrotliTakeBits(br, n_bits, val);
319+
return BROTLI_TRUE;
320+
} else {
321+
return BrotliSafeReadBits32Slow(br, n_bits, val);
322+
}
323+
}
324+
278325
/* Advances the bit reader position to the next byte boundary and verifies
279326
that any skipped bits are set to zero. */
280327
static BROTLI_INLINE BROTLI_BOOL BrotliJumpToByteBoundary(BrotliBitReader* br) {

0 commit comments

Comments
 (0)