Skip to content

Commit 0d78ef0

Browse files
authored
s2: Avoid Decode size checks (#328)
Add another decode loop that doesn't check sizes as long as src is big enough. For now this is only in the Go version.

Streams:
```
Before: Decompressing. 426243297 -> 1000000000 [234.61%]; 1.941s, 491.3MB/s
After: Decompressing. 426243297 -> 1000000000 [234.61%]; 1.895s, 503.3MB/s
```

Some blocks like it a lot:
```
BenchmarkDecodeS2Block/4-pdf/block-32 379 336 -11.35%
BenchmarkDecodeS2Block/4-pdf/block-better-32 691 630 -8.83%
BenchmarkDecodeS2Block/4-pdf/block-best-32 816 702 -13.97%
BenchmarkDecodeS2Block/5-html4/block-32 13645 8727 -36.04%
BenchmarkDecodeS2Block/5-html4/block-better-32 14059 9204 -34.53%
BenchmarkDecodeS2Block/5-html4/block-best-32 13729 8975 -34.63%
```
1 parent a576225 commit 0d78ef0

File tree

1 file changed

+105
-0
lines changed

1 file changed

+105
-0
lines changed

s2/decode_other.go

+105
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,110 @@ func s2Decode(dst, src []byte) int {
2121
}
2222
var d, s, length int
2323
offset := 0
24+
25+
// As long as we can read at least 5 bytes...
26+
for s < len(src)-5 {
27+
switch src[s] & 0x03 {
28+
case tagLiteral:
29+
x := uint32(src[s] >> 2)
30+
switch {
31+
case x < 60:
32+
s++
33+
case x == 60:
34+
s += 2
35+
x = uint32(src[s-1])
36+
case x == 61:
37+
s += 3
38+
x = uint32(src[s-2]) | uint32(src[s-1])<<8
39+
case x == 62:
40+
s += 4
41+
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
42+
case x == 63:
43+
s += 5
44+
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
45+
}
46+
length = int(x) + 1
47+
if length > len(dst)-d || length > len(src)-s {
48+
return decodeErrCodeCorrupt
49+
}
50+
if debug {
51+
fmt.Println("literals, length:", length, "d-after:", d+length)
52+
}
53+
54+
copy(dst[d:], src[s:s+length])
55+
d += length
56+
s += length
57+
continue
58+
59+
case tagCopy1:
60+
s += 2
61+
length = int(src[s-2]) >> 2 & 0x7
62+
toffset := int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
63+
if toffset == 0 {
64+
if debug {
65+
fmt.Print("(repeat) ")
66+
}
67+
// keep last offset
68+
switch length {
69+
case 5:
70+
s += 1
71+
length = int(uint32(src[s-1])) + 4
72+
case 6:
73+
s += 2
74+
length = int(uint32(src[s-2])|(uint32(src[s-1])<<8)) + (1 << 8)
75+
case 7:
76+
s += 3
77+
length = int(uint32(src[s-3])|(uint32(src[s-2])<<8)|(uint32(src[s-1])<<16)) + (1 << 16)
78+
default: // 0-> 4
79+
}
80+
} else {
81+
offset = toffset
82+
}
83+
length += 4
84+
case tagCopy2:
85+
s += 3
86+
length = 1 + int(src[s-3])>>2
87+
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
88+
89+
case tagCopy4:
90+
s += 5
91+
length = 1 + int(src[s-5])>>2
92+
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
93+
}
94+
95+
if offset <= 0 || d < offset || length > len(dst)-d {
96+
return decodeErrCodeCorrupt
97+
}
98+
99+
if debug {
100+
fmt.Println("copy, length:", length, "offset:", offset, "d-after:", d+length)
101+
}
102+
103+
// Copy from an earlier sub-slice of dst to a later sub-slice.
104+
// If no overlap, use the built-in copy:
105+
if offset > length {
106+
copy(dst[d:d+length], dst[d-offset:])
107+
d += length
108+
continue
109+
}
110+
111+
// Unlike the built-in copy function, this byte-by-byte copy always runs
112+
// forwards, even if the slices overlap. Conceptually, this is:
113+
//
114+
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
115+
//
116+
// We align the slices into a and b and show the compiler they are the same size.
117+
// This allows the loop to run without bounds checks.
118+
a := dst[d : d+length]
119+
b := dst[d-offset:]
120+
b = b[:len(a)]
121+
for i := range a {
122+
a[i] = b[i]
123+
}
124+
d += length
125+
}
126+
127+
// Remaining with extra checks...
24128
for s < len(src) {
25129
switch src[s] & 0x03 {
26130
case tagLiteral:
@@ -151,6 +255,7 @@ func s2Decode(dst, src []byte) int {
151255
}
152256
d += length
153257
}
258+
154259
if d != len(dst) {
155260
return decodeErrCodeCorrupt
156261
}

0 commit comments

Comments
 (0)