Skip to content

Commit ddc545b

Browse files
authored
PR #516: Improve CRC32 performance
1 parent 7520a3f commit ddc545b

File tree

4 files changed

+335
-136
lines changed

4 files changed

+335
-136
lines changed
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Runtime.CompilerServices;
23

34
namespace ICSharpCode.SharpZipLib.Checksum
45
{
@@ -25,9 +26,19 @@ namespace ICSharpCode.SharpZipLib.Checksum
2526
/// out is a one). We start with the highest power (least significant bit) of
2627
/// q and repeat for all eight bits of q.
2728
///
28-
/// The table is simply the CRC of all possible eight bit values. This is all
29-
/// the information needed to generate CRC's on data a byte at a time for all
30-
/// combinations of CRC register values and incoming bytes.
29+
/// This implementation uses sixteen lookup tables stored in one linear array
30+
/// to implement the slicing-by-16 algorithm, a variant of the slicing-by-8
31+
/// algorithm described in this Intel white paper:
32+
///
33+
/// https://web.archive.org/web/20120722193753/http://download.intel.com/technology/comms/perfnet/download/slicing-by-8.pdf
34+
///
35+
/// The first lookup table is simply the CRC of all possible eight bit values.
36+
/// Each successive lookup table is derived from the original table generated
37+
/// by Sarwate's algorithm. Slicing a 16-byte input and XORing the outputs
38+
/// together will produce the same output as a byte-by-byte CRC loop with
39+
/// fewer arithmetic and bit manipulation operations, at the cost of increased
40+
/// memory consumed by the lookup tables. (Slicing-by-16 requires a 16KB table,
41+
/// which is still small enough to fit in most processors' L1 cache.)
3142
/// </remarks>
3243
public sealed class BZip2Crc : IChecksum
3344
{
@@ -36,72 +47,7 @@ public sealed class BZip2Crc : IChecksum
3647
private const uint crcInit = 0xFFFFFFFF;
3748
//const uint crcXor = 0x00000000;
3849

39-
private static readonly uint[] crcTable = {
40-
0X00000000, 0X04C11DB7, 0X09823B6E, 0X0D4326D9,
41-
0X130476DC, 0X17C56B6B, 0X1A864DB2, 0X1E475005,
42-
0X2608EDB8, 0X22C9F00F, 0X2F8AD6D6, 0X2B4BCB61,
43-
0X350C9B64, 0X31CD86D3, 0X3C8EA00A, 0X384FBDBD,
44-
0X4C11DB70, 0X48D0C6C7, 0X4593E01E, 0X4152FDA9,
45-
0X5F15ADAC, 0X5BD4B01B, 0X569796C2, 0X52568B75,
46-
0X6A1936C8, 0X6ED82B7F, 0X639B0DA6, 0X675A1011,
47-
0X791D4014, 0X7DDC5DA3, 0X709F7B7A, 0X745E66CD,
48-
0X9823B6E0, 0X9CE2AB57, 0X91A18D8E, 0X95609039,
49-
0X8B27C03C, 0X8FE6DD8B, 0X82A5FB52, 0X8664E6E5,
50-
0XBE2B5B58, 0XBAEA46EF, 0XB7A96036, 0XB3687D81,
51-
0XAD2F2D84, 0XA9EE3033, 0XA4AD16EA, 0XA06C0B5D,
52-
0XD4326D90, 0XD0F37027, 0XDDB056FE, 0XD9714B49,
53-
0XC7361B4C, 0XC3F706FB, 0XCEB42022, 0XCA753D95,
54-
0XF23A8028, 0XF6FB9D9F, 0XFBB8BB46, 0XFF79A6F1,
55-
0XE13EF6F4, 0XE5FFEB43, 0XE8BCCD9A, 0XEC7DD02D,
56-
0X34867077, 0X30476DC0, 0X3D044B19, 0X39C556AE,
57-
0X278206AB, 0X23431B1C, 0X2E003DC5, 0X2AC12072,
58-
0X128E9DCF, 0X164F8078, 0X1B0CA6A1, 0X1FCDBB16,
59-
0X018AEB13, 0X054BF6A4, 0X0808D07D, 0X0CC9CDCA,
60-
0X7897AB07, 0X7C56B6B0, 0X71159069, 0X75D48DDE,
61-
0X6B93DDDB, 0X6F52C06C, 0X6211E6B5, 0X66D0FB02,
62-
0X5E9F46BF, 0X5A5E5B08, 0X571D7DD1, 0X53DC6066,
63-
0X4D9B3063, 0X495A2DD4, 0X44190B0D, 0X40D816BA,
64-
0XACA5C697, 0XA864DB20, 0XA527FDF9, 0XA1E6E04E,
65-
0XBFA1B04B, 0XBB60ADFC, 0XB6238B25, 0XB2E29692,
66-
0X8AAD2B2F, 0X8E6C3698, 0X832F1041, 0X87EE0DF6,
67-
0X99A95DF3, 0X9D684044, 0X902B669D, 0X94EA7B2A,
68-
0XE0B41DE7, 0XE4750050, 0XE9362689, 0XEDF73B3E,
69-
0XF3B06B3B, 0XF771768C, 0XFA325055, 0XFEF34DE2,
70-
0XC6BCF05F, 0XC27DEDE8, 0XCF3ECB31, 0XCBFFD686,
71-
0XD5B88683, 0XD1799B34, 0XDC3ABDED, 0XD8FBA05A,
72-
0X690CE0EE, 0X6DCDFD59, 0X608EDB80, 0X644FC637,
73-
0X7A089632, 0X7EC98B85, 0X738AAD5C, 0X774BB0EB,
74-
0X4F040D56, 0X4BC510E1, 0X46863638, 0X42472B8F,
75-
0X5C007B8A, 0X58C1663D, 0X558240E4, 0X51435D53,
76-
0X251D3B9E, 0X21DC2629, 0X2C9F00F0, 0X285E1D47,
77-
0X36194D42, 0X32D850F5, 0X3F9B762C, 0X3B5A6B9B,
78-
0X0315D626, 0X07D4CB91, 0X0A97ED48, 0X0E56F0FF,
79-
0X1011A0FA, 0X14D0BD4D, 0X19939B94, 0X1D528623,
80-
0XF12F560E, 0XF5EE4BB9, 0XF8AD6D60, 0XFC6C70D7,
81-
0XE22B20D2, 0XE6EA3D65, 0XEBA91BBC, 0XEF68060B,
82-
0XD727BBB6, 0XD3E6A601, 0XDEA580D8, 0XDA649D6F,
83-
0XC423CD6A, 0XC0E2D0DD, 0XCDA1F604, 0XC960EBB3,
84-
0XBD3E8D7E, 0XB9FF90C9, 0XB4BCB610, 0XB07DABA7,
85-
0XAE3AFBA2, 0XAAFBE615, 0XA7B8C0CC, 0XA379DD7B,
86-
0X9B3660C6, 0X9FF77D71, 0X92B45BA8, 0X9675461F,
87-
0X8832161A, 0X8CF30BAD, 0X81B02D74, 0X857130C3,
88-
0X5D8A9099, 0X594B8D2E, 0X5408ABF7, 0X50C9B640,
89-
0X4E8EE645, 0X4A4FFBF2, 0X470CDD2B, 0X43CDC09C,
90-
0X7B827D21, 0X7F436096, 0X7200464F, 0X76C15BF8,
91-
0X68860BFD, 0X6C47164A, 0X61043093, 0X65C52D24,
92-
0X119B4BE9, 0X155A565E, 0X18197087, 0X1CD86D30,
93-
0X029F3D35, 0X065E2082, 0X0B1D065B, 0X0FDC1BEC,
94-
0X3793A651, 0X3352BBE6, 0X3E119D3F, 0X3AD08088,
95-
0X2497D08D, 0X2056CD3A, 0X2D15EBE3, 0X29D4F654,
96-
0XC5A92679, 0XC1683BCE, 0XCC2B1D17, 0XC8EA00A0,
97-
0XD6AD50A5, 0XD26C4D12, 0XDF2F6BCB, 0XDBEE767C,
98-
0XE3A1CBC1, 0XE760D676, 0XEA23F0AF, 0XEEE2ED18,
99-
0XF0A5BD1D, 0XF464A0AA, 0XF9278673, 0XFDE69BC4,
100-
0X89B8FD09, 0X8D79E0BE, 0X803AC667, 0X84FBDBD0,
101-
0X9ABC8BD5, 0X9E7D9662, 0X933EB0BB, 0X97FFAD0C,
102-
0XAFB010B1, 0XAB710D06, 0XA6322BDF, 0XA2F33668,
103-
0XBCB4666D, 0XB8757BDA, 0XB5365D03, 0XB1F740B4
104-
};
50+
private static readonly uint[] crcTable = CrcUtilities.GenerateSlicingLookupTable(0x04C11DB7, isReversed: false);
10551

10652
/// <summary>
10753
/// The CRC data checksum so far.
@@ -149,6 +95,7 @@ public long Value
14995
/// the byte is taken as the lower 8 bits of bval
15096
/// </param>
15197
/// <remarks>Reversed Data = false</remarks>
98+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
15299
public void Update(int bval)
153100
{
154101
checkValue = unchecked(crcTable[(byte)(((checkValue >> 24) & 0xFF) ^ bval)] ^ (checkValue << 8));
@@ -166,7 +113,7 @@ public void Update(byte[] buffer)
166113
throw new ArgumentNullException(nameof(buffer));
167114
}
168115

169-
Update(new ArraySegment<byte>(buffer, 0, buffer.Length));
116+
Update(buffer, 0, buffer.Length);
170117
}
171118

172119
/// <summary>
@@ -177,11 +124,48 @@ public void Update(byte[] buffer)
177124
/// </param>
178125
public void Update(ArraySegment<byte> segment)
179126
{
180-
var count = segment.Count;
181-
var offset = segment.Offset;
127+
Update(segment.Array, segment.Offset, segment.Count);
128+
}
129+
130+
/// <summary>
131+
/// Internal helper function for updating a block of data using slicing.
132+
/// </summary>
133+
/// <param name="data">The array containing the data to add</param>
134+
/// <param name="offset">Range start for <paramref name="data"/> (inclusive)</param>
135+
/// <param name="count">The number of bytes to checksum starting from <paramref name="offset"/></param>
136+
private void Update(byte[] data, int offset, int count)
137+
{
138+
int remainder = count % CrcUtilities.SlicingDegree;
139+
int end = offset + count - remainder;
140+
141+
while (offset != end)
142+
{
143+
checkValue = CrcUtilities.UpdateDataForNormalPoly(data, offset, crcTable, checkValue);
144+
offset += CrcUtilities.SlicingDegree;
145+
}
182146

183-
while (--count >= 0)
184-
Update(segment.Array[offset++]);
147+
if (remainder != 0)
148+
{
149+
SlowUpdateLoop(data, offset, end + remainder);
150+
}
151+
}
152+
153+
/// <summary>
154+
/// A non-inlined function for updating data that doesn't fit in a 16-byte
155+
/// block. We don't expect to enter this function most of the time, and when
156+
/// we do we're not here for long, so disabling inlining here improves
157+
/// performance overall.
158+
/// </summary>
159+
/// <param name="data">The array containing the data to add</param>
160+
/// <param name="offset">Range start for <paramref name="data"/> (inclusive)</param>
161+
/// <param name="end">Range end for <paramref name="data"/> (exclusive)</param>
162+
[MethodImpl(MethodImplOptions.NoInlining)]
163+
private void SlowUpdateLoop(byte[] data, int offset, int end)
164+
{
165+
while (offset != end)
166+
{
167+
Update(data[offset++]);
168+
}
185169
}
186170
}
187171
}

src/ICSharpCode.SharpZipLib/Checksum/Crc32.cs

+59-62
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Runtime.CompilerServices;
23

34
namespace ICSharpCode.SharpZipLib.Checksum
45
{
@@ -25,9 +26,19 @@ namespace ICSharpCode.SharpZipLib.Checksum
2526
/// out is a one). We start with the highest power (least significant bit) of
2627
/// q and repeat for all eight bits of q.
2728
///
28-
/// The table is simply the CRC of all possible eight bit values. This is all
29-
/// the information needed to generate CRC's on data a byte at a time for all
30-
/// combinations of CRC register values and incoming bytes.
29+
/// This implementation uses sixteen lookup tables stored in one linear array
30+
/// to implement the slicing-by-16 algorithm, a variant of the slicing-by-8
31+
/// algorithm described in this Intel white paper:
32+
///
33+
/// https://web.archive.org/web/20120722193753/http://download.intel.com/technology/comms/perfnet/download/slicing-by-8.pdf
34+
///
35+
/// The first lookup table is simply the CRC of all possible eight bit values.
36+
/// Each successive lookup table is derived from the original table generated
37+
/// by Sarwate's algorithm. Slicing a 16-byte input and XORing the outputs
38+
/// together will produce the same output as a byte-by-byte CRC loop with
39+
/// fewer arithmetic and bit manipulation operations, at the cost of increased
40+
/// memory consumed by the lookup tables. (Slicing-by-16 requires a 16KB table,
41+
/// which is still small enough to fit in most processors' L1 cache.)
3142
/// </remarks>
3243
public sealed class Crc32 : IChecksum
3344
{
@@ -36,60 +47,7 @@ public sealed class Crc32 : IChecksum
3647
private static readonly uint crcInit = 0xFFFFFFFF;
3748
private static readonly uint crcXor = 0xFFFFFFFF;
3849

39-
private static readonly uint[] crcTable = {
40-
0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419,
41-
0x706AF48F, 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4,
42-
0xE0D5E91E, 0x97D2D988, 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07,
43-
0x90BF1D91, 0x1DB71064, 0x6AB020F2, 0xF3B97148, 0x84BE41DE,
44-
0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7, 0x136C9856,
45-
0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9,
46-
0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4,
47-
0xA2677172, 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B,
48-
0x35B5A8FA, 0x42B2986C, 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3,
49-
0x45DF5C75, 0xDCD60DCF, 0xABD13D59, 0x26D930AC, 0x51DE003A,
50-
0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423, 0xCFBA9599,
51-
0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
52-
0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190,
53-
0x01DB7106, 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F,
54-
0x9FBFE4A5, 0xE8B8D433, 0x7807C9A2, 0x0F00F934, 0x9609A88E,
55-
0xE10E9818, 0x7F6A0DBB, 0x086D3D2D, 0x91646C97, 0xE6635C01,
56-
0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E, 0x6C0695ED,
57-
0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950,
58-
0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3,
59-
0xFBD44C65, 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2,
60-
0x4ADFA541, 0x3DD895D7, 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A,
61-
0x346ED9FC, 0xAD678846, 0xDA60B8D0, 0x44042D73, 0x33031DE5,
62-
0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA, 0xBE0B1010,
63-
0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
64-
0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17,
65-
0x2EB40D81, 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6,
66-
0x03B6E20C, 0x74B1D29A, 0xEAD54739, 0x9DD277AF, 0x04DB2615,
67-
0x73DC1683, 0xE3630B12, 0x94643B84, 0x0D6D6A3E, 0x7A6A5AA8,
68-
0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1, 0xF00F9344,
69-
0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB,
70-
0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A,
71-
0x67DD4ACC, 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5,
72-
0xD6D6A3E8, 0xA1D1937E, 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1,
73-
0xA6BC5767, 0x3FB506DD, 0x48B2364B, 0xD80D2BDA, 0xAF0A1B4C,
74-
0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55, 0x316E8EEF,
75-
0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
76-
0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE,
77-
0xB2BD0B28, 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31,
78-
0x2CD99E8B, 0x5BDEAE1D, 0x9B64C2B0, 0xEC63F226, 0x756AA39C,
79-
0x026D930A, 0x9C0906A9, 0xEB0E363F, 0x72076785, 0x05005713,
80-
0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38, 0x92D28E9B,
81-
0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
82-
0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1,
83-
0x18B74777, 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C,
84-
0x8F659EFF, 0xF862AE69, 0x616BFFD3, 0x166CCF45, 0xA00AE278,
85-
0xD70DD2EE, 0x4E048354, 0x3903B3C2, 0xA7672661, 0xD06016F7,
86-
0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC, 0x40DF0B66,
87-
0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
88-
0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605,
89-
0xCDD70693, 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8,
90-
0x5D681B02, 0x2A6F2B94, 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B,
91-
0x2D02EF8D
92-
};
50+
private static readonly uint[] crcTable = CrcUtilities.GenerateSlicingLookupTable(0xEDB88320, isReversed: true);
9351

9452
/// <summary>
9553
/// The CRC data checksum so far.
@@ -98,6 +56,7 @@ public sealed class Crc32 : IChecksum
9856

9957
#endregion Instance Fields
10058

59+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
10160
internal static uint ComputeCrc32(uint oldCrc, byte bval)
10261
{
10362
return (uint)(Crc32.crcTable[(oldCrc ^ bval) & 0xFF] ^ (oldCrc >> 8));
@@ -138,6 +97,7 @@ public long Value
13897
/// the byte is taken as the lower 8 bits of bval
13998
/// </param>
14099
/// <remarks>Reversed Data = true</remarks>
100+
[MethodImpl(MethodImplOptions.AggressiveInlining)]
141101
public void Update(int bval)
142102
{
143103
checkValue = unchecked(crcTable[(checkValue ^ bval) & 0xFF] ^ (checkValue >> 8));
@@ -155,7 +115,7 @@ public void Update(byte[] buffer)
155115
throw new ArgumentNullException(nameof(buffer));
156116
}
157117

158-
Update(new ArraySegment<byte>(buffer, 0, buffer.Length));
118+
Update(buffer, 0, buffer.Length);
159119
}
160120

161121
/// <summary>
@@ -166,11 +126,48 @@ public void Update(byte[] buffer)
166126
/// </param>
167127
public void Update(ArraySegment<byte> segment)
168128
{
169-
var count = segment.Count;
170-
var offset = segment.Offset;
129+
Update(segment.Array, segment.Offset, segment.Count);
130+
}
131+
132+
/// <summary>
133+
/// Internal helper function for updating a block of data using slicing.
134+
/// </summary>
135+
/// <param name="data">The array containing the data to add</param>
136+
/// <param name="offset">Range start for <paramref name="data"/> (inclusive)</param>
137+
/// <param name="count">The number of bytes to checksum starting from <paramref name="offset"/></param>
138+
private void Update(byte[] data, int offset, int count)
139+
{
140+
int remainder = count % CrcUtilities.SlicingDegree;
141+
int end = offset + count - remainder;
142+
143+
while (offset != end)
144+
{
145+
checkValue = CrcUtilities.UpdateDataForReversedPoly(data, offset, crcTable, checkValue);
146+
offset += CrcUtilities.SlicingDegree;
147+
}
171148

172-
while (--count >= 0)
173-
Update(segment.Array[offset++]);
149+
if (remainder != 0)
150+
{
151+
SlowUpdateLoop(data, offset, end + remainder);
152+
}
153+
}
154+
155+
/// <summary>
156+
/// A non-inlined function for updating data that doesn't fit in a 16-byte
157+
/// block. We don't expect to enter this function most of the time, and when
158+
/// we do we're not here for long, so disabling inlining here improves
159+
/// performance overall.
160+
/// </summary>
161+
/// <param name="data">The array containing the data to add</param>
162+
/// <param name="offset">Range start for <paramref name="data"/> (inclusive)</param>
163+
/// <param name="end">Range end for <paramref name="data"/> (exclusive)</param>
164+
[MethodImpl(MethodImplOptions.NoInlining)]
165+
private void SlowUpdateLoop(byte[] data, int offset, int end)
166+
{
167+
while (offset != end)
168+
{
169+
Update(data[offset++]);
170+
}
174171
}
175172
}
176173
}

0 commit comments

Comments
 (0)