Skip to content

Commit 4b63e08

Browse files
committed
Optimize LEB128 data reading
As it turns out, the Rust compiler uses variable-length LEB128-encoded integers internally. It so happens that its developers spent a fair amount of effort micro-optimizing the decoding functionality [0] [1], as it is in the hot path. With this change we replace our decoding routines with these optimized ones. To make that happen more easily (and to gain some baseline speedup), also remove the "shift" return from the respective methods. As a result of these changes, we see a respectable speedup: Before: test util::tests::bench_u64_leb128_reading ... bench: 128 ns/iter (+/- 10) After: test util::tests::bench_u64_leb128_reading ... bench: 103 ns/iter (+/- 5) Gsym decoding, which uses these routines, improved as follows: main/symbolize_gsym_multi_no_setup time: [146.26 µs 146.69 µs 147.18 µs] change: [−7.2075% −5.7106% −4.4870%] (p = 0.00 < 0.02) Performance has improved. [0] rust-lang/rust#69050 [1] rust-lang/rust#69157 Signed-off-by: Daniel Müller <deso@posteo.net>
1 parent 228ead0 commit 4b63e08

File tree

3 files changed

+69
-46
lines changed

3 files changed

+69
-46
lines changed

src/gsym/inline.rs

+5-10
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ impl InlineInfo {
2525
) -> Result<Option<InlineInfo>> {
2626
let range_cnt = data
2727
.read_u64_leb128()
28-
.ok_or_invalid_data(|| "failed to read range count from inline information")?
29-
.0;
28+
.ok_or_invalid_data(|| "failed to read range count from inline information")?;
3029
let range_cnt = usize::try_from(range_cnt)
3130
.ok()
3231
.ok_or_invalid_data(|| "range count ({}) is too big")?;
@@ -41,12 +40,10 @@ impl InlineInfo {
4140
for i in 0..range_cnt {
4241
let offset = data
4342
.read_u64_leb128()
44-
.ok_or_invalid_data(|| "failed to read offset from inline information")?
45-
.0;
43+
.ok_or_invalid_data(|| "failed to read offset from inline information")?;
4644
let size = data
4745
.read_u64_leb128()
48-
.ok_or_invalid_data(|| "failed to read size from inline information")?
49-
.0;
46+
.ok_or_invalid_data(|| "failed to read size from inline information")?;
5047

5148
let start = base_addr
5249
.checked_add(offset)
@@ -91,15 +88,13 @@ impl InlineInfo {
9188
let (call_file, call_line) = if lookup_addr.is_some() {
9289
let call_file = data
9390
.read_u64_leb128()
94-
.ok_or_invalid_data(|| "failed to read call file from inline information")?
95-
.0;
91+
.ok_or_invalid_data(|| "failed to read call file from inline information")?;
9692
let call_file = u32::try_from(call_file)
9793
.ok()
9894
.ok_or_invalid_data(|| "call file index ({}) is too big")?;
9995
let call_line = data
10096
.read_u64_leb128()
101-
.ok_or_invalid_data(|| "failed to read call line from inline information")?
102-
.0;
97+
.ok_or_invalid_data(|| "failed to read call line from inline information")?;
10398
let call_line = u32::try_from(call_line).unwrap_or(u32::MAX);
10499
(Some(call_file), Some(call_line))
105100
} else {

src/gsym/linetab.rs

+6-6
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ impl LineTableHeader {
4848
///
4949
/// * `data` - is what [`AddrData::data`] is.
5050
pub(super) fn parse(data: &mut &[u8]) -> Option<Self> {
51-
let (min_delta, _bytes) = data.read_i64_leb128()?;
52-
let (max_delta, _bytes) = data.read_i64_leb128()?;
53-
let (first_line, _bytes) = data.read_u64_leb128()?;
51+
let min_delta = data.read_i64_leb128()?;
52+
let max_delta = data.read_i64_leb128()?;
53+
let first_line = data.read_u64_leb128()?;
5454

5555
let header = Self {
5656
min_delta,
@@ -108,17 +108,17 @@ pub(crate) fn run_op(
108108
match op {
109109
END_SEQUENCE => Some(RunResult::End),
110110
SET_FILE => {
111-
let (f, _bytes) = ops.read_u64_leb128()?;
111+
let f = ops.read_u64_leb128()?;
112112
row.file_idx = f as u32;
113113
Some(RunResult::Ok)
114114
}
115115
ADVANCE_PC => {
116-
let (adv, _bytes) = ops.read_u64_leb128()?;
116+
let adv = ops.read_u64_leb128()?;
117117
row.addr += adv as Addr;
118118
Some(RunResult::NewRow)
119119
}
120120
ADVANCE_LINE => {
121-
let (adv, _bytes) = ops.read_i64_leb128()?;
121+
let adv = ops.read_i64_leb128()?;
122122
row.file_line = (row.file_line as i64 + adv) as u32;
123123
Some(RunResult::Ok)
124124
}

src/util.rs

+58-30
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,8 @@ pub(crate) trait ReadRaw<'data> {
387387
/// Consume and return `len` bytes.
388388
fn read_slice(&mut self, len: usize) -> Option<&'data [u8]>;
389389

390+
fn read_array<const N: usize>(&mut self) -> Option<[u8; N]>;
391+
390392
/// Read a NUL terminated string.
391393
fn read_cstr(&mut self) -> Option<&'data CStr>;
392394

@@ -470,36 +472,55 @@ pub(crate) trait ReadRaw<'data> {
470472

471473
/// Read a `u64` encoded as unsigned variable length little endian base 128
472474
/// value.
473-
///
474-
/// The function returns the value read along with the number of bytes
475-
/// consumed.
476-
fn read_u64_leb128(&mut self) -> Option<(u64, u8)> {
477-
let mut shift = 0;
478-
let mut value = 0u64;
479-
while let Some(bytes) = self.read_slice(1) {
480-
if let [byte] = bytes {
481-
value |= ((byte & 0b0111_1111) as u64) << shift;
482-
shift += 7;
483-
if (byte & 0b1000_0000) == 0 {
484-
return Some((value, shift / 7))
485-
}
475+
//
476+
// Slightly adjusted copy of `rustc` implementation:
477+
// https://github.com/rust-lang/rust/blob/7ebd2bdbf6d798e6e711a0100981b0ff029abf5f/compiler/rustc_serialize/src/leb128.rs#L54
478+
fn read_u64_leb128(&mut self) -> Option<u64> {
479+
// The first iteration of this loop is peeled. This is a
480+
// performance win because this code is hot and integer values less
481+
// than 128 are very common, typically occurring 50-80% or more of
482+
// the time, even for u64 and u128.
483+
let [byte] = self.read_array::<1>()?;
484+
if (byte & 0x80) == 0 {
485+
return Some(byte as u64);
486+
}
487+
let mut result = (byte & 0x7F) as u64;
488+
let mut shift = 7;
489+
loop {
490+
let [byte] = self.read_array::<1>()?;
491+
if (byte & 0x80) == 0 {
492+
result |= (byte as u64) << shift;
493+
return Some(result);
486494
} else {
487-
unreachable!()
495+
result |= ((byte & 0x7F) as u64) << shift;
488496
}
497+
shift += 7;
489498
}
490-
None
491499
}
492500

493501
/// Read an `i64` encoded as signed variable length little endian base 128
494502
/// value.
495-
///
496-
/// The function returns the value read along with the number of bytes
497-
/// consumed.
498-
fn read_i64_leb128(&mut self) -> Option<(i64, u8)> {
499-
let (value, shift) = self.read_u64_leb128()?;
500-
let sign_bits = u64::BITS as u8 - shift * 7;
501-
let value = ((value as i64) << sign_bits) >> sign_bits;
502-
Some((value, shift))
503+
fn read_i64_leb128(&mut self) -> Option<i64> {
504+
let mut result = 0;
505+
let mut shift = 0;
506+
let mut byte;
507+
508+
loop {
509+
[byte] = self.read_array::<1>()?;
510+
result |= <i64>::from(byte & 0x7F) << shift;
511+
shift += 7;
512+
513+
if (byte & 0x80) == 0 {
514+
break;
515+
}
516+
}
517+
518+
if (shift < <i64>::BITS) && ((byte & 0x40) != 0) {
519+
// sign extend
520+
result |= !0 << shift;
521+
}
522+
523+
Some(result)
503524
}
504525
}
505526

@@ -527,6 +548,16 @@ impl<'data> ReadRaw<'data> for &'data [u8] {
527548
Some(a)
528549
}
529550

551+
#[inline]
552+
fn read_array<const N: usize>(&mut self) -> Option<[u8; N]> {
553+
self.ensure(N)?;
554+
let (a, b) = self.split_at(N);
555+
*self = b;
556+
// SAFETY: We *know* that `a` has length `N`.
557+
let array = unsafe { <[u8; N]>::try_from(a).unwrap_unchecked() };
558+
Some(array)
559+
}
560+
530561
#[inline]
531562
fn read_cstr(&mut self) -> Option<&'data CStr> {
532563
let idx = self.iter().position(|byte| *byte == b'\0')?;
@@ -815,13 +846,11 @@ mod tests {
815846
#[test]
816847
fn leb128_reading() {
817848
let data = [0xf4, 0xf3, 0x75];
818-
let (v, s) = data.as_slice().read_u64_leb128().unwrap();
849+
let v = data.as_slice().read_u64_leb128().unwrap();
819850
assert_eq!(v, 0x1d79f4);
820-
assert_eq!(s, 3);
821851

822-
let (v, s) = data.as_slice().read_i64_leb128().unwrap();
852+
let v = data.as_slice().read_i64_leb128().unwrap();
823853
assert_eq!(v, -165388);
824-
assert_eq!(s, 3);
825854
}
826855

827856
/// Check that we can read a NUL terminated string from a slice.
@@ -941,16 +970,15 @@ mod tests {
941970
];
942971

943972
for (data, expected) in data {
944-
let (v, _s) = data.as_slice().read_u64_leb128().unwrap();
973+
let v = data.as_slice().read_u64_leb128().unwrap();
945974
assert_eq!(v, expected);
946975
}
947976

948977
let () = b.iter(|| {
949978
for (data, _) in data {
950979
let mut slice = black_box(data.as_slice());
951-
let (v, s) = slice.read_u64_leb128().unwrap();
980+
let v = slice.read_u64_leb128().unwrap();
952981
black_box(v);
953-
black_box(s);
954982
}
955983
});
956984
}

0 commit comments

Comments
 (0)