Skip to content

Commit 3dbdfe1

Browse files
authored
Fix BufferBackend soundness issue and add StringInterner::resolve_unchecked (#68)
* fix BufferBackend::resolve unsoundness Unfortunately, this fix vastly regresses the performance of the method. Benchmarks show -73% throughput, which is massive ... Still, the BufferBackend is a viable choice for memory-constrained environments. * add StringInterner::resolve_unchecked method We added this because it makes a huge difference for the BufferBackend to have this available.
1 parent 52f1139 commit 3dbdfe1

File tree

4 files changed

+76
-10
lines changed

4 files changed

+76
-10
lines changed

benches/bench.rs

+34-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,11 @@ use criterion::{
2121
};
2222
use string_interner::backend::Backend;
2323

24-
criterion_group!(bench_resolve, bench_resolve_already_filled);
24+
criterion_group!(
25+
bench_resolve,
26+
bench_resolve_already_filled,
27+
bench_resolve_unchecked_already_filled
28+
);
2529
criterion_group!(bench_get, bench_get_already_filled);
2630
criterion_group!(bench_iter, bench_iter_already_filled);
2731
criterion_group!(
@@ -184,6 +188,35 @@ fn bench_resolve_already_filled(c: &mut Criterion) {
184188
bench_for_backend::<BenchBuffer>(&mut g);
185189
}
186190

191+
fn bench_resolve_unchecked_already_filled(c: &mut Criterion) {
192+
let mut g = c.benchmark_group("resolve_unchecked/already-filled");
193+
g.throughput(Throughput::Elements(BENCH_LEN_STRINGS as u64));
194+
fn bench_for_backend<BB: BackendBenchmark>(g: &mut BenchmarkGroup<WallTime>) {
195+
g.bench_with_input(
196+
BB::NAME,
197+
&(BENCH_LEN_STRINGS, BENCH_STRING_LEN),
198+
|bencher, &(len_words, word_len)| {
199+
let words = generate_test_strings(len_words, word_len);
200+
bencher.iter_batched_ref(
201+
|| BB::setup_filled_with_ids(&words),
202+
|(interner, word_ids)| {
203+
for &word_id in &*word_ids {
204+
black_box(
205+
// SAFETY: We provide only valid symbols to the tested interners.
206+
unsafe { interner.resolve_unchecked(word_id) },
207+
);
208+
}
209+
},
210+
BatchSize::SmallInput,
211+
)
212+
},
213+
);
214+
}
215+
bench_for_backend::<BenchBucket>(&mut g);
216+
bench_for_backend::<BenchString>(&mut g);
217+
bench_for_backend::<BenchBuffer>(&mut g);
218+
}
219+
187220
fn bench_get_already_filled(c: &mut Criterion) {
188221
let mut g = c.benchmark_group("get/already-filled");
189222
g.throughput(Throughput::Elements(BENCH_LEN_STRINGS as u64));

src/backend/buffer.rs

+10-8
Original file line numberDiff line numberDiff line change
@@ -88,15 +88,12 @@ where
8888
///
8989
/// Returns the string from the given index if any as well
9090
/// as the index of the next string in the buffer.
91-
fn resolve_index_to_str(&self, index: usize) -> Option<(&str, usize)> {
91+
fn resolve_index_to_str(&self, index: usize) -> Option<(&[u8], usize)> {
9292
let bytes = self.buffer.get(index..)?;
9393
let (str_len, str_len_bytes) = decode_var_usize(bytes)?;
9494
let index_str = index + str_len_bytes;
9595
let str_bytes = self.buffer.get(index_str..index_str + str_len)?;
96-
// SAFETY: It is guaranteed by the backend that only valid strings
97-
// are stored in this portion of the buffer.
98-
let string = unsafe { str::from_utf8_unchecked(str_bytes) };
99-
Some((string, index_str + str_len))
96+
Some((str_bytes, index_str + str_len))
10097
}
10198

10299
/// Resolves the string for the given symbol.
@@ -180,8 +177,10 @@ where
180177

181178
#[inline]
182179
fn resolve(&self, symbol: Self::Symbol) -> Option<&str> {
183-
self.resolve_index_to_str(symbol.to_usize())
184-
.map(|(string, _next_str_index)| string)
180+
match self.resolve_index_to_str(symbol.to_usize()) {
181+
None => None,
182+
Some((bytes, _)) => str::from_utf8(bytes).ok(),
183+
}
185184
}
186185

187186
fn shrink_to_fit(&mut self) {
@@ -481,7 +480,10 @@ where
481480
fn next(&mut self) -> Option<Self::Item> {
482481
self.backend
483482
.resolve_index_to_str(self.next)
484-
.and_then(|(string, next)| {
483+
.and_then(|(bytes, next)| {
484+
// SAFETY: Within the iterator all indices given to `resolve_index_to_str`
485+
// are properly pointing to the start of each interned string.
486+
let string = unsafe { str::from_utf8_unchecked(bytes) };
485487
let symbol = S::try_from_usize(self.next)?;
486488
self.next = next;
487489
self.remaining -= 1;

src/interner.rs

+12-1
Original file line numberDiff line numberDiff line change
@@ -264,12 +264,23 @@ where
264264
self.backend.shrink_to_fit()
265265
}
266266

267-
/// Returns the string for the given symbol if any.
267+
/// Returns the string for the given `symbol` if any.
268268
#[inline]
269269
pub fn resolve(&self, symbol: <B as Backend>::Symbol) -> Option<&str> {
270270
self.backend.resolve(symbol)
271271
}
272272

273+
/// Returns the string for the given `symbol` without performing any checks.
274+
///
275+
/// # Safety
276+
///
277+
/// It is the caller's responsibility to provide this method with `symbol`s
278+
/// that are valid for the [`StringInterner`].
279+
#[inline]
280+
pub unsafe fn resolve_unchecked(&self, symbol: <B as Backend>::Symbol) -> &str {
281+
unsafe { self.backend.resolve_unchecked(symbol) }
282+
}
283+
273284
/// Returns an iterator that yields all interned strings and their symbols.
274285
#[inline]
275286
pub fn iter(&self) -> <B as Backend>::Iter<'_> {

tests/tests.rs

+20
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,26 @@ macro_rules! gen_tests_for_backend {
298298
assert_eq!(interner.resolve(dd), None);
299299
}
300300

301+
#[test]
302+
fn resolve_unchecked_works() {
303+
let mut interner = StringInterner::new();
304+
// Insert 3 unique strings:
305+
let aa = interner.get_or_intern("aa");
306+
let bb = interner.get_or_intern("bb");
307+
let cc = interner.get_or_intern("cc");
308+
assert_eq!(interner.len(), 3);
309+
// Resolve valid symbols:
310+
assert_eq!(unsafe { interner.resolve_unchecked(aa) }, "aa");
311+
assert_eq!(unsafe { interner.resolve_unchecked(bb) }, "bb");
312+
assert_eq!(unsafe { interner.resolve_unchecked(cc) }, "cc");
313+
assert_eq!(interner.len(), 3);
314+
// Resolve invalid symbols:
315+
let dd = expect_valid_symbol(1000);
316+
assert_ne!(aa, dd);
317+
assert_ne!(bb, dd);
318+
assert_ne!(cc, dd);
319+
}
320+
301321
#[test]
302322
fn get_works() {
303323
let mut interner = StringInterner::new();

0 commit comments

Comments
 (0)