Skip to content

Commit

Permalink
Lazy init of anyfuncs: use bitmap rather than zero-func-ptr as sentin…
Browse files Browse the repository at this point in the history
…el to avoid need to zero lots of memory at instantiation
  • Loading branch information
cfallin committed Jan 25, 2022
1 parent 07e8bdd commit 6bb241e
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 50 deletions.
6 changes: 6 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -129,3 +129,9 @@ harness = false
[[bench]]
name = "thread_eager_init"
harness = false

[profile.bench]
debug = true

[profile.release]
debug = true
20 changes: 15 additions & 5 deletions benches/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,22 @@ pub fn build_wasi_example() {
}

pub fn strategies() -> Vec<InstanceAllocationStrategy> {
let mut module_limits = ModuleLimits::default();
module_limits.functions = 50000;
module_limits.globals = 10000;
module_limits.types = 50000;
module_limits.memory_pages = 2048;

let pooling = InstanceAllocationStrategy::Pooling {
strategy: PoolingAllocationStrategy::Random,
module_limits,
instance_limits: InstanceLimits::default(),
};
vec![
// Skip the on-demand allocator when uffd is enabled
#[cfg(any(not(feature = "uffd"), not(target_os = "linux")))]
InstanceAllocationStrategy::OnDemand,
InstanceAllocationStrategy::pooling(),
// #[cfg(any(not(feature = "uffd"), not(target_os = "linux")))]
// InstanceAllocationStrategy::OnDemand,
pooling,
]
}

Expand All @@ -67,8 +78,7 @@ pub fn load_module(engine: &Engine, module_name: &str) -> Result<(Module, Linker
path.push("instantiation");
path.push(module_name);

let module = Module::from_file(&engine, &path)
.unwrap_or_else(|_| panic!("failed to load benchmark `{}`", path.display()));
let module = Module::from_file(&engine, &path).unwrap();
let mut linker = Linker::new(&engine);
wasmtime_wasi::add_to_linker(&mut linker, |cx| cx).unwrap();

Expand Down
72 changes: 34 additions & 38 deletions benches/server.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use criterion::{criterion_group, criterion_main, Criterion};
use std::{sync::Arc, time::Instant};
use std::sync::Arc;
use wasmtime::*;
use wasmtime_wasi::{sync::WasiCtxBuilder, WasiCtx};

Expand All @@ -22,19 +22,19 @@ impl Server {
let wasi = WasiCtxBuilder::new().build();
let mut store = Store::new(&self.engine, wasi);
let instance = ipre.instantiate_async(&mut store).await.unwrap();
let start_func = instance.get_func(&mut store, "_start").unwrap();
start_func
.call_async(&mut store, &[], &mut [])
.await
.unwrap();
// let start_func = instance.get_func(&mut store, "_start").unwrap();
// start_func
// .call_async(&mut store, &[], &mut [])
// .await
// .unwrap();
}
}

fn run_server(
c: &mut Criterion,
strategy: &InstanceAllocationStrategy,
filenames: &[&str],
occupancy: usize,
instantiations: usize,
) {
let engine = common::make_engine(strategy, /* async = */ true).unwrap();
let mut instance_pres = vec![];
Expand All @@ -53,45 +53,41 @@ fn run_server(
instance_pres,
});

// Spawn an initial batch of jobs up to the
let server_clone = server.clone();

let rt = tokio::runtime::Runtime::new().unwrap();
rt.block_on(async move {
for i in 0..instantiations {
let server = server_clone.clone();
tokio::spawn(server.job(i));
}
});
c.bench_function(
&format!(
"strategy {}, occupancy {}, benches {:?}",
common::benchmark_name(strategy),
occupancy,
filenames
),
move |b| {
let server_clone = server.clone();
b.iter_custom(move |instantiations| {
let instantiations = 1_000_000;
let server_clone = server_clone.clone();
let rt = tokio::runtime::Runtime::new().unwrap();
let now = std::time::Instant::now();
rt.block_on(async move {
for i in 0..instantiations {
let server = server_clone.clone();
tokio::spawn(server.job(i as usize));
}
});
now.elapsed()
});
},
);
}

fn bench_server(c: &mut Criterion) {
common::build_wasi_example();

let modules = vec!["wasi.wasm"];
// let modules = vec!["wasi.wasm"];
let modules = vec!["spidermonkey.wasm"];
let occupancy = 1000;

for strategy in common::strategies() {
c.bench_function(
&format!(
"strategy {}, occupancy {}, benches {:?}",
common::benchmark_name(&strategy),
occupancy,
modules,
),
|b| {
b.iter_custom(|iters| {
let start = Instant::now();
run_server(
&strategy,
&modules,
occupancy,
/* instantiations = */ iters as usize,
);
start.elapsed()
});
},
);
run_server(c, &strategy, &modules[..], occupancy);
}
}

Expand Down
23 changes: 22 additions & 1 deletion crates/environ/src/vmoffsets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,8 @@ pub struct VMOffsets<P> {
defined_memories: u32,
defined_globals: u32,
defined_anyfuncs: u32,
anyfuncs_init: u32,
anyfuncs_init_u64_words: u32,
builtin_functions: u32,
size: u32,
}
Expand Down Expand Up @@ -187,6 +189,8 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
defined_memories: 0,
defined_globals: 0,
defined_anyfuncs: 0,
anyfuncs_init: 0,
anyfuncs_init_u64_words: 0,
builtin_functions: 0,
size: 0,
};
Expand Down Expand Up @@ -275,7 +279,7 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
.unwrap(),
)
.unwrap();
ret.builtin_functions = ret
ret.anyfuncs_init = ret
.defined_anyfuncs
.checked_add(
ret.num_imported_functions
Expand All @@ -285,6 +289,11 @@ impl<P: PtrSize> From<VMOffsetsFields<P>> for VMOffsets<P> {
.unwrap(),
)
.unwrap();
ret.anyfuncs_init_u64_words = (ret.num_defined_functions + 63) / 64;
ret.builtin_functions = ret
.anyfuncs_init
.checked_add(ret.anyfuncs_init_u64_words.checked_mul(8).unwrap())
.unwrap();
ret.size = ret
.builtin_functions
.checked_add(
Expand Down Expand Up @@ -589,6 +598,18 @@ impl<P: PtrSize> VMOffsets<P> {
self.defined_globals
}

/// Returns the byte offset, relative to the start of the vmctx, of the
/// `anyfuncs_init` bitset.
///
/// The bitset is addressed as consecutive 8-byte words beginning at this
/// offset; its total size in bytes is given by `vmctx_anyfuncs_init_len`.
#[inline]
pub fn vmctx_anyfuncs_init_begin(&self) -> u32 {
self.anyfuncs_init
}

/// Returns the size, in bytes, of the `anyfuncs_init` bitset.
///
/// The bitset is stored as a whole number of 64-bit words, so the result is
/// always a multiple of eight.
#[inline]
pub fn vmctx_anyfuncs_init_len(&self) -> u32 {
    // Each bitmap word is a `u64`, i.e. eight bytes wide.
    const BYTES_PER_WORD: u32 = 8;
    self.anyfuncs_init_u64_words * BYTES_PER_WORD
}

/// The offset of the `anyfuncs` array.
#[inline]
pub fn vmctx_anyfuncs_begin(&self) -> u32 {
Expand Down
21 changes: 19 additions & 2 deletions crates/runtime/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ use std::convert::TryFrom;
use std::hash::Hash;
use std::ops::Range;
use std::ptr::NonNull;
use std::sync::atomic::AtomicU64;
use std::sync::atomic::{AtomicU64, Ordering};
use std::sync::Arc;
use std::{mem, ptr, slice};
use wasmtime_environ::{
Expand Down Expand Up @@ -462,6 +462,21 @@ impl Instance {
}
}

/// Locate the "anyfunc initialized" flag for `index`.
///
/// Returns the 64-bit bitmap word that holds the flag together with a mask
/// that has only that flag's bit set. Words are laid out 8 bytes apart,
/// starting at `vmctx_anyfuncs_init_begin()`; function `i` uses bit `i % 64`
/// of word `i / 64`.
///
/// NOTE(review): `index` is used directly as the bit position, so the bitmap
/// must have one bit for every index that can be passed here. The bitmap
/// length appears to be derived from the number of *defined* functions only;
/// confirm that this also covers imported function indices.
pub(crate) fn get_anyfunc_bitmap_word(&self, index: FuncIndex) -> (&AtomicU64, u64) {
    let raw_index = index.as_u32();
    let byte_offset = self.offsets.vmctx_anyfuncs_init_begin() + ((raw_index / 64) * 8);
    let mask = 1u64 << u64::from(raw_index % 64);
    // SAFETY (assumed -- TODO confirm): relies on the vmctx allocation
    // reserving a suitably aligned bitmap word at `byte_offset` for every
    // index that reaches this function.
    let word = unsafe {
        self.vmctx_plus_offset::<AtomicU64>(byte_offset)
            .as_ref()
            .unwrap()
    };
    (word, mask)
}

/// Get a `&VMCallerCheckedAnyfunc` for the given `FuncIndex`.
///
/// Returns `None` if the index is the reserved index value.
Expand All @@ -477,12 +492,14 @@ impl Instance {
}

unsafe {
let (bitmap_word, bitmask) = self.get_anyfunc_bitmap_word(index);
let anyfunc = self
.vmctx_plus_offset::<VMCallerCheckedAnyfunc>(self.offsets.vmctx_anyfunc(index))
.as_ref()
.unwrap();
if !anyfunc.is_initialized() {
if (bitmap_word.load(Ordering::Acquire) & bitmask) == 0 {
anyfunc.initialize(self.construct_anyfunc(index));
bitmap_word.fetch_or(bitmask, Ordering::Release);
}
Some(anyfunc)
}
Expand Down
10 changes: 6 additions & 4 deletions crates/runtime/src/instance/allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -561,10 +561,12 @@ unsafe fn initialize_vmcontext(instance: &mut Instance, req: InstanceAllocationR
req.imports.globals.len(),
);

// Zero the anyfuncs -- they will be lazily initialized as required
let base = instance.anyfunc_base();
let anyfuncs = std::slice::from_raw_parts_mut(base, instance.module.functions.len());
anyfuncs.fill(VMCallerCheckedAnyfunc::zero());
// Zero the anyfunc-initialized bitmap -- the anyfuncs themselves will be lazily initialized as required
let base = instance.vmctx_plus_offset(instance.offsets.vmctx_anyfuncs_init_begin());
let bitmap_words = (instance.module.functions.len() + 63) / 64;
let len = bitmap_words * 8;
let slice = std::slice::from_raw_parts_mut(base as *mut u8, len);
slice.fill(0);

// Initialize the defined tables
let mut ptr = instance.vmctx_plus_offset(instance.offsets.vmctx_tables_begin());
Expand Down

0 comments on commit 6bb241e

Please sign in to comment.