Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

memfd/madvise-based CoW pooling allocator #3697

Merged
merged 12 commits into from
Feb 2, 2022
5 changes: 4 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ jobs:
- run: cargo check -p wasmtime --no-default-features --features async
- run: cargo check -p wasmtime --no-default-features --features uffd
- run: cargo check -p wasmtime --no-default-features --features pooling-allocator
- run: cargo check -p wasmtime --no-default-features --features memfd-allocator
- run: cargo check -p wasmtime --no-default-features --features cranelift
- run: cargo check -p wasmtime --no-default-features --features cranelift,wat,async,cache

Expand Down Expand Up @@ -310,11 +311,13 @@ jobs:
env:
RUST_BACKTRACE: 1

# Test uffd functionality on Linux
# Test Linux-specific functionality
- run: |
cargo test --features uffd -p wasmtime-runtime instance::allocator::pooling
cargo test --features uffd -p wasmtime-cli pooling_allocator
cargo test --features uffd -p wasmtime-cli wast::Cranelift
cargo test --features memfd-allocator -p wasmtime-cli pooling_allocator
cargo test --features memfd-allocator -p wasmtime-cli wast::Cranelift
if: matrix.os == 'ubuntu-latest' && matrix.target == ''
env:
RUST_BACKTRACE: 1
Expand Down
10 changes: 10 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ vtune = ["wasmtime/vtune"]
wasi-crypto = ["wasmtime-wasi-crypto"]
wasi-nn = ["wasmtime-wasi-nn"]
uffd = ["wasmtime/uffd"]
pooling-allocator = ["wasmtime/pooling-allocator"]
memfd-allocator = ["pooling-allocator", "wasmtime/memfd-allocator"]
all-arch = ["wasmtime/all-arch"]
posix-signals-on-macos = ["wasmtime/posix-signals-on-macos"]

Expand Down
23 changes: 23 additions & 0 deletions crates/environ/src/module.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,19 @@ impl MemoryPlan {
},
}
}

/// Determine whether a data segment (memory initializer) is
/// possibly out-of-bounds. Returns `true` if the initializer has a
/// dynamic location and this question cannot be resolved
/// pre-instantiation; hence, this method's result should not be
/// used to signal an error, only to exit optimized/simple fastpaths.
pub fn initializer_possibly_out_of_bounds(&self, init: &MemoryInitializer) -> bool {
match init.end() {
// Not statically known, so possibly out of bounds (we can't guarantee in-bounds).
None => true,
Some(end) => end > self.memory.minimum * (WASM_PAGE_SIZE as u64),
}
}
}

/// A WebAssembly linear memory initializer.
Expand All @@ -113,6 +126,16 @@ pub struct MemoryInitializer {
pub data: Range<u32>,
}

impl MemoryInitializer {
/// If this initializer has a definite, static, non-overflowed end address, return it.
pub fn end(&self) -> Option<u64> {
if self.base.is_some() {
return None;
}
self.offset.checked_add(self.data.len() as u64)
}
}

/// The type of WebAssembly linear memory initialization to use for a module.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub enum MemoryInitialization {
Expand Down
15 changes: 14 additions & 1 deletion crates/jit/src/instantiate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,10 @@ use wasmtime_environ::{
StackMapInformation, Trampoline, Tunables, WasmFuncType, ELF_WASMTIME_ADDRMAP,
ELF_WASMTIME_TRAPS,
};
use wasmtime_runtime::{GdbJitImageRegistration, InstantiationError, VMFunctionBody, VMTrampoline};
use wasmtime_runtime::{
CompiledModuleId, CompiledModuleIdAllocator, GdbJitImageRegistration, InstantiationError,
VMFunctionBody, VMTrampoline,
};

/// This is the name of the section in the final ELF image which contains
/// concatenated data segments from the original wasm module.
Expand Down Expand Up @@ -248,6 +251,8 @@ pub struct CompiledModule {
code: Range<usize>,
code_memory: CodeMemory,
dbg_jit_registration: Option<GdbJitImageRegistration>,
/// A unique ID used to register this module with the engine.
unique_id: CompiledModuleId,
}

impl CompiledModule {
Expand All @@ -271,6 +276,7 @@ impl CompiledModule {
mmap: MmapVec,
info: Option<CompiledModuleInfo>,
profiler: &dyn ProfilingAgent,
id_allocator: &CompiledModuleIdAllocator,
) -> Result<Arc<Self>> {
// Transfer ownership of `obj` to a `CodeMemory` object which will
// manage permissions, such as the executable bit. Once it's located
Expand Down Expand Up @@ -312,6 +318,7 @@ impl CompiledModule {
dbg_jit_registration: None,
code_memory,
meta: info.meta,
unique_id: id_allocator.alloc(),
};
ret.register_debug_and_profiling(profiler)?;

Expand All @@ -333,6 +340,12 @@ impl CompiledModule {
Ok(())
}

/// Get this module's unique ID. It is unique with respect to a
/// single allocator (which is ordinarily held on a Wasm engine).
pub fn unique_id(&self) -> CompiledModuleId {
self.unique_id
}

/// Returns the underlying memory which contains the compiled module's
/// image.
pub fn mmap(&self) -> &MmapVec {
Expand Down
3 changes: 3 additions & 0 deletions crates/runtime/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ winapi = { version = "0.3.7", features = ["winbase", "memoryapi", "errhandlingap

[target.'cfg(target_os = "linux")'.dependencies]
userfaultfd = { version = "0.4.1", optional = true }
memfd = { version = "0.4.1", optional = true }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Given where this PR now is and the trajectory of this feature, I think this is good to have as an optional dependency but I think it should be enabled by default and probably just called memfd (enabled-by-default probably from the wasmtime crate, not here). Especially with this making such a drastric difference for the on-demand allocator it seems like using memfd is a no-brainer.

To make this tweak as well as improve the portability here (right now if you enable memfd on Windows it'd probably just fail to compile) I think a few small changes are all that's needed. To be clear though this is icing on the cake and it's totally fine to do this as a follow-up, but I think the steps would be:

  • Remove the memfd-allocator
  • Add a memfd feature to the wasmtime crate that forwards to wasmtime-runtime/memfd
  • Add memfd to the default feature set of the wasmtime crate
  • Add a crates/runtime/build.rs that detects that the target's os is Linux (by looking at CARGO_CFG_TARGET_OS) and that the memfd feature is active (by looking at CARGO_FEATURE_MEMFD), and if both are present then printing println!("cargo:rustc-cfg=memfd")
  • Change all cfg(feature = "memfd-allocator") in the crate to cfg(memfd)

With that I think we'll have on-by-default memfd for Linux-only, with the opt-in ability to turn it off. Again though it's fine to defer this to a future PR.

As I type all this out though I also would question maybe we don't even need a feature for this. I suppose one thing that could come up is that if you have a million modules in a process that's a million file descriptors which can blow kernel limits, but other than that I'm not sure why anyone would explicitly want to disable memfd.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've gone ahead and done all of this; the default build now will use memfd, validated by building the wasmtime binary and strace'ing an execution. Waiting for CI to see if moving memfd out of the target-only deps into the main deps will work on non-Linux, but I am hoping it will (the crate's got a toplevel #![cfg(target_os ...)] that should make it a no-op on other platforms...?).


[build-dependencies]
cc = "1.0"
Expand All @@ -59,3 +60,5 @@ uffd = ["userfaultfd", "pooling-allocator"]
# It is useful for applications that do not bind their own exception ports and
# need portable signal handling.
posix-signals-on-macos = []

memfd-allocator = ["pooling-allocator", "memfd"]
23 changes: 23 additions & 0 deletions crates/runtime/src/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,29 @@ pub(crate) struct Instance {

#[allow(clippy::cast_ptr_alignment)]
impl Instance {
/// Helper for allocators; not a public API.
pub(crate) fn create_raw(
module: &Arc<Module>,
wasm_data: &'static [u8],
memories: PrimaryMap<DefinedMemoryIndex, Memory>,
tables: PrimaryMap<DefinedTableIndex, Table>,
host_state: Box<dyn Any + Send + Sync>,
) -> Instance {
Instance {
module: module.clone(),
offsets: VMOffsets::new(HostPtr, &module),
memories,
tables,
dropped_elements: EntitySet::with_capacity(module.passive_elements.len()),
dropped_data: EntitySet::with_capacity(module.passive_data_map.len()),
host_state,
wasm_data,
vmctx: VMContext {
_marker: std::marker::PhantomPinned,
},
}
}

/// Helper function to access various locations offset from our `*mut
/// VMContext` object.
unsafe fn vmctx_plus_offset<T>(&self, offset: u32) -> *mut T {
Expand Down
63 changes: 43 additions & 20 deletions crates/runtime/src/instance/allocator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,37 @@ use crate::memory::{DefaultMemoryCreator, Memory};
use crate::table::Table;
use crate::traphandlers::Trap;
use crate::vmcontext::{
VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMContext, VMGlobalDefinition,
VMSharedSignatureIndex,
VMBuiltinFunctionsArray, VMCallerCheckedAnyfunc, VMGlobalDefinition, VMSharedSignatureIndex,
};
use crate::ModuleMemFds;
use crate::Store;
use anyhow::Result;
use std::alloc;
use std::any::Any;
use std::convert::TryFrom;
use std::marker;
use std::ptr::{self, NonNull};
use std::slice;
use std::sync::Arc;
use thiserror::Error;
use wasmtime_environ::{
DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, EntitySet, FunctionInfo,
GlobalInit, HostPtr, MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap,
SignatureIndex, TableInitializer, TrapCode, VMOffsets, WasmType, WASM_PAGE_SIZE,
DefinedFuncIndex, DefinedMemoryIndex, DefinedTableIndex, EntityRef, FunctionInfo, GlobalInit,
MemoryInitialization, MemoryInitializer, Module, ModuleType, PrimaryMap, SignatureIndex,
TableInitializer, TrapCode, WasmType, WASM_PAGE_SIZE,
};

#[cfg(feature = "pooling-allocator")]
mod pooling;

#[cfg(feature = "memfd-allocator")]
mod memfd;
#[cfg(feature = "memfd-allocator")]
pub use self::memfd::MemFdSlot;

#[cfg(not(feature = "memfd-allocator"))]
mod memfd_disabled;
#[cfg(not(feature = "memfd-allocator"))]
pub use self::memfd_disabled::MemFdSlot;

#[cfg(feature = "pooling-allocator")]
pub use self::pooling::{
InstanceLimits, ModuleLimits, PoolingAllocationStrategy, PoolingInstanceAllocator,
Expand All @@ -39,6 +48,9 @@ pub struct InstanceAllocationRequest<'a> {
/// The base address of where JIT functions are located.
pub image_base: usize,

/// If using MemFD-based memories, the backing MemFDs.
pub memfds: Option<Arc<ModuleMemFds>>,

/// Descriptors about each compiled function, such as the offset from
/// `image_base`.
pub functions: &'a PrimaryMap<DefinedFuncIndex, FunctionInfo>,
Expand Down Expand Up @@ -376,9 +388,23 @@ fn check_memory_init_bounds(

fn initialize_memories(
instance: &mut Instance,
module: &Module,
initializers: &[MemoryInitializer],
) -> Result<(), InstantiationError> {
for init in initializers {
// Check whether this is a MemFD memory; if so, we can skip
// all initializers.
let memory = init.memory_index;
if let Some(defined_index) = module.defined_memory_index(memory) {
// We can only skip if there is actually a MemFD image. In
// some situations the MemFD image creation code will bail
// (e.g. due to an out of bounds data segment) and so we
// need to fall back on the usual initialization below.
if instance.memories[defined_index].is_memfd_with_image() {
continue;
}
}

instance
.memory_init_segment(
init.memory_index,
Expand Down Expand Up @@ -432,6 +458,14 @@ fn initialize_instance(
match &module.memory_initialization {
MemoryInitialization::Paged { map, out_of_bounds } => {
for (index, pages) in map {
// We can only skip if there is actually a MemFD image. In
// some situations the MemFD image creation code will bail
// (e.g. due to an out of bounds data segment) and so we
// need to fall back on the usual initialization below.
if instance.memories[index].is_memfd_with_image() {
continue;
}

let memory = instance.memory(index);
let slice =
unsafe { slice::from_raw_parts_mut(memory.base, memory.current_length) };
Expand All @@ -453,7 +487,7 @@ fn initialize_instance(
}
}
MemoryInitialization::Segmented(initializers) => {
initialize_memories(instance, initializers)?;
initialize_memories(instance, module, initializers)?;
}
}

Expand Down Expand Up @@ -691,19 +725,8 @@ unsafe impl InstanceAllocator for OnDemandInstanceAllocator {
let host_state = std::mem::replace(&mut req.host_state, Box::new(()));

let mut handle = {
let instance = Instance {
module: req.module.clone(),
offsets: VMOffsets::new(HostPtr, &req.module),
memories,
tables,
dropped_elements: EntitySet::with_capacity(req.module.passive_elements.len()),
dropped_data: EntitySet::with_capacity(req.module.passive_data_map.len()),
host_state,
wasm_data: &*req.wasm_data,
vmctx: VMContext {
_marker: marker::PhantomPinned,
},
};
let instance =
Instance::create_raw(&req.module, &*req.wasm_data, memories, tables, host_state);
let layout = instance.alloc_layout();
let instance_ptr = alloc::alloc(layout) as *mut Instance;
if instance_ptr.is_null() {
Expand Down
Loading