use std::cmp;
use std::collections::HashMap;
use std::env;
use std::mem;

use anyhow::{anyhow, bail, Error};
use walrus::ir::Value;
use walrus::{DataId, FunctionId, InitExpr, ValType};
use walrus::{ExportItem, GlobalId, GlobalKind, ImportKind, MemoryId, Module};
use wasm_bindgen_wasm_conventions as wasm_conventions;

const PAGE_SIZE: u32 = 1 << 16;

/// Configuration for the transformation pass in this module.
///
/// Created primarily through `new` and then executed through `run`.
pub struct Config {
    maximum_memory: u32,
    thread_stack_size: u32,
    enabled: bool,
}

impl Config {
    /// Create a new configuration with default settings.
    pub fn new() -> Config {
        Config {
            maximum_memory: 1 << 30,    // 1GB
            thread_stack_size: 1 << 20, // 1MB
            enabled: env::var("WASM_BINDGEN_THREADS").is_ok(),
        }
    }

    /// Is threaded Wasm enabled?
    pub fn is_enabled(&self, module: &Module) -> bool {
        if self.enabled {
            return true;
        }

        // Compatibility with older LLVM outputs. Newer LLVM outputs, when
        // atomics are enabled, emit a shared memory. That's a good indicator
        // that we have work to do. If shared memory isn't enabled, though,
        // then this isn't an atomic module and there's nothing to do. We
        // still allow an environment variable to force us down this path to
        // remain compatible with older LLVM outputs.
        match wasm_conventions::get_memory(module) {
            Ok(memory) => module.memories.get(memory).shared,
            Err(_) => false,
        }
    }

    /// Specify the maximum amount of memory the wasm module can ever have.
    ///
    /// We'll be specifying that the memory for this wasm module is shared, and
    /// all shared memories must have their maximum limit specified (whereas
    /// by default Rust/LLVM/LLD don't specify a maximum).
    ///
    /// The default for this option is 1GB, and this can be used to change
    /// the maximum memory we'll be specifying.
    ///
    /// The `max` argument is in units of bytes.
    ///
    /// If the maximum memory is already specified this setting won't have any
    /// effect.
    pub fn maximum_memory(&mut self, max: u32) -> &mut Config {
        self.maximum_memory = max;
        self
    }

    /// Specify the stack size for all threads spawned.
    ///
    /// The stack size is typically set by rustc as an argument to LLD and
    /// defaults to 1MB for the main thread. All threads spawned by the
    /// main thread, however, need to allocate their own stack!
    ///
    /// This configuration option indicates how large the stack of each child
    /// thread will be. This will be allocated as part of the `start` function
    /// and will be stored in LLVM's global stack pointer.
    pub fn thread_stack_size(&mut self, size: u32) -> &mut Config {
        self.thread_stack_size = size;
        self
    }

    /// Execute the transformation on the parsed wasm module specified.
    ///
    /// This function will prepare `Module` to be run on multiple threads,
    /// performing steps such as:
    ///
    /// * All data segments are switched to "passive" data segments to ensure
    ///   they're only initialized once (coming later)
    /// * If memory is exported from this module, it is instead switched to
    ///   being imported (with the same parameters).
    /// * The imported memory is required to be `shared`, ensuring it's backed
    ///   by a `SharedArrayBuffer` on the web.
    /// * A `global` for a thread ID is injected.
    /// * Four bytes in linear memory are reserved for the counter of thread
    ///   IDs.
    /// * A `start` function is injected (or prepended if one already exists)
    ///   which initializes memory for the first thread and otherwise allocates
    ///   thread ids for all threads.
    ///
    /// More and/or less may happen here over time, stay tuned!
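    ///
    /// # Example
    ///
    /// A rough sketch of driving this pass end-to-end. The crate name and
    /// file paths below are placeholders/assumptions, not part of this
    /// module; any `walrus`-parsed module works:
    ///
    /// ```no_run
    /// # use wasm_bindgen_threads_xform::Config; // assumed crate name
    /// # fn main() -> Result<(), anyhow::Error> {
    /// let mut module = walrus::Module::from_file("input.wasm")?;
    /// Config::new()
    ///     .maximum_memory(1 << 30)    // bytes: 1GB cap
    ///     .thread_stack_size(1 << 20) // bytes: 1MB per-thread stack
    ///     .run(&mut module)?;
    /// module.emit_wasm_file("output.wasm")?;
    /// # Ok(())
    /// # }
    /// ```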
    pub fn run(&self, module: &mut Module) -> Result<(), Error> {
        if !self.is_enabled(module) {
            return Ok(());
        }

        let memory = wasm_conventions::get_memory(module)?;
        let stack_pointer = wasm_conventions::get_shadow_stack_pointer(module)
            .ok_or_else(|| anyhow!("failed to find shadow stack pointer"))?;
        let addr = allocate_static_data(module, memory, 4, 4)?;
        let zero = InitExpr::Value(Value::I32(0));
        let globals = Globals {
            thread_id: module.globals.add_local(ValType::I32, true, zero),
            thread_tcb: module.globals.add_local(ValType::I32, true, zero),
        };

        // There was an "inflection point" at the LLVM 9 release where LLD
        // started having better support for producing binaries capable of
        // being used with multi-threading. Prior to LLVM 9 (e.g. nightly
        // releases before July 2019, basically) we had to paper over a lot
        // of support that hadn't yet been added to LLD. With LLVM 9 and
        // onwards, though, we expect Rust binaries to be pretty well formed
        // if prepared for threading when they come out of LLD. This `if`
        // statement switches between these two cases, figuring out if we're
        // "old style" or "new style".
        let mem = module.memories.get_mut(memory);
        let memory_init = if mem.shared {
            let prev_max = mem.maximum.unwrap();
            assert!(mem.import.is_some());
            mem.maximum = Some(cmp::max(self.maximum_memory / PAGE_SIZE, prev_max));
            assert!(mem.data_segments.is_empty());

            InitMemory::Call {
                wasm_init_memory: delete_synthetic_func(module, "__wasm_init_memory")?,
                wasm_init_tls: delete_synthetic_func(module, "__wasm_init_tls")?,
                tls_size: delete_synthetic_global(module, "__tls_size")?,
            }
        } else {
            update_memory(module, memory, self.maximum_memory)?;
            InitMemory::Segments(switch_data_segments_to_passive(module, memory)?)
        };

        inject_start(
            module,
            memory_init,
            &globals,
            addr,
            stack_pointer,
            self.thread_stack_size,
            memory,
        )?;

        implement_thread_intrinsics(module, &globals)?;

        Ok(())
    }
}

fn delete_synthetic_func(module: &mut Module, name: &str) -> Result<FunctionId, Error> {
    match delete_synthetic_export(module, name)? {
        walrus::ExportItem::Function(f) => Ok(f),
        _ => bail!("`{}` must be a function", name),
    }
}

fn delete_synthetic_global(module: &mut Module, name: &str) -> Result<u32, Error> {
    let id = match delete_synthetic_export(module, name)? {
        walrus::ExportItem::Global(g) => g,
        _ => bail!("`{}` must be a global", name),
    };
    let g = match module.globals.get(id).kind {
        walrus::GlobalKind::Local(g) => g,
        walrus::GlobalKind::Import(_) => bail!("`{}` must not be an imported global", name),
    };
    match g {
        InitExpr::Value(Value::I32(v)) => Ok(v as u32),
        _ => bail!("`{}` was not an `i32` constant", name),
    }
}

fn delete_synthetic_export(module: &mut Module, name: &str) -> Result<ExportItem, Error> {
    let item = module
        .exports
        .iter()
        .find(|e| e.name == name)
        .ok_or_else(|| anyhow!("failed to find `{}`", name))?;
    let ret = item.item;
    let id = item.id();
    module.exports.delete(id);
    Ok(ret)
}
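// A small sanity-check sketch of the synthetic-export helpers above: a
// locally defined, exported `i32` constant (standing in for LLD's
// `__tls_size`) should have its value returned and the export itself
// deleted. This builds a throwaway module via walrus's `exports.add`.
#[cfg(test)]
mod synthetic_export_tests {
    use super::*;

    #[test]
    fn deletes_export_and_reads_value() {
        let mut module = Module::default();
        let init = InitExpr::Value(Value::I32(1024));
        let id = module.globals.add_local(ValType::I32, false, init);
        module.exports.add("__tls_size", id);

        // The constant's value comes back ...
        assert_eq!(
            delete_synthetic_global(&mut module, "__tls_size").unwrap(),
            1024
        );
        // ... and the export is gone afterwards.
        assert!(module.exports.iter().all(|e| e.name != "__tls_size"));
    }
}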
struct PassiveSegment {
    id: DataId,
    offset: InitExpr,
    len: u32,
}

fn switch_data_segments_to_passive(
    module: &mut Module,
    memory: MemoryId,
) -> Result<Vec<PassiveSegment>, Error> {
    let mut ret = Vec::new();
    let memory = module.memories.get_mut(memory);
    for id in mem::replace(&mut memory.data_segments, Default::default()) {
        let data = module.data.get_mut(id);
        let kind = match &data.kind {
            walrus::DataKind::Active(kind) => kind,
            walrus::DataKind::Passive => continue,
        };
        let offset = match kind.location {
            walrus::ActiveDataLocation::Absolute(n) => {
                walrus::InitExpr::Value(walrus::ir::Value::I32(n as i32))
            }
            walrus::ActiveDataLocation::Relative(global) => walrus::InitExpr::Global(global),
        };
        data.kind = walrus::DataKind::Passive;
        ret.push(PassiveSegment {
            id,
            offset,
            len: data.value.len() as u32,
        });
    }

    Ok(ret)
}

fn update_memory(module: &mut Module, memory: MemoryId, max: u32) -> Result<MemoryId, Error> {
    assert!(max % PAGE_SIZE == 0);
    let memory = module.memories.get_mut(memory);

    // For multithreading, if we want to use the exact same module on all
    // threads we'll need to be sure to import memory, so switch it to an
    // import if it isn't one already.
    if memory.import.is_none() {
        let id = module
            .imports
            .add("env", "memory", ImportKind::Memory(memory.id()));
        memory.import = Some(id);
    }

    // If the memory isn't already shared, make it so, as that's the whole
    // point here!
    if !memory.shared {
        memory.shared = true;
        if memory.maximum.is_none() {
            memory.maximum = Some(max / PAGE_SIZE);
        }
    }

    Ok(memory.id())
}

struct Globals {
    thread_id: GlobalId,
    thread_tcb: GlobalId,
}
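// A sketch of the align-up arithmetic used by `allocate_static_data` below:
// `(offset + (align - 1)) & !(align - 1)` rounds `offset` up to the next
// multiple of `align`, which must be a power of two. The helper here is
// illustrative only and is not used by the pass itself.
#[cfg(test)]
mod align_tests {
    fn align_up(offset: u32, align: u32) -> u32 {
        (offset + (align - 1)) & !(align - 1)
    }

    #[test]
    fn rounds_up_to_next_multiple() {
        assert_eq!(align_up(1024, 4), 1024); // already aligned: unchanged
        assert_eq!(align_up(1027, 4), 1028); // rounds up to the next multiple
        assert_eq!(align_up(1, 8), 8);
    }
}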
fn allocate_static_data(
    module: &mut Module,
    memory: MemoryId,
    size: u32,
    align: u32,
) -> Result<u32, Error> {
    // First up, look for a `__heap_base` export which is injected by LLD as
    // part of the linking process. Note that `__heap_base` should in theory
    // be *after* the stack and data, which means it's at the very end of the
    // address space and should be safe for us to inject 4 bytes of data at.
    let heap_base = module
        .exports
        .iter()
        .filter(|e| e.name == "__heap_base")
        .filter_map(|e| match e.item {
            ExportItem::Global(id) => Some(id),
            _ => None,
        })
        .next();
    let heap_base = match heap_base {
        Some(idx) => idx,
        None => bail!("failed to find `__heap_base` for injecting thread id"),
    };

    // Now we need to bump up `__heap_base` by 4 bytes as we'd like to reserve
    // those 4 bytes for our thread id counter. Do lots of validation here to
    // make sure that `__heap_base` is a non-mutable integer, and then do
    // some logic:
    //
    // * We require that `__heap_base` is aligned to 4 as that's what the
    //   atomic will require anyway.
    // * We *may* have to add another page to the minimum for this module. If
    //   by reserving 4 bytes the heap base now lies on a different page then
    //   we probably went past our minimum page requirement, so we'll need to
    //   update our memory limits to add one.
    //
    // Otherwise here we'll rewrite the `__heap_base` global's initializer to
    // be 4 larger, reserving us those 4 bytes for a thread id counter.
    let (address, add_a_page) = {
        let global = module.globals.get_mut(heap_base);
        if global.ty != ValType::I32 {
            bail!("the `__heap_base` global doesn't have the type `i32`");
        }
        if global.mutable {
            bail!("the `__heap_base` global is unexpectedly mutable");
        }
        let offset = match &mut global.kind {
            GlobalKind::Local(InitExpr::Value(Value::I32(n))) => n,
            _ => bail!("`__heap_base` not a locally defined `i32`"),
        };
        let address = (*offset as u32 + (align - 1)) & !(align - 1); // align up
        let add_a_page = (address + size) / PAGE_SIZE != address / PAGE_SIZE;
        *offset = (address + size) as i32;
        (address, add_a_page)
    };

    if add_a_page {
        let memory = module.memories.get_mut(memory);
        memory.initial += 1;
        memory.maximum = memory.maximum.map(|m| cmp::max(m, memory.initial));
    }

    Ok(address)
}

enum InitMemory {
    Segments(Vec<PassiveSegment>),
    Call {
        wasm_init_memory: walrus::FunctionId,
        wasm_init_tls: walrus::FunctionId,
        tls_size: u32,
    },
}

fn inject_start(
    module: &mut Module,
    memory_init: InitMemory,
    globals: &Globals,
    addr: u32,
    stack_pointer: GlobalId,
    stack_size: u32,
    memory: MemoryId,
) -> Result<(), Error> {
    use walrus::ir::*;

    assert!(stack_size % PAGE_SIZE == 0);

    let mut builder = walrus::FunctionBuilder::new(&mut module.types, &[], &[]);
    let local = module.locals.add(ValType::I32);
    let mut body = builder.func_body();

    // Atomically increment the thread id counter, claiming the previous
    // value as this thread's id.
    body.i32_const(addr as i32)
        .i32_const(1)
        .atomic_rmw(
            memory,
            AtomicOp::Add,
            AtomicWidth::I32,
            MemArg {
                align: 4,
                offset: 0,
            },
        )
        .local_tee(local)
        .global_set(globals.thread_id);

    // Perform an if/else based on whether we're the first thread or not. Our
    // thread ID will be zero if we're the first thread, otherwise it'll be
    // nonzero (assuming we don't overflow...)
    body.local_get(local);
    body.if_else(
        None,
        // If our thread id is nonzero then we're the second or greater
        // thread, so we give ourselves a stack via `memory.grow` and we
        // update our stack pointer, as the default stack pointer is surely
        // wrong for us.
        |body| {
            // local0 = grow_memory(stack_size);
            body.i32_const((stack_size / PAGE_SIZE) as i32)
                .memory_grow(memory)
                .local_set(local);

            // if local0 == -1 then trap
            body.block(None, |body| {
                let target = body.id();
                body.local_get(local)
                    .i32_const(-1)
                    .binop(BinaryOp::I32Ne)
                    .br_if(target)
                    .unreachable();
            });

            // stack_pointer = local0 * PAGE_SIZE + stack_size
            body.local_get(local)
                .i32_const(PAGE_SIZE as i32)
                .binop(BinaryOp::I32Mul)
                .i32_const(stack_size as i32)
                .binop(BinaryOp::I32Add)
                .global_set(stack_pointer);
        },
        // If the thread ID is zero then we can skip the update of the stack
        // pointer as we know our stack pointer is valid. We need to
        // initialize memory, however, so do that here.
        |body| {
            match &memory_init {
                InitMemory::Segments(segments) => {
                    for segment in segments {
                        match segment.offset {
                            InitExpr::Global(id) => body.global_get(id),
                            InitExpr::Value(v) => body.const_(v),
                        };
                        body.i32_const(0)
                            .i32_const(segment.len as i32)
                            .memory_init(memory, segment.id)
                            .data_drop(segment.id);
                    }
                }
                InitMemory::Call {
                    wasm_init_memory, ..
                } => {
                    body.call(*wasm_init_memory);
                }
            }
        },
    );

    // If we have these globals then we're using the new thread-local system
    // implemented in LLVM, which means that `__wasm_init_tls` needs to be
    // called with a chunk of memory `tls_size` bytes big to set as the
    // thread's thread-local data block.
    if let InitMemory::Call {
        wasm_init_tls,
        tls_size,
        ..
    } = memory_init
    {
        let malloc = find_wbindgen_malloc(module)?;
        body.i32_const(tls_size as i32)
            .call(malloc)
            .call(wasm_init_tls);
    }

    // If a start function previously existed we're done with our own
    // initialization so delegate to it now.
    if let Some(id) = module.start.take() {
        body.call(id);
    }

    // Finish off our newly generated function.
    let id = builder.finish(Vec::new(), &mut module.funcs);

    // ... and finally flag it as the new start function
    module.start = Some(id);

    Ok(())
}
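// A sketch mirroring the stack-pointer arithmetic that `inject_start` emits
// above: `memory.grow` returns the old size in pages, so the new stack
// pointer is the top of the freshly grown region. The helper is illustrative
// only; the real computation happens in the generated Wasm, not in Rust.
#[cfg(test)]
mod stack_pointer_tests {
    use super::PAGE_SIZE;

    fn new_stack_pointer(old_size_pages: u32, stack_size: u32) -> u32 {
        old_size_pages * PAGE_SIZE + stack_size
    }

    #[test]
    fn points_at_top_of_grown_region() {
        // Memory was 16 pages (1MB); growing by a 1MB stack leaves the new
        // stack pointer at 2MB, the top of the new allocation (the shadow
        // stack grows downwards).
        assert_eq!(new_stack_pointer(16, 1 << 20), 2 << 20);
    }
}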
fn find_wbindgen_malloc(module: &Module) -> Result<FunctionId, Error> {
    let e = module
        .exports
        .iter()
        .find(|e| e.name == "__wbindgen_malloc")
        .ok_or_else(|| anyhow!("failed to find `__wbindgen_malloc`"))?;
    match e.item {
        walrus::ExportItem::Function(f) => Ok(f),
        _ => bail!("`__wbindgen_malloc` wasn't a function"),
    }
}

fn implement_thread_intrinsics(module: &mut Module, globals: &Globals) -> Result<(), Error> {
    use walrus::ir::*;

    let mut map = HashMap::new();

    enum Intrinsic {
        GetThreadId,
        GetTcb,
        SetTcb,
    }

    // Find all imports from the special `__wbindgen_thread_xform__` module,
    // validate their signatures, and remember which intrinsic each one is.
    let imports = module
        .imports
        .iter()
        .filter(|i| i.module == "__wbindgen_thread_xform__");
    for import in imports {
        let function = match import.kind {
            ImportKind::Function(id) => module.funcs.get(id),
            _ => bail!("non-function import from special module"),
        };
        let ty = module.types.get(function.ty());

        match &import.name[..] {
            "__wbindgen_current_id" => {
                if !ty.params().is_empty() || ty.results() != &[ValType::I32] {
                    bail!("`__wbindgen_current_id` intrinsic has the wrong signature");
                }
                map.insert(function.id(), Intrinsic::GetThreadId);
            }
            "__wbindgen_tcb_get" => {
                if !ty.params().is_empty() || ty.results() != &[ValType::I32] {
                    bail!("`__wbindgen_tcb_get` intrinsic has the wrong signature");
                }
                map.insert(function.id(), Intrinsic::GetTcb);
            }
            "__wbindgen_tcb_set" => {
                if !ty.results().is_empty() || ty.params() != &[ValType::I32] {
                    bail!("`__wbindgen_tcb_set` intrinsic has the wrong signature");
                }
                map.insert(function.id(), Intrinsic::SetTcb);
            }
            other => bail!("unknown thread intrinsic: {}", other),
        }
    }

    struct Visitor<'a> {
        map: &'a HashMap<FunctionId, Intrinsic>,
        globals: &'a Globals,
    }

    // Rewrite each call to an intrinsic into a read/write of the
    // corresponding injected global.
    module.funcs.iter_local_mut().for_each(|(_id, func)| {
        let entry = func.entry_block();
        dfs_pre_order_mut(&mut Visitor { map: &map, globals }, func, entry);
    });

    impl VisitorMut for Visitor<'_> {
        fn visit_instr_mut(&mut self, instr: &mut Instr, _loc: &mut InstrLocId) {
            let call = match instr {
                Instr::Call(e) => e,
                _ => return,
            };
            match self.map.get(&call.func) {
                Some(Intrinsic::GetThreadId) => {
                    *instr = GlobalGet {
                        global: self.globals.thread_id,
                    }
                    .into();
                }
                Some(Intrinsic::GetTcb) => {
                    *instr = GlobalGet {
                        global: self.globals.thread_tcb,
                    }
                    .into();
                }
                Some(Intrinsic::SetTcb) => {
                    *instr = GlobalSet {
                        global: self.globals.thread_tcb,
                    }
                    .into();
                }
                None => {}
            }
        }
    }

    Ok(())
}
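// A sketch exercising the signature validation in
// `implement_thread_intrinsics` above: a `() -> i32` import of
// `__wbindgen_current_id` is accepted, while a `() -> ()` one is rejected.
// Assumes walrus's `Module::add_import_func` and `types.add` helpers.
#[cfg(test)]
mod intrinsic_tests {
    use super::*;
    use walrus::ir::Value;

    fn module_with_current_id(results: &[ValType]) -> (Module, Globals) {
        let mut module = Module::default();
        let ty = module.types.add(&[], results);
        module.add_import_func("__wbindgen_thread_xform__", "__wbindgen_current_id", ty);
        let zero = InitExpr::Value(Value::I32(0));
        let globals = Globals {
            thread_id: module.globals.add_local(ValType::I32, true, zero),
            thread_tcb: module.globals.add_local(ValType::I32, true, zero),
        };
        (module, globals)
    }

    #[test]
    fn current_id_signature_is_validated() {
        // Correct signature: `() -> i32`.
        let (mut module, globals) = module_with_current_id(&[ValType::I32]);
        assert!(implement_thread_intrinsics(&mut module, &globals).is_ok());

        // Wrong signature: `() -> ()` should be rejected.
        let (mut module, globals) = module_with_current_id(&[]);
        assert!(implement_thread_intrinsics(&mut module, &globals).is_err());
    }
}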