From 8cb792474cedbfb8d2e7e3c1c38fc2df4cdabca1 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Tue, 23 Jul 2019 07:30:32 -0700 Subject: [PATCH] Fully update threading support for LLVM 9 Support has landed in rust-lang/rust for full support for LLVM 9's interpretation of WebAssembly threads. This commit updates our thread transformation pass to take all this into account, namely: * The threading pass now runs by default and is keyed on whether memory is shared, not off an env var. * TLS is initialized in addition to memory on each thread. * Stack pointer finding is tweaked to account for the TLS base also being a mutable global. * The build of the parallel raytrace example was updated to use today's nightly. --- azure-pipelines.yml | 2 +- crates/cli-support/src/lib.rs | 19 ++- crates/threads-xform/src/lib.rs | 163 +++++++++++++++++++------- examples/raytrace-parallel/Xargo.toml | 1 - examples/raytrace-parallel/build.sh | 17 ++- 5 files changed, 136 insertions(+), 66 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9015e842..9ea205af 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -166,7 +166,7 @@ jobs: steps: - template: ci/azure-install-rust.yml parameters: - toolchain: nightly-2019-06-13 + toolchain: nightly-2019-07-30 - template: ci/azure-install-sccache.yml - script: rustup component add rust-src displayName: "install rust-src" diff --git a/crates/cli-support/src/lib.rs b/crates/cli-support/src/lib.rs index dcce3c97..05026b10 100755 --- a/crates/cli-support/src/lib.rs +++ b/crates/cli-support/src/lib.rs @@ -32,9 +32,9 @@ pub struct Bindgen { // Experimental support for weakrefs, an upcoming ECMAScript feature. // Currently only enable-able through an env var. 
weak_refs: bool, - // Experimental support for the wasm threads proposal, transforms the wasm - // module to be "ready to be instantiated on any thread" - threads: Option, + // Support for the wasm threads proposal, transforms the wasm module to be + // "ready to be instantiated on any thread" + threads: wasm_bindgen_threads_xform::Config, anyref: bool, encode_into: EncodeInto, } @@ -286,10 +286,8 @@ impl Bindgen { ); } - if let Some(cfg) = &self.threads { - cfg.run(&mut module) - .with_context(|_| "failed to prepare module for threading")?; - } + self.threads.run(&mut module) + .with_context(|_| "failed to prepare module for threading")?; // If requested, turn all mangled symbols into prettier unmangled // symbols with the help of `rustc-demangle`. @@ -395,10 +393,7 @@ fn reset_indentation(s: &str) -> String { // Eventually these will all be CLI options, but while they're unstable features // they're left as environment variables. We don't guarantee anything about // backwards-compatibility with these options. -fn threads_config() -> Option { - if env::var("WASM_BINDGEN_THREADS").is_err() { - return None; - } +fn threads_config() -> wasm_bindgen_threads_xform::Config { let mut cfg = wasm_bindgen_threads_xform::Config::new(); if let Ok(s) = env::var("WASM_BINDGEN_THREADS_MAX_MEMORY") { cfg.maximum_memory(s.parse().unwrap()); @@ -406,7 +401,7 @@ fn threads_config() -> Option { if let Ok(s) = env::var("WASM_BINDGEN_THREADS_STACK_SIZE") { cfg.thread_stack_size(s.parse().unwrap()); } - Some(cfg) + cfg } fn demangle(module: &mut Module) { diff --git a/crates/threads-xform/src/lib.rs b/crates/threads-xform/src/lib.rs index a02fe9c4..e90ea28b 100644 --- a/crates/threads-xform/src/lib.rs +++ b/crates/threads-xform/src/lib.rs @@ -1,5 +1,6 @@ use std::cmp; use std::collections::HashMap; +use std::env; use std::mem; use failure::{bail, format_err, Error}; @@ -78,9 +79,19 @@ impl Config { /// /// More and/or less may happen here over time, stay tuned! 
pub fn run(&self, module: &mut Module) -> Result<(), Error> { - let stack_pointer = find_stack_pointer(module)?; + // Compatibility with older LLVM outputs. Newer LLVM outputs, when + // atomics are enabled, emit a shared memory. That's a good indicator + // that we have work to do. If shared memory isn't enabled, though, then + // this isn't an atomic module so there's nothing to do. We still allow, + // though, an environment variable to force us to go down this path to + // remain compatible with older LLVM outputs. let memory = find_memory(module)?; - let addr = inject_thread_id_counter(module, memory)?; + if !module.memories.get(memory).shared && env::var("WASM_BINDGEN_THREADS").is_err() { + return Ok(()); + } + + let stack_pointer = find_stack_pointer(module)?; + let addr = allocate_static_data(module, memory, 4, 4)?; let zero = InitExpr::Value(Value::I32(0)); let globals = Globals { thread_id: module.globals.add_local(ValType::I32, true, zero), @@ -103,18 +114,11 @@ impl Config { mem.maximum = Some(cmp::max(self.maximum_memory / PAGE_SIZE, prev_max)); assert!(mem.data_segments.is_empty()); - let init_memory = module - .exports - .iter() - .find(|e| e.name == "__wasm_init_memory") - .ok_or_else(|| format_err!("failed to find `__wasm_init_memory`"))?; - let init_memory_id = match init_memory.item { - walrus::ExportItem::Function(f) => f, - _ => bail!("`__wasm_init_memory` must be a function"), - }; - let export_id = init_memory.id(); - module.exports.delete(export_id); - InitMemory::Call(init_memory_id) + InitMemory::Call { + wasm_init_memory: delete_synthetic_func(module, "__wasm_init_memory")?, + wasm_init_tls: delete_synthetic_func(module, "__wasm_init_tls")?, + tls_size: delete_synthetic_global(module, "__tls_size")?, + } } else { update_memory(module, memory, self.maximum_memory)?; InitMemory::Segments(switch_data_segments_to_passive(module, memory)?) 
@@ -127,13 +131,47 @@ impl Config { stack_pointer, self.thread_stack_size, memory, - ); + )?; implement_thread_intrinsics(module, &globals)?; Ok(()) } } +fn delete_synthetic_func(module: &mut Module, name: &str) -> Result { + match delete_synthetic_export(module, name)? { + walrus::ExportItem::Function(f) => Ok(f), + _ => bail!("`{}` must be a function", name), + } +} + +fn delete_synthetic_global(module: &mut Module, name: &str) -> Result { + let id = match delete_synthetic_export(module, name)? { + walrus::ExportItem::Global(g) => g, + _ => bail!("`{}` must be a global", name), + }; + let g = match module.globals.get(id).kind { + walrus::GlobalKind::Local(g) => g, + walrus::GlobalKind::Import(_) => bail!("`{}` must not be an imported global", name), + }; + match g { + InitExpr::Value(Value::I32(v)) => Ok(v as u32), + _ => bail!("`{}` was not an `i32` constant", name), + } +} + +fn delete_synthetic_export(module: &mut Module, name: &str) -> Result { + let item = module + .exports + .iter() + .find(|e| e.name == name) + .ok_or_else(|| format_err!("failed to find `{}`", name))?; + let ret = item.item; + let id = item.id(); + module.exports.delete(id); + Ok(ret) +} + struct PassiveSegment { id: DataId, offset: InitExpr, @@ -211,7 +249,12 @@ struct Globals { thread_tcb: GlobalId, } -fn inject_thread_id_counter(module: &mut Module, memory: MemoryId) -> Result { +fn allocate_static_data( + module: &mut Module, + memory: MemoryId, + size: u32, + align: u32, +) -> Result { // First up, look for a `__heap_base` export which is injected by LLD as // part of the linking process. 
Note that `__heap_base` should in theory be // *after* the stack and data, which means it's at the very end of the @@ -256,9 +299,9 @@ fn inject_thread_id_counter(module: &mut Module, memory: MemoryId) -> Result n, _ => bail!("`__heap_base` not a locally defined `i32`"), }; - let address = (*offset as u32 + 3) & !3; // align up - let add_a_page = (address + 4) / PAGE_SIZE != address / PAGE_SIZE; - *offset = (address + 4) as i32; + let address = (*offset as u32 + (align - 1)) & !(align - 1); // align up + let add_a_page = (address + size) / PAGE_SIZE != address / PAGE_SIZE; + *offset = (address + size) as i32; (address, add_a_page) }; @@ -282,22 +325,32 @@ fn find_stack_pointer(module: &mut Module) -> Result, Error> { }) .collect::>(); - match candidates.len() { - // If there are no mutable i32 globals, assume this module doesn't even - // need a stack pointer! - 0 => Ok(None), - - // If there's more than one global give up for now. Eventually we can - // probably do better by pattern matching on functions, but this should - // be sufficient for LLVM's output for now. - 1 => Ok(Some(candidates[0].id())), - _ => bail!("too many mutable globals to infer the stack pointer"), + if candidates.len() == 0 { + return Ok(None); } + if candidates.len() > 2 { + bail!("too many mutable globals to infer the stack pointer"); + } + if candidates.len() == 1 { + return Ok(Some(candidates[0].id())); + } + + // If we've got two mutable globals then we're in a pretty standard + // situation for threaded code where one is the stack pointer and one is the + // TLS base offset. We need to figure out which is which, and we basically + // assume LLVM's current codegen where the first is the stack pointer. + // + // TODO: have an actual check here. 
+ Ok(Some(candidates[0].id())) } enum InitMemory { Segments(Vec), - Call(walrus::FunctionId), + Call { + wasm_init_memory: walrus::FunctionId, + wasm_init_tls: walrus::FunctionId, + tls_size: u32, + }, } fn inject_start( @@ -308,7 +361,7 @@ fn inject_start( stack_pointer: Option, stack_size: u32, memory: MemoryId, -) { +) -> Result<(), Error> { use walrus::ir::*; assert!(stack_size % PAGE_SIZE == 0); @@ -376,15 +429,6 @@ fn inject_start( let sp = block.binop(BinaryOp::I32Add, sp_base, stack_size); let set_stack_pointer = block.global_set(stack_pointer, sp); block.expr(set_stack_pointer); - - // FIXME(WebAssembly/tool-conventions#117) we probably don't want to - // duplicate drop with `if_zero_block` or otherwise just infer to drop - // all these data segments, this seems like something to synthesize in - // the linker... - for segment in module.data.iter() { - let drop = block.data_drop(segment.id()); - block.expr(drop); - } } let if_nonzero_block = block.id(); drop(block); @@ -394,7 +438,7 @@ fn inject_start( // memory, however, so do that here. let if_zero_block = { let mut block = builder.if_else_block(Box::new([]), Box::new([])); - match memory_init { + match &memory_init { InitMemory::Segments(segments) => { for segment in segments { let zero = block.i32_const(0); @@ -409,8 +453,10 @@ fn inject_start( block.expr(drop); } } - InitMemory::Call(wasm_init_memory) => { - let call = block.call(wasm_init_memory, Box::new([])); + InitMemory::Call { + wasm_init_memory, .. + } => { + let call = block.call(*wasm_init_memory, Box::new([])); block.expr(call); } } @@ -420,6 +466,23 @@ fn inject_start( let block = builder.if_else(thread_id_is_nonzero, if_nonzero_block, if_zero_block); exprs.push(block); + // If we have these globals then we're using the new thread local system + // implemented in LLVM, which means that `__wasm_init_tls` needs to be + // called with a chunk of memory `tls_size` bytes big to set as the threads + // thread-local data block. 
+ if let InitMemory::Call { + wasm_init_tls, + tls_size, + .. + } = memory_init + { + let malloc = find_wbindgen_malloc(module)?; + let size = builder.i32_const(tls_size as i32); + let ptr = builder.call(malloc, Box::new([size])); + let block = builder.call(wasm_init_tls, Box::new([ptr])); + exprs.push(block); + } + // If a start function previously existed we're done with our own // initialization so delegate to them now. if let Some(id) = module.start.take() { @@ -432,6 +495,20 @@ fn inject_start( // ... and finally flag it as the new start function module.start = Some(id); + + Ok(()) +} + +fn find_wbindgen_malloc(module: &Module) -> Result { + let e = module + .exports + .iter() + .find(|e| e.name == "__wbindgen_malloc") + .ok_or_else(|| format_err!("failed to find `__wbindgen_malloc`"))?; + match e.item { + walrus::ExportItem::Function(f) => Ok(f), + _ => bail!("`__wbindgen_malloc` wasn't a funtion"), + } } fn implement_thread_intrinsics(module: &mut Module, globals: &Globals) -> Result<(), Error> { diff --git a/examples/raytrace-parallel/Xargo.toml b/examples/raytrace-parallel/Xargo.toml index 883133e4..4e87884d 100644 --- a/examples/raytrace-parallel/Xargo.toml +++ b/examples/raytrace-parallel/Xargo.toml @@ -1,3 +1,2 @@ [dependencies.std] stage = 0 -features = ['wasm-bindgen-threads'] diff --git a/examples/raytrace-parallel/build.sh b/examples/raytrace-parallel/build.sh index 1935c6b4..e47b9ecb 100755 --- a/examples/raytrace-parallel/build.sh +++ b/examples/raytrace-parallel/build.sh @@ -10,16 +10,15 @@ set -ex # # * Next we need to compile everything with the `atomics` feature enabled, # ensuring that LLVM will generate atomic instructions and such. -RUSTFLAGS='-C target-feature=+atomics' \ +RUSTFLAGS='-C target-feature=+atomics,+bulk-memory' \ xargo build --target wasm32-unknown-unknown --release -# Threading support is disabled by default in wasm-bindgen, so use an env var -# here to turn it on for our bindings generation. 
Also note that webpack isn't -# currently compatible with atomics, so we go with the --no-modules output. -WASM_BINDGEN_THREADS=1 \ - cargo run --manifest-path ../../crates/cli/Cargo.toml \ - --bin wasm-bindgen -- \ - ../../target/wasm32-unknown-unknown/release/raytrace_parallel.wasm --out-dir . \ - --no-modules +# Note the usage of `--no-modules` here which is used to create an output which +# is usable from Web Workers. We notably can't use `--target bundler` since +# Webpack doesn't have support for atomics yet. +cargo run --manifest-path ../../crates/cli/Cargo.toml \ + --bin wasm-bindgen -- \ + ../../target/wasm32-unknown-unknown/release/raytrace_parallel.wasm --out-dir . \ + --no-modules python3 -m http.server