From 2fbb8359e0ada6c94af467f6b1b1837fecaced62 Mon Sep 17 00:00:00 2001 From: Nick Fitzgerald Date: Mon, 16 Sep 2019 11:18:06 -0700 Subject: [PATCH] Create the `wasm-bindgen-wasm-conventions` crate This tiny crate provides utilities for working with Wasm codegen conventions (typically established by LLVM or lld) such as getting the shadow stack pointer. It also de-duplicates all the places in the codebase where we were implementing these conventions in one-off ways. --- crates/cli-support/Cargo.toml | 1 + crates/cli-support/src/lib.rs | 31 ++++--- crates/cli-support/src/webidl/standard.rs | 46 ++-------- crates/threads-xform/Cargo.toml | 1 + crates/threads-xform/src/lib.rs | 104 ++++++++-------------- crates/wasm-conventions/Cargo.toml | 16 ++++ crates/wasm-conventions/src/lib.rs | 95 ++++++++++++++++++++ 7 files changed, 173 insertions(+), 121 deletions(-) create mode 100644 crates/wasm-conventions/Cargo.toml create mode 100755 crates/wasm-conventions/src/lib.rs diff --git a/crates/cli-support/Cargo.toml b/crates/cli-support/Cargo.toml index 3e6498d8..90409c20 100644 --- a/crates/cli-support/Cargo.toml +++ b/crates/cli-support/Cargo.toml @@ -23,5 +23,6 @@ wasm-bindgen-anyref-xform = { path = '../anyref-xform', version = '=0.2.50' } wasm-bindgen-shared = { path = "../shared", version = '=0.2.50' } wasm-bindgen-multi-value-xform = { path = '../multi-value-xform', version = '=0.2.50' } wasm-bindgen-threads-xform = { path = '../threads-xform', version = '=0.2.50' } +wasm-bindgen-wasm-conventions = { path = '../wasm-conventions', version = '=0.2.50' } wasm-bindgen-wasm-interpreter = { path = "../wasm-interpreter", version = '=0.2.50' } wasm-webidl-bindings = "0.5.0" diff --git a/crates/cli-support/src/lib.rs b/crates/cli-support/src/lib.rs index 666f52dd..fea9749e 100755 --- a/crates/cli-support/src/lib.rs +++ b/crates/cli-support/src/lib.rs @@ -8,6 +8,7 @@ use std::mem; use std::path::{Path, PathBuf}; use std::str; use walrus::Module; +use wasm_bindgen_wasm_conventions 
as wasm_conventions; mod anyref; mod decode; @@ -278,18 +279,13 @@ impl Bindgen { } }; - // Our multi-value xform relies on the presence of the stack pointer, so - // temporarily export it so that our many GC's don't remove it before - // the xform runs. - if self.multi_value { - // Assume that the first global is the shadow stack pointer, since that is - // what LLVM codegens. - match module.globals.iter().next() { - Some(g) if g.ty == walrus::ValType::I32 => { - module.exports.add("__shadow_stack_pointer", g.id()); - } - _ => {} - } + // Our threads and multi-value xforms rely on the presence of the stack + // pointer, so temporarily export it so that our many GC's don't remove + // it before the xform runs. + let mut exported_shadow_stack_pointer = false; + if self.multi_value || self.threads.is_enabled() { + wasm_conventions::export_shadow_stack_pointer(&mut module)?; + exported_shadow_stack_pointer = true; } // This isn't the hardest thing in the world too support but we @@ -387,6 +383,17 @@ impl Bindgen { } } + // If we exported the shadow stack pointer earlier, remove it from the + // export set now. + if exported_shadow_stack_pointer { + wasm_conventions::unexport_shadow_stack_pointer(&mut module)?; + // The shadow stack pointer is potentially unused now, but since it + // most likely _is_ in use, we don't pay the cost of a full GC here + // just to remove one potentially unnecessary global. 
+ // + // walrus::passes::gc::run(&mut module); + } + Ok(Output { module, stem: stem.to_string(), diff --git a/crates/cli-support/src/webidl/standard.rs b/crates/cli-support/src/webidl/standard.rs index e3ef0b10..105fc9a4 100644 --- a/crates/cli-support/src/webidl/standard.rs +++ b/crates/cli-support/src/webidl/standard.rs @@ -53,9 +53,10 @@ use crate::descriptor::VectorKind; use crate::webidl::{AuxExportKind, AuxImport, AuxValue, JsImport, JsImportName}; use crate::webidl::{NonstandardIncoming, NonstandardOutgoing}; use crate::webidl::{NonstandardWebidlSection, WasmBindgenAux}; -use failure::{bail, format_err, Error, ResultExt}; -use walrus::{GlobalId, MemoryId, Module}; +use failure::{bail, Error, ResultExt}; +use walrus::Module; use wasm_bindgen_multi_value_xform as multi_value_xform; +use wasm_bindgen_wasm_conventions as wasm_conventions; use wasm_webidl_bindings::ast; pub fn add_multi_value( @@ -79,8 +80,8 @@ pub fn add_multi_value( return Ok(()); } - let memory = get_memory(module)?; - let shadow_stack_pointer = get_shadow_stack_pointer(module)?; + let shadow_stack_pointer = wasm_conventions::get_shadow_stack_pointer(module)?; + let memory = wasm_conventions::get_memory(module)?; multi_value_xform::run(module, memory, shadow_stack_pointer, &to_xform)?; // Finally, unset `return_via_outptr`, fix up its incoming bindings' @@ -163,43 +164,6 @@ fn fixup_binding_argument_gets(incoming: &mut [NonstandardIncoming]) -> Result<( } } -fn get_memory(module: &Module) -> Result { - let mut memories = module.memories.iter().map(|m| m.id()); - let memory = memories.next(); - if memories.next().is_some() { - bail!( - "expected a single memory, found multiple; multiple memories \ - currently not supported" - ); - } - memory.ok_or_else(|| { - format_err!( - "module does not have a memory; must have a memory \ - to transform return pointers into Wasm multi-value" - ) - }) -} - -// Get the `__shadow_stack_pointer` global that we stashed in an export early on -// in the pipeline. 
-fn get_shadow_stack_pointer(module: &mut Module) -> Result { - let (g, e) = module - .exports - .iter() - .find(|e| e.name == "__shadow_stack_pointer") - .map(|e| { - let g = match e.item { - walrus::ExportItem::Global(g) => g, - _ => unreachable!(), - }; - (g, e.id()) - }) - .ok_or_else(|| format_err!("module does not have a shadow stack pointer"))?; - - module.exports.delete(e); - Ok(g) -} - pub fn add_section( module: &mut Module, aux: &WasmBindgenAux, diff --git a/crates/threads-xform/Cargo.toml b/crates/threads-xform/Cargo.toml index 0c405e73..bca429c7 100644 --- a/crates/threads-xform/Cargo.toml +++ b/crates/threads-xform/Cargo.toml @@ -14,3 +14,4 @@ edition = "2018" [dependencies] failure = "0.1" walrus = "0.12.0" +wasm-bindgen-wasm-conventions = { path = "../wasm-conventions", version = "=0.2.50" } diff --git a/crates/threads-xform/src/lib.rs b/crates/threads-xform/src/lib.rs index 017d0728..316f6327 100644 --- a/crates/threads-xform/src/lib.rs +++ b/crates/threads-xform/src/lib.rs @@ -7,6 +7,7 @@ use failure::{bail, format_err, Error}; use walrus::ir::Value; use walrus::{DataId, FunctionId, InitExpr, ValType}; use walrus::{ExportItem, GlobalId, GlobalKind, ImportKind, MemoryId, Module}; +use wasm_bindgen_wasm_conventions as wasm_conventions; const PAGE_SIZE: u32 = 1 << 16; @@ -16,6 +17,7 @@ const PAGE_SIZE: u32 = 1 << 16; pub struct Config { maximum_memory: u32, thread_stack_size: u32, + enabled: bool, } impl Config { @@ -24,9 +26,15 @@ impl Config { Config { maximum_memory: 1 << 30, // 1GB thread_stack_size: 1 << 20, // 1MB + enabled: env::var("WASM_BINDGEN_THREADS").is_ok(), } } + /// Is threaded Wasm enabled? + pub fn is_enabled(&self) -> bool { + self.enabled + } + /// Specify the maximum amount of memory the wasm module can ever have. /// /// We'll be specifying that the memory for this wasm module is shared, and @@ -79,18 +87,22 @@ impl Config { /// /// More and/or less may happen here over time, stay tuned! 
pub fn run(&self, module: &mut Module) -> Result<(), Error> { + if !self.enabled { + return Ok(()); + } + // Compatibility with older LLVM outputs. Newer LLVM outputs, when // atomics are enabled, emit a shared memory. That's a good indicator // that we have work to do. If shared memory isn't enabled, though then // this isn't an atomic module so there's nothing to do. We still allow, // though, an environment variable to force us to go down this path to // remain compatibile with older LLVM outputs. - let memory = find_memory(module)?; - if !module.memories.get(memory).shared && env::var("WASM_BINDGEN_THREADS").is_err() { + let memory = wasm_conventions::get_memory(module)?; + if !module.memories.get(memory).shared { return Ok(()); } - let stack_pointer = find_stack_pointer(module)?; + let stack_pointer = wasm_conventions::get_shadow_stack_pointer(module)?; let addr = allocate_static_data(module, memory, 4, 4)?; let zero = InitExpr::Value(Value::I32(0)); let globals = Globals { @@ -207,17 +219,6 @@ fn switch_data_segments_to_passive( Ok(ret) } -fn find_memory(module: &mut Module) -> Result { - let mut memories = module.memories.iter(); - let memory = memories - .next() - .ok_or_else(|| format_err!("currently incompatible with no memory modules"))?; - if memories.next().is_some() { - bail!("only one memory is currently supported"); - } - Ok(memory.id()) -} - fn update_memory(module: &mut Module, memory: MemoryId, max: u32) -> Result { assert!(max % PAGE_SIZE == 0); let memory = module.memories.get_mut(memory); @@ -313,37 +314,6 @@ fn allocate_static_data( Ok(address) } -fn find_stack_pointer(module: &mut Module) -> Result, Error> { - let candidates = module - .globals - .iter() - .filter(|g| g.ty == ValType::I32) - .filter(|g| g.mutable) - .filter(|g| match g.kind { - GlobalKind::Local(_) => true, - GlobalKind::Import(_) => false, - }) - .collect::>(); - - if candidates.len() == 0 { - return Ok(None); - } - if candidates.len() > 2 { - bail!("too many mutable 
globals to infer the stack pointer"); - } - if candidates.len() == 1 { - return Ok(Some(candidates[0].id())); - } - - // If we've got two mutable globals then we're in a pretty standard - // situation for threaded code where one is the stack pointer and one is the - // TLS base offset. We need to figure out which is which, and we basically - // assume LLVM's current codegen where the first is the stack pointer. - // - // TODO: have an actual check here. - Ok(Some(candidates[0].id())) -} - enum InitMemory { Segments(Vec), Call { @@ -358,7 +328,7 @@ fn inject_start( memory_init: InitMemory, globals: &Globals, addr: u32, - stack_pointer: Option, + stack_pointer: GlobalId, stack_size: u32, memory: MemoryId, ) -> Result<(), Error> { @@ -393,30 +363,28 @@ fn inject_start( // we give ourselves a stack via memory.grow and we update our stack // pointer as the default stack pointer is surely wrong for us. |body| { - if let Some(stack_pointer) = stack_pointer { - // local0 = grow_memory(stack_size); - body.i32_const((stack_size / PAGE_SIZE) as i32) - .memory_grow(memory) - .local_set(local); + // local0 = grow_memory(stack_size); + body.i32_const((stack_size / PAGE_SIZE) as i32) + .memory_grow(memory) + .local_set(local); - // if local0 == -1 then trap - body.block(None, |body| { - let target = body.id(); - body.local_get(local) - .i32_const(-1) - .binop(BinaryOp::I32Ne) - .br_if(target) - .unreachable(); - }); - - // stack_pointer = local0 + stack_size + // if local0 == -1 then trap + body.block(None, |body| { + let target = body.id(); body.local_get(local) - .i32_const(PAGE_SIZE as i32) - .binop(BinaryOp::I32Mul) - .i32_const(stack_size as i32) - .binop(BinaryOp::I32Add) - .global_set(stack_pointer); - } + .i32_const(-1) + .binop(BinaryOp::I32Ne) + .br_if(target) + .unreachable(); + }); + + // stack_pointer = local0 + stack_size + body.local_get(local) + .i32_const(PAGE_SIZE as i32) + .binop(BinaryOp::I32Mul) + .i32_const(stack_size as i32) + .binop(BinaryOp::I32Add) + 
.global_set(stack_pointer); }, // If the thread ID is zero then we can skip the update of the stack // pointer as we know our stack pointer is valid. We need to initialize diff --git a/crates/wasm-conventions/Cargo.toml b/crates/wasm-conventions/Cargo.toml new file mode 100644 index 00000000..3f4dc059 --- /dev/null +++ b/crates/wasm-conventions/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "wasm-bindgen-wasm-conventions" +version = "0.2.50" +authors = ["The wasm-bindgen developers"] +license = "MIT/Apache-2.0" +repository = "https://github.com/rustwasm/wasm-bindgen/tree/master/crates/wasm-conventions" +homepage = "https://rustwasm.github.io/wasm-bindgen/" +documentation = "https://docs.rs/wasm-bindgen-wasm-conventions" +description = "Utilities for working with Wasm codegen conventions (usually established by LLVM/lld)" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +walrus = "0.12.0" +failure = "0.1.5" diff --git a/crates/wasm-conventions/src/lib.rs b/crates/wasm-conventions/src/lib.rs new file mode 100755 index 00000000..ab6f2ae6 --- /dev/null +++ b/crates/wasm-conventions/src/lib.rs @@ -0,0 +1,95 @@ +//! A tiny crate of utilities for working with implicit Wasm codegen conventions +//! (often established by LLVM and lld). +//! +//! Examples conventions include: +//! +//! * The shadow stack pointer +//! * The canonical linear memory that contains the shadow stack + +#![deny(missing_docs, missing_debug_implementations)] + +use failure::{bail, format_err, Error}; +use walrus::{GlobalId, GlobalKind, MemoryId, Module, ValType}; + +/// Get a Wasm module's canonical linear memory. 
+pub fn get_memory(module: &Module) -> Result<MemoryId, Error> {
+    let mut memories = module.memories.iter().map(|m| m.id());
+    let memory = memories.next();
+    if memories.next().is_some() {
+        bail!(
+            "expected a single memory, found multiple; multiple memories \
+             currently not supported"
+        );
+    }
+    memory.ok_or_else(|| {
+        format_err!(
+            "module does not have a memory; must have a memory \
+             to transform return pointers into Wasm multi-value"
+        )
+    })
+}
+
+/// Discover the shadow stack pointer and add it to the module's exports as
+/// `__shadow_stack_pointer`.
+///
+/// Adding it to the exports is useful for making sure it doesn't get GC'd.
+pub fn export_shadow_stack_pointer(module: &mut Module) -> Result<(), Error> {
+    let candidates = module
+        .globals
+        .iter()
+        .filter(|g| g.ty == ValType::I32)
+        .filter(|g| g.mutable)
+        .filter(|g| match g.kind {
+            GlobalKind::Local(_) => true,
+            GlobalKind::Import(_) => false,
+        })
+        .collect::<Vec<_>>();
+
+    let ssp = match candidates.len() {
+        0 => bail!("could not find the shadow stack pointer for the module"),
+        // If we've got two mutable globals then we're in a pretty standard
+        // situation for threaded code where one is the stack pointer and one is the
+        // TLS base offset. We need to figure out which is which, and we basically
+        // assume LLVM's current codegen where the first is the stack pointer.
+        //
+        // TODO: have an actual check here.
+        1 | 2 => candidates[0].id(),
+        _ => bail!("too many mutable globals to infer which is the shadow stack pointer"),
+    };
+
+    module.exports.add("__shadow_stack_pointer", ssp);
+    Ok(())
+}
+
+/// Unexport the shadow stack pointer that was previously added to the module's
+/// exports as `__shadow_stack_pointer`.
+pub fn unexport_shadow_stack_pointer(module: &mut Module) -> Result<(), Error> {
+    let e = module
+        .exports
+        .iter()
+        .find(|e| e.name == "__shadow_stack_pointer")
+        .map(|e| e.id())
+        .ok_or_else(|| {
+            format_err!("did not find the `__shadow_stack_pointer` export in the module")
+        })?;
+    module.exports.delete(e);
+    Ok(())
+}
+
+/// Get the global exported as `__shadow_stack_pointer`.
+///
+/// Errors if no export named `__shadow_stack_pointer` exists (it is added by
+/// `export_shadow_stack_pointer`), or if that export is not a global.
+pub fn get_shadow_stack_pointer(module: &Module) -> Result<GlobalId, Error> {
+    module
+        .exports
+        .iter()
+        .find(|e| e.name == "__shadow_stack_pointer")
+        .ok_or_else(|| {
+            format_err!("did not find the `__shadow_stack_pointer` export in the module")
+        })
+        .and_then(|e| match e.item {
+            walrus::ExportItem::Global(g) => Ok(g),
+            _ => bail!("`__shadow_stack_pointer` export is wrong kind"),
+        })
+}