Insert trampolines to preserve callee-saved registers for backends without register save area information.

This commit is contained in:
losfair
2019-08-15 19:10:24 -07:00
parent 0a54213d4f
commit 6a24485999
4 changed files with 68 additions and 15 deletions

View File

@ -1,8 +1,9 @@
mod raw {
pub mod raw {
use std::ffi::c_void;
// Foreign (C ABI) symbols resolved at link time; none of their definitions
// are visible in this file.
extern "C" {
// NOTE(review): presumably runs code on the stack delimited by
// `stack_end`/`stack_begin` and returns its u64 result — confirm against
// the implementation.
pub fn run_on_alternative_stack(stack_end: *mut u64, stack_begin: *mut u64) -> u64;
// Used by address only: the x64 backend's `patch_local_function` embeds this
// symbol's address as the jump target of a patched function trampoline.
pub fn register_preservation_trampoline(); // NOT safe to call directly
// NOTE(review): presumably the C library's setjmp/longjmp, with the jump
// buffer passed as an opaque pointer — confirm the buffer size used by callers.
pub fn setjmp(env: *mut c_void) -> i32;
pub fn longjmp(env: *mut c_void, val: i32) -> !;
}
@ -39,11 +40,27 @@ struct UnwindInfo {
payload: Option<Box<dyn Any>>, // out
}
/// Values of the x86-64 registers `r15`, `r14`, `r13`, `r12` and `rbx`
/// captured at a boundary between optimized and baseline code.
///
/// A pointer to the per-thread instance is handed out through the C ABI by
/// `get_boundary_register_preservation`, so the in-memory layout is part of
/// the contract with non-Rust code. `repr(C)` pins the fields to declaration
/// order (plain `repr(packed)` leaves the order unspecified); `packed` keeps
/// the struct free of padding with alignment 1.
///
/// Resulting byte offsets: `r15` = 0, `r14` = 8, `r13` = 16, `r12` = 24,
/// `rbx` = 32; total size 40 bytes.
///
/// NOTE(review): presumably written by the assembly behind
/// `register_preservation_trampoline` — confirm these offsets match it.
#[repr(C, packed)]
#[derive(Default, Copy, Clone)]
pub struct BoundaryRegisterPreservation {
    /// Saved value of `r15` (offset 0).
    pub r15: u64,
    /// Saved value of `r14` (offset 8).
    pub r14: u64,
    /// Saved value of `r13` (offset 16).
    pub r13: u64,
    /// Saved value of `r12` (offset 24).
    pub r12: u64,
    /// Saved value of `rbx` (offset 32).
    pub rbx: u64,
}
// Per-thread runtime state. Each thread gets its own independent copy of
// every static below.
thread_local! {
// Unwind bookkeeping for the current thread; `None` until set.
static UNWIND: UnsafeCell<Option<UnwindInfo>> = UnsafeCell::new(None);
// Raw pointer to a `vm::Ctx`; starts out null.
static CURRENT_CTX: UnsafeCell<*mut vm::Ctx> = UnsafeCell::new(::std::ptr::null_mut());
// List of `CodeVersion`s known to this thread; starts out empty.
static CURRENT_CODE_VERSIONS: RefCell<Vec<CodeVersion>> = RefCell::new(vec![]);
// Flag, initially false.
// NOTE(review): presumably set when SIGINT is observed — confirm at the write site.
static WAS_SIGINT_TRIGGERED: Cell<bool> = Cell::new(false);
// Register snapshot exposed as a raw pointer by
// `get_boundary_register_preservation`; starts out all zeros.
static BOUNDARY_REGISTER_PRESERVATION: UnsafeCell<BoundaryRegisterPreservation> = UnsafeCell::new(BoundaryRegisterPreservation::default());
}
#[no_mangle]
pub unsafe extern "C" fn get_boundary_register_preservation() -> *mut BoundaryRegisterPreservation {
BOUNDARY_REGISTER_PRESERVATION.with(|x| x.get())
}
struct InterruptSignalMem(*mut u8);

View File

@ -104,6 +104,7 @@ pub struct InstanceImage {
/// Describes one compiled version of a module's code as registered with the
/// runtime (see `push_code_version` call sites).
#[derive(Debug, Clone)]
pub struct CodeVersion {
/// `true` for the baseline version, `false` for an optimized one
/// (as set by `run_tiering`).
pub baseline: bool,
/// Module state map belonging to this code version.
pub msm: ModuleStateMap,
/// An address stored as an integer (callers pass `.as_ptr() as usize`).
/// NOTE(review): presumably the base address of this version's machine
/// code — confirm at the call sites.
pub base: usize,
}
@ -372,7 +373,7 @@ impl InstanceImage {
pub mod x64 {
use super::*;
use crate::codegen::BreakpointMap;
use crate::fault::{catch_unsafe_unwind, run_on_alternative_stack};
use crate::fault::{catch_unsafe_unwind, run_on_alternative_stack, get_boundary_register_preservation};
use crate::structures::TypedIndex;
use crate::types::LocalGlobalIndex;
use crate::vm::Ctx;
@ -763,8 +764,9 @@ pub mod x64 {
) -> ExecutionStateImage {
let mut known_registers: [Option<u64>; 24] = initially_known_registers;
let mut results: Vec<WasmFunctionStateDump> = vec![];
let mut was_baseline = true;
for _ in 0.. {
for i in 0.. {
let ret_addr = initial_address.take().unwrap_or_else(|| {
let x = *stack;
stack = stack.offset(1);
@ -772,6 +774,7 @@ pub mod x64 {
});
let mut fsm_state: Option<(&FunctionStateMap, MachineState)> = None;
let mut is_baseline: Option<bool> = None;
for version in versions() {
match version
@ -786,6 +789,7 @@ pub mod x64 {
{
Some(x) => {
fsm_state = Some(x);
is_baseline = Some(version.baseline);
break;
}
None => {}
@ -798,6 +802,22 @@ pub mod x64 {
return ExecutionStateImage { frames: results };
};
// Detect a transition between code tiers while walking frames and, if one
// is found, seed the callee-saved registers from the per-thread snapshot.
{
// `is_baseline` was set together with `fsm_state` when a matching code
// version was found for this return address, so the unwrap is expected
// to succeed at this point.
let is_baseline = is_baseline.unwrap();
// Are we unwinding through an optimized/baseline boundary?
// `was_baseline` holds the tier of the frame processed in the previous
// iteration (initially `true`), so this fires when the current frame is
// baseline and the previously processed frame was optimized.
if is_baseline && !was_baseline {
// Take r15, r14, r13, r12 and rbx from the thread-local
// `BoundaryRegisterPreservation` and mark them as known.
// NOTE(review): assumes the snapshot was filled in when control crossed
// this boundary — confirm against the trampoline implementation.
let callee_saved = &*get_boundary_register_preservation();
known_registers[X64Register::GPR(GPR::R15).to_index().0] = Some(callee_saved.r15);
known_registers[X64Register::GPR(GPR::R14).to_index().0] = Some(callee_saved.r14);
known_registers[X64Register::GPR(GPR::R13).to_index().0] = Some(callee_saved.r13);
known_registers[X64Register::GPR(GPR::R12).to_index().0] = Some(callee_saved.r12);
known_registers[X64Register::GPR(GPR::RBX).to_index().0] = Some(callee_saved.rbx);
}
// Remember this frame's tier for the next iteration's boundary check.
was_baseline = is_baseline;
}
let mut wasm_stack: Vec<Option<u64>> = state
.wasm_stack
.iter()

View File

@ -111,6 +111,7 @@ pub fn run_tiering<F: Fn(InteractiveShellContext) -> ShellExitOperation>(
let mut optimized_instances: Vec<Instance> = vec![];
push_code_version(CodeVersion {
baseline: true,
msm: baseline
.module
.runnable_module
@ -166,6 +167,7 @@ pub fn run_tiering<F: Fn(InteractiveShellContext) -> ShellExitOperation>(
}
push_code_version(CodeVersion {
baseline: false,
msm: optimized
.module
.runnable_module
@ -179,6 +181,8 @@ pub fn run_tiering<F: Fn(InteractiveShellContext) -> ShellExitOperation>(
.as_ptr() as usize,
});
n_versions.set(n_versions.get() + 1);
baseline.context_mut().local_functions = optimized.context_mut().local_functions;
}
// TODO: Fix this for optimized version.
let breakpoints = baseline.module.runnable_module.get_breakpoints();

View File

@ -32,6 +32,7 @@ use wasmer_runtime_core::{
TableIndex, Type,
},
vm::{self, LocalGlobal, LocalTable, INTERNALS_SIZE},
fault::raw::register_preservation_trampoline,
};
use wasmparser::{Operator, Type as WpType, TypeOrFuncType as WpTypeOrFuncType};
@ -220,24 +221,34 @@ impl RunnableModule for X64ExecutionContext {
}
unsafe fn patch_local_function(&self, idx: usize, target_address: usize) -> bool {
// movabsq ?, %rax;
// jmpq *%rax;
/*
0: 48 b8 42 42 42 42 42 42 42 42 movabsq $4774451407313060418, %rax
a: 49 bb 43 43 43 43 43 43 43 43 movabsq $4846791580151137091, %r11
14: 41 ff e3 jmpq *%r11
*/
#[repr(packed)]
struct Trampoline {
movabsq: [u8; 2],
addr: u64,
jmpq: [u8; 2],
movabsq_rax: [u8; 2],
addr_rax: u64,
movabsq_r11: [u8; 2],
addr_r11: u64,
jmpq_r11: [u8; 3],
}
self.code.make_writable();
let trampoline = &mut *(self.function_pointers[self.func_import_count + idx].0
as *const Trampoline as *mut Trampoline);
trampoline.movabsq[0] = 0x48;
trampoline.movabsq[1] = 0xb8;
trampoline.addr = target_address as u64;
trampoline.jmpq[0] = 0xff;
trampoline.jmpq[1] = 0xe0;
trampoline.movabsq_rax[0] = 0x48;
trampoline.movabsq_rax[1] = 0xb8;
trampoline.addr_rax = target_address as u64;
trampoline.movabsq_r11[0] = 0x49;
trampoline.movabsq_r11[1] = 0xbb;
trampoline.addr_r11 = register_preservation_trampoline as unsafe extern "C" fn() as usize as u64;
trampoline.jmpq_r11[0] = 0x41;
trampoline.jmpq_r11[1] = 0xff;
trampoline.jmpq_r11[2] = 0xe3;
self.code.make_executable();
true
@ -1677,9 +1688,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
fn begin_body(&mut self, _module_info: &ModuleInfo) -> Result<(), CodegenError> {
let a = self.assembler.as_mut().unwrap();
let start_label = a.get_label();
// skip the patchpoint during normal execution
a.emit_jmp(Condition::None, start_label);
// patchpoint of 16 bytes
for _ in 0..16 {
// patchpoint of 32 bytes
for _ in 0..32 {
a.emit_nop();
}
a.emit_label(start_label);