mirror of
https://github.com/fluencelabs/wasmer
synced 2025-06-13 00:51:20 +00:00
Implement F64Min and F64Max.
This commit is contained in:
@ -3183,18 +3183,211 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
|
||||
&mut self.value_stack,
|
||||
Assembler::emit_vdivsd,
|
||||
),
|
||||
Operator::F64Max => Self::emit_fp_binop_avx(
|
||||
a,
|
||||
&mut self.machine,
|
||||
&mut self.value_stack,
|
||||
Assembler::emit_vmaxsd,
|
||||
),
|
||||
Operator::F64Min => Self::emit_fp_binop_avx(
|
||||
a,
|
||||
&mut self.machine,
|
||||
&mut self.value_stack,
|
||||
Assembler::emit_vminsd,
|
||||
),
|
||||
Operator::F64Max => {
    // Emits `f64.max` as an explicit sequence instead of a bare `vmaxsd`.
    // The raw `vmaxsd` result is patched in two steps:
    //   * operands that compare equal as floats but differ bitwise
    //     (+0.0 vs -0.0) produce +0.0;
    //   * an unordered comparison (at least one NaN operand) produces the
    //     canonical NaN bit pattern 0x7FF8_0000_0000_0000.
    let src2 =
        get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
    let src1 =
        get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
    let ret = self.machine.acquire_locations(
        a,
        &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))],
        false,
    )[0];
    self.value_stack.push(ret);

    let tmp1 = self.machine.acquire_temp_xmm().unwrap();
    let tmp2 = self.machine.acquire_temp_xmm().unwrap();
    let tmpg1 = self.machine.acquire_temp_gpr().unwrap();
    let tmpg2 = self.machine.acquire_temp_gpr().unwrap();

    // Make sure both operands live in XMM registers. Operands that are
    // already in an XMM register are used in place; everything else is
    // copied into a temporary (immediates go through `tmpg1` first).
    let src1 = match src1 {
        Location::XMM(x) => x,
        Location::GPR(_) | Location::Memory(_, _) => {
            a.emit_mov(Size::S64, src1, Location::XMM(tmp1));
            tmp1
        }
        Location::Imm32(_) => {
            a.emit_mov(Size::S32, src1, Location::GPR(tmpg1));
            a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1));
            tmp1
        }
        Location::Imm64(_) => {
            a.emit_mov(Size::S64, src1, Location::GPR(tmpg1));
            a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1));
            tmp1
        }
        _ => unreachable!(),
    };
    let src2 = match src2 {
        Location::XMM(x) => x,
        Location::GPR(_) | Location::Memory(_, _) => {
            a.emit_mov(Size::S64, src2, Location::XMM(tmp2));
            tmp2
        }
        Location::Imm32(_) => {
            a.emit_mov(Size::S32, src2, Location::GPR(tmpg1));
            a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2));
            tmp2
        }
        Location::Imm64(_) => {
            a.emit_mov(Size::S64, src2, Location::GPR(tmpg1));
            a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2));
            tmp2
        }
        _ => unreachable!(),
    };

    // NOTE(review): XMM8-XMM10 are used as fixed scratch registers without
    // going through `acquire_temp_xmm`; presumably the register allocator
    // never hands them out -- confirm.
    let tmp_xmm1 = XMM::XMM8;
    let tmp_xmm2 = XMM::XMM9;
    let tmp_xmm3 = XMM::XMM10;

    // Compare the raw bit patterns of both operands. The flags set here are
    // consumed by the conditional jump below; the `vmaxsd` in between does
    // not modify the flags register.
    a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1));
    a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2));
    a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1));
    a.emit_vmaxsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
    let label1 = a.get_label();
    let label2 = a.get_label();
    a.emit_jmp(Condition::NotEqual, label1);
    // Bit-identical operands: keep the plain `vmaxsd` result.
    a.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
    a.emit_jmp(Condition::None, label2);
    a.emit_label(label1);
    // Bit patterns differ: prepare +0.0 (x ^ x), which is only selected
    // below if the operands nevertheless compare equal as floats.
    a.emit_vxorpd(tmp_xmm2, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm2);
    a.emit_label(label2);
    // tmp_xmm3 = (src1 == src2) mask; where set, replace the `vmaxsd`
    // result in tmp_xmm1 with the value prepared in tmp_xmm2.
    a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
    a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
    // src1 = unordered mask (set when either operand is NaN). Both operand
    // registers are dead as inputs from here on and are reused as scratch.
    a.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1);
    // Load the canonical NaN as an immediate via a GPR. This avoids baking
    // the host address of a Rust static into the generated code and saves
    // a memory load.
    a.emit_mov(
        Size::S64,
        Location::Imm64(0x7FF8_0000_0000_0000),
        Location::GPR(tmpg1),
    );
    a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2));
    // Where the comparison was unordered take the canonical NaN, otherwise
    // keep the patched maximum.
    a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
    match ret {
        Location::XMM(x) => {
            a.emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
        }
        Location::Memory(_, _) | Location::GPR(_) => {
            a.emit_mov(Size::S64, Location::XMM(src1), ret);
        }
        _ => unreachable!(),
    }

    self.machine.release_temp_gpr(tmpg2);
    self.machine.release_temp_gpr(tmpg1);
    self.machine.release_temp_xmm(tmp2);
    self.machine.release_temp_xmm(tmp1);
}
|
||||
Operator::F64Min => {
    // Emits `f64.min` as an explicit sequence instead of a bare `vminsd`.
    // The raw `vminsd` result is patched in two steps:
    //   * operands that compare equal as floats but differ bitwise
    //     (+0.0 vs -0.0) produce -0.0;
    //   * an unordered comparison (at least one NaN operand) produces the
    //     canonical NaN bit pattern 0x7FF8_0000_0000_0000.
    let src2 =
        get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
    let src1 =
        get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
    let ret = self.machine.acquire_locations(
        a,
        &[(WpType::F64, MachineValue::WasmStack(self.value_stack.len()))],
        false,
    )[0];
    self.value_stack.push(ret);

    let tmp1 = self.machine.acquire_temp_xmm().unwrap();
    let tmp2 = self.machine.acquire_temp_xmm().unwrap();
    let tmpg1 = self.machine.acquire_temp_gpr().unwrap();
    let tmpg2 = self.machine.acquire_temp_gpr().unwrap();

    // Make sure both operands live in XMM registers. Operands that are
    // already in an XMM register are used in place; everything else is
    // copied into a temporary (immediates go through `tmpg1` first).
    let src1 = match src1 {
        Location::XMM(x) => x,
        Location::GPR(_) | Location::Memory(_, _) => {
            a.emit_mov(Size::S64, src1, Location::XMM(tmp1));
            tmp1
        }
        Location::Imm32(_) => {
            a.emit_mov(Size::S32, src1, Location::GPR(tmpg1));
            a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp1));
            tmp1
        }
        Location::Imm64(_) => {
            a.emit_mov(Size::S64, src1, Location::GPR(tmpg1));
            a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp1));
            tmp1
        }
        _ => unreachable!(),
    };
    let src2 = match src2 {
        Location::XMM(x) => x,
        Location::GPR(_) | Location::Memory(_, _) => {
            a.emit_mov(Size::S64, src2, Location::XMM(tmp2));
            tmp2
        }
        Location::Imm32(_) => {
            a.emit_mov(Size::S32, src2, Location::GPR(tmpg1));
            a.emit_mov(Size::S32, Location::GPR(tmpg1), Location::XMM(tmp2));
            tmp2
        }
        Location::Imm64(_) => {
            a.emit_mov(Size::S64, src2, Location::GPR(tmpg1));
            a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp2));
            tmp2
        }
        _ => unreachable!(),
    };

    // NOTE(review): XMM8-XMM10 are used as fixed scratch registers without
    // going through `acquire_temp_xmm`; presumably the register allocator
    // never hands them out -- confirm.
    let tmp_xmm1 = XMM::XMM8;
    let tmp_xmm2 = XMM::XMM9;
    let tmp_xmm3 = XMM::XMM10;

    // Compare the raw bit patterns of both operands. The flags set here are
    // consumed by the conditional jump below; the `vminsd` in between does
    // not modify the flags register.
    a.emit_mov(Size::S64, Location::XMM(src1), Location::GPR(tmpg1));
    a.emit_mov(Size::S64, Location::XMM(src2), Location::GPR(tmpg2));
    a.emit_cmp(Size::S64, Location::GPR(tmpg2), Location::GPR(tmpg1));
    a.emit_vminsd(src1, XMMOrMemory::XMM(src2), tmp_xmm1);
    let label1 = a.get_label();
    let label2 = a.get_label();
    a.emit_jmp(Condition::NotEqual, label1);
    // Bit-identical operands: keep the plain `vminsd` result.
    a.emit_vmovapd(XMMOrMemory::XMM(tmp_xmm1), XMMOrMemory::XMM(tmp_xmm2));
    a.emit_jmp(Condition::None, label2);
    a.emit_label(label1);
    // Bit patterns differ: prepare -0.0, which is only selected below if the
    // operands nevertheless compare equal as floats. The constant is loaded
    // as an immediate via a GPR so that no host address of a Rust static is
    // baked into the generated code.
    a.emit_mov(
        Size::S64,
        Location::Imm64(0x8000_0000_0000_0000),
        Location::GPR(tmpg1),
    );
    a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(tmp_xmm2));
    a.emit_label(label2);
    // tmp_xmm3 = (src1 == src2) mask; where set, replace the `vminsd`
    // result in tmp_xmm1 with the value prepared in tmp_xmm2.
    a.emit_vcmpeqsd(src1, XMMOrMemory::XMM(src2), tmp_xmm3);
    a.emit_vblendvpd(tmp_xmm3, XMMOrMemory::XMM(tmp_xmm2), tmp_xmm1, tmp_xmm1);
    // src1 = unordered mask (set when either operand is NaN). Both operand
    // registers are dead as inputs from here on and are reused as scratch.
    a.emit_vcmpunordsd(src1, XMMOrMemory::XMM(src2), src1);
    // Load the canonical NaN as an immediate via a GPR (see -0.0 above).
    a.emit_mov(
        Size::S64,
        Location::Imm64(0x7FF8_0000_0000_0000),
        Location::GPR(tmpg1),
    );
    a.emit_mov(Size::S64, Location::GPR(tmpg1), Location::XMM(src2));
    // Where the comparison was unordered take the canonical NaN, otherwise
    // keep the patched minimum.
    a.emit_vblendvpd(src1, XMMOrMemory::XMM(src2), tmp_xmm1, src1);
    match ret {
        Location::XMM(x) => {
            // Packed-double move, matching the F64Max arm.
            a.emit_vmovapd(XMMOrMemory::XMM(src1), XMMOrMemory::XMM(x));
        }
        Location::Memory(_, _) | Location::GPR(_) => {
            a.emit_mov(Size::S64, Location::XMM(src1), ret);
        }
        _ => unreachable!(),
    }

    self.machine.release_temp_gpr(tmpg2);
    self.machine.release_temp_gpr(tmpg1);
    self.machine.release_temp_xmm(tmp2);
    self.machine.release_temp_xmm(tmp1);
}
|
||||
Operator::F64Eq => Self::emit_fp_cmpop_avx(
|
||||
a,
|
||||
&mut self.machine,
|
||||
|
@ -106,7 +106,9 @@ pub trait Emitter {
|
||||
fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR);
|
||||
|
||||
fn emit_vmovaps(&mut self, src: XMMOrMemory, dst: XMMOrMemory);
|
||||
/// Emits a packed-double register/memory move from `src` to `dst`.
/// The `Assembler` implementation accepts XMM->XMM, memory->XMM and
/// XMM->memory, and panics for a memory->memory pair.
fn emit_vmovapd(&mut self, src: XMMOrMemory, dst: XMMOrMemory);
|
||||
fn emit_vxorps(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
|
||||
/// Emits `vxorpd`. The `Assembler` implementation is generated by
/// `avx_fn!(vxorpd, emit_vxorpd)`; the F64Max lowering calls it with the
/// same register for all three operands to zero that register.
fn emit_vxorpd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
|
||||
|
||||
fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
|
||||
fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM);
|
||||
@ -174,6 +176,7 @@ pub trait Emitter {
|
||||
fn emit_vcvtsi2sd_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM);
|
||||
|
||||
fn emit_vblendvps(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM);
|
||||
/// Emits `vblendvpd`. The `Assembler` implementation passes the operands
/// to the instruction in the order `dst, mask, src2, src1`.
/// NOTE(review): in Intel's operand order the last operand is the selector,
/// so `src1` appears to act as the blend mask and `mask` as the fallback
/// source -- confirm before relying on the parameter names.
fn emit_vblendvpd(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM);
|
||||
|
||||
fn emit_test_gpr_64(&mut self, reg: GPR);
|
||||
|
||||
@ -1019,7 +1022,23 @@ impl Emitter for Assembler {
|
||||
};
|
||||
}
|
||||
|
||||
fn emit_vmovapd(&mut self, src: XMMOrMemory, dst: XMMOrMemory) {
|
||||
match (src, dst) {
|
||||
(XMMOrMemory::XMM(src), XMMOrMemory::XMM(dst)) => {
|
||||
dynasm!(self ; movapd Rx(dst as u8), Rx(src as u8))
|
||||
}
|
||||
(XMMOrMemory::Memory(base, disp), XMMOrMemory::XMM(dst)) => {
|
||||
dynasm!(self ; movapd Rx(dst as u8), [Rq(base as u8) + disp])
|
||||
}
|
||||
(XMMOrMemory::XMM(src), XMMOrMemory::Memory(base, disp)) => {
|
||||
dynasm!(self ; movapd [Rq(base as u8) + disp], Rx(src as u8))
|
||||
}
|
||||
_ => panic!("singlepass can't emit VMOVAPD {:?} {:?}", src, dst),
|
||||
};
|
||||
}
|
||||
|
||||
avx_fn!(vxorps, emit_vxorps);
|
||||
avx_fn!(vxorpd, emit_vxorpd);
|
||||
|
||||
avx_fn!(vaddss, emit_vaddss);
|
||||
avx_fn!(vaddsd, emit_vaddsd);
|
||||
@ -1094,6 +1113,17 @@ impl Emitter for Assembler {
|
||||
}
|
||||
}
|
||||
|
||||
fn emit_vblendvpd(&mut self, src1: XMM, src2: XMMOrMemory, mask: XMM, dst: XMM) {
|
||||
match src2 {
|
||||
XMMOrMemory::XMM(src2) => {
|
||||
dynasm!(self ; vblendvpd Rx(dst as u8), Rx(mask as u8), Rx(src2 as u8), Rx(src1 as u8))
|
||||
}
|
||||
XMMOrMemory::Memory(base, disp) => {
|
||||
dynasm!(self ; vblendvpd Rx(dst as u8), Rx(mask as u8), [Rq(base as u8) + disp], Rx(src1 as u8))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn emit_ucomiss(&mut self, src: XMMOrMemory, dst: XMM) {
|
||||
match src {
|
||||
XMMOrMemory::XMM(x) => dynasm!(self ; ucomiss Rx(dst as u8), Rx(x as u8)),
|
||||
|
Reference in New Issue
Block a user