From f781b0eb02d99659547eb0de59c63d4e8cd12f7d Mon Sep 17 00:00:00 2001 From: losfair Date: Tue, 9 Apr 2019 17:08:31 +0800 Subject: [PATCH] More floating point operators. --- lib/dynasm-backend/src/codegen_x64.rs | 109 +++++++++++++++++++++++++- lib/dynasm-backend/src/emitter_x64.rs | 17 ++++ 2 files changed, 123 insertions(+), 3 deletions(-) diff --git a/lib/dynasm-backend/src/codegen_x64.rs b/lib/dynasm-backend/src/codegen_x64.rs index 386f0ce52..012557f9c 100644 --- a/lib/dynasm-backend/src/codegen_x64.rs +++ b/lib/dynasm-backend/src/codegen_x64.rs @@ -571,6 +571,17 @@ impl X64FunctionCode { src1: Location, src2: Location, dst: Location, + ) { + Self::emit_relaxed_avx_base(a, m, |a, _, src1, src2, dst| op(a, src1, src2, dst), src1, src2, dst) + } + + fn emit_relaxed_avx_base( + a: &mut Assembler, + m: &mut Machine, + op: F, + src1: Location, + src2: Location, + dst: Location, ) { let tmp1 = m.acquire_temp_xmm().unwrap(); let tmp2 = m.acquire_temp_xmm().unwrap(); @@ -618,10 +629,10 @@ impl X64FunctionCode { match dst { Location::XMM(x) => { - op(a, src1, src2, x); + op(a, m, src1, src2, x); }, Location::Memory(_, _) => { - op(a, src1, src2, tmp3); + op(a, m, src1, src2, tmp3); a.emit_mov(Size::S64, Location::XMM(tmp3), dst); }, _ => unreachable!(), @@ -1422,7 +1433,7 @@ impl FunctionCodeGenerator for X64FunctionCode { Size::S32, loc, ret, ); } - + Operator::F32Const { value } => self.value_stack.push((Location::Imm32(value.bits()), LocalOrTemp::Temp)), Operator::F32Add => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vaddss), Operator::F32Sub => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsubss), @@ -1442,6 +1453,43 @@ impl FunctionCodeGenerator for X64FunctionCode { Operator::F32Trunc => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundss_trunc), Operator::F32Sqrt => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsqrtss), + Operator::F32Copysign => { + let loc_b = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations(a, &[WpType::F32], false)[0]; + + let tmp1 = self.machine.acquire_temp_gpr().unwrap(); + let tmp2 = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2)); + a.emit_and(Size::S32, Location::Imm32(0x7fffffffu32), Location::GPR(tmp1)); + a.emit_and(Size::S32, Location::Imm32(0x80000000u32), Location::GPR(tmp2)); + a.emit_or(Size::S32, Location::GPR(tmp2), Location::GPR(tmp1)); + a.emit_mov(Size::S32, Location::GPR(tmp1), ret); + self.machine.release_temp_gpr(tmp2); + self.machine.release_temp_gpr(tmp1); + } + + Operator::F32Abs => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations(a, &[WpType::F32], false)[0]; + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(tmp)); + a.emit_and(Size::S32, Location::Imm32(0x7fffffffu32), Location::GPR(tmp)); + a.emit_mov(Size::S32, Location::GPR(tmp), ret); + self.machine.release_temp_gpr(tmp); + } + + Operator::F32Neg => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations(a, &[WpType::F32], false)[0]; + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S32, loc, Location::GPR(tmp)); + a.emit_btc_gpr_imm8_32(31, tmp); + a.emit_mov(Size::S32, Location::GPR(tmp), ret); + self.machine.release_temp_gpr(tmp); + } + Operator::F64Const { value } => self.value_stack.push((Location::Imm64(value.bits()), LocalOrTemp::Temp)), Operator::F64Add => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vaddsd), Operator::F64Sub => Self::emit_fp_binop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsubsd), @@ -1461,6 +1509,61 @@ impl FunctionCodeGenerator for X64FunctionCode { Operator::F64Trunc => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vroundsd_trunc), Operator::F64Sqrt => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vsqrtsd), + Operator::F64Copysign => { + let loc_b = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let loc_a = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations(a, &[WpType::F64], false)[0]; + + let tmp1 = self.machine.acquire_temp_gpr().unwrap(); + let tmp2 = self.machine.acquire_temp_gpr().unwrap(); + let c = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1)); + a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2)); + + a.emit_mov(Size::S64, Location::Imm64(0x7fffffffffffffffu64), Location::GPR(c)); + a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp1)); + + a.emit_mov(Size::S64, Location::Imm64(0x8000000000000000u64), Location::GPR(c)); + a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp2)); + + a.emit_or(Size::S64, Location::GPR(tmp2), Location::GPR(tmp1)); + a.emit_mov(Size::S64, Location::GPR(tmp1), ret); + + self.machine.release_temp_gpr(c); + self.machine.release_temp_gpr(tmp2); + self.machine.release_temp_gpr(tmp1); + } + + Operator::F64Abs => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations(a, &[WpType::F64], false)[0]; + + let tmp = self.machine.acquire_temp_gpr().unwrap(); + let c = self.machine.acquire_temp_gpr().unwrap(); + + a.emit_mov(Size::S64, loc, Location::GPR(tmp)); + a.emit_mov(Size::S64, Location::Imm64(0x7fffffffffffffffu64), Location::GPR(c)); + a.emit_and(Size::S64, Location::GPR(c), Location::GPR(tmp)); + a.emit_mov(Size::S64, Location::GPR(tmp), ret); + + self.machine.release_temp_gpr(c); + self.machine.release_temp_gpr(tmp); + } + + Operator::F64Neg => { + let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); + let ret = self.machine.acquire_locations(a, &[WpType::F64], false)[0]; + let tmp = self.machine.acquire_temp_gpr().unwrap(); + a.emit_mov(Size::S64, loc, Location::GPR(tmp)); + a.emit_btc_gpr_imm8_64(63, tmp); + a.emit_mov(Size::S64, Location::GPR(tmp), ret); + self.machine.release_temp_gpr(tmp); + } + + Operator::F64PromoteF32 => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcvtss2sd), + Operator::F32DemoteF64 => Self::emit_fp_unop_avx(a, &mut self.machine, &mut self.value_stack, Assembler::emit_vcvtsd2ss), + Operator::Call { function_index } => { let function_index = function_index as usize; let label = self diff --git a/lib/dynasm-backend/src/emitter_x64.rs b/lib/dynasm-backend/src/emitter_x64.rs index d5a426930..91c0bcab7 100644 --- a/lib/dynasm-backend/src/emitter_x64.rs +++ b/lib/dynasm-backend/src/emitter_x64.rs @@ -116,6 +116,9 @@ pub trait Emitter { fn emit_movzx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location); fn emit_movsx(&mut self, sz_src: Size, src: Location, sz_dst: Size, dst: Location); + fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR); + fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR); + fn emit_vaddss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vaddsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vsubss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); @@ -159,6 +162,9 @@ pub trait Emitter { fn emit_vroundsd_ceil(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); fn emit_vroundsd_trunc(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcvtss2sd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_vcvtsd2ss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM); + fn emit_ud2(&mut self); fn emit_ret(&mut self); fn emit_call_label(&mut self, label: Self::Label); @@ -661,6 +667,14 @@ impl Emitter for Assembler { } } + fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) { + dynasm!(self ; btc Rd(dst as u8), BYTE (src as i8)); + } + + fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) { + dynasm!(self ; btc Rq(dst as u8), BYTE (src as i8)); + } + avx_fn!(vaddss, emit_vaddss); avx_fn!(vaddsd, emit_vaddsd); @@ -700,6 +714,9 @@ impl Emitter for Assembler { avx_fn!(vsqrtss, emit_vsqrtss); avx_fn!(vsqrtsd, emit_vsqrtsd); + avx_fn!(vcvtss2sd, emit_vcvtss2sd); + avx_fn!(vcvtsd2ss, emit_vcvtsd2ss); + avx_round_fn!(vroundss, emit_vroundss_nearest, 0); avx_round_fn!(vroundss, emit_vroundss_floor, 1); avx_round_fn!(vroundss, emit_vroundss_ceil, 2);