diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs index 20c9a0c21..5afa0f928 100644 --- a/lib/singlepass-backend/src/codegen_x64.rs +++ b/lib/singlepass-backend/src/codegen_x64.rs @@ -3277,24 +3277,48 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f32_int_conv_check(a, &mut self.machine, tmp_in, -1.0, 4294967296.0); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_uf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check(a, &mut self.machine, tmp_in, -1.0, 4294967296.0); - a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I32TruncSF32 => { @@ -3306,30 +3330,55 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f32_int_conv_check( - a, - &mut self.machine, - tmp_in, - -2147483904.0, - 2147483648.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_sf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - a.emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check( + a, + &mut self.machine, + tmp_in, + -2147483904.0, + 2147483648.0, + ); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + a.emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I64TruncSF32 => { @@ -3341,47 +3390,57 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f32_int_conv_check( - a, - &mut self.machine, - tmp_in, - -9223373136366403584.0, - 9223372036854775808.0, - ); - a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_sf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check( + a, + &mut self.machine, + tmp_in, + -9223373136366403584.0, + 9223372036854775808.0, + ); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I64TruncUF32 => { - /* - ; movq xmm5, r15 - ; mov r15d, 1593835520u32 as i32 //float 9.22337203E+18 - ; movd xmm1, r15d - ; movd xmm2, Rd(reg as u8) - ; movd xmm3, Rd(reg as u8) - ; subss xmm2, xmm1 - ; cvttss2si Rq(reg as u8), xmm2 - ; mov r15, QWORD 0x8000000000000000u64 as i64 - ; xor r15, Rq(reg as u8) - ; cvttss2si Rq(reg as u8), xmm3 - ; ucomiss xmm3, xmm1 - ; cmovae Rq(reg as u8), r15 - ; movq r15, xmm5 - */ let loc = get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap()); let ret = self.machine.acquire_locations( @@ -3390,54 +3449,79 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S32, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f32_int_conv_check( - a, - &mut self.machine, - tmp_in, - -1.0, - 18446744073709551616.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_uf32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 - let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 - let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 - let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f32_int_conv_check( + a, + &mut self.machine, + tmp_in, + -1.0, + 18446744073709551616.0, + ); - a.emit_mov( - Size::S32, - Location::Imm32(1593835520u32), - Location::GPR(tmp), - ); //float 9.22337203E+18 - a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x1)); - a.emit_mov(Size::S32, Location::XMM(tmp_in), Location::XMM(tmp_x2)); - a.emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); - a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov( - Size::S64, - Location::Imm64(0x8000000000000000u64), - Location::GPR(tmp), - ); - a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); - a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); - a.emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2); - a.emit_cmovae_gpr_64(tmp, tmp_out); - a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 + let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 + let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 - self.machine.release_temp_xmm(tmp_x2); - self.machine.release_temp_xmm(tmp_x1); - self.machine.release_temp_gpr(tmp); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + a.emit_mov( + Size::S32, + Location::Imm32(1593835520u32), + Location::GPR(tmp), + ); //float 9.22337203E+18 + a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x1)); + a.emit_mov(Size::S32, Location::XMM(tmp_in), Location::XMM(tmp_x2)); + a.emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp), + ); + a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); + a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); + a.emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2); + a.emit_cmovae_gpr_64(tmp, tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_x2); + self.machine.release_temp_xmm(tmp_x1); + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I32TruncUF64 => { @@ -3449,24 +3533,49 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f64_int_conv_check(a, &mut self.machine, tmp_in, -1.0, 4294967296.0); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_uf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check(a, &mut self.machine, tmp_in, -1.0, 4294967296.0); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I32TruncSF64 => { @@ -3478,35 +3587,60 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - let real_in = match loc { - Location::Imm32(_) | Location::Imm64(_) => { - a.emit_mov(Size::S64, loc, Location::GPR(tmp_out)); - a.emit_mov(Size::S64, Location::GPR(tmp_out), Location::XMM(tmp_in)); - tmp_in - } - Location::XMM(x) => x, - _ => { - a.emit_mov(Size::S64, loc, Location::XMM(tmp_in)); - tmp_in - } - }; + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i32_trunc_sf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_f64_int_conv_check( - a, - &mut self.machine, - real_in, - -2147483649.0, - 2147483648.0, - ); + let real_in = match loc { + Location::Imm32(_) | Location::Imm64(_) => { + a.emit_mov(Size::S64, loc, Location::GPR(tmp_out)); + a.emit_mov(Size::S64, Location::GPR(tmp_out), Location::XMM(tmp_in)); + tmp_in + } + Location::XMM(x) => x, + _ => { + a.emit_mov(Size::S64, loc, Location::XMM(tmp_in)); + tmp_in + } + }; - a.emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out); - a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + Self::emit_f64_int_conv_check( + a, + &mut self.machine, + real_in, + -2147483649.0, + 2147483648.0, + ); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + a.emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out); + a.emit_mov(Size::S32, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I64TruncSF64 => { @@ -3518,30 +3652,55 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f64_int_conv_check( - a, - &mut self.machine, - tmp_in, - -9223372036854777856.0, - 9223372036854775808.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_sf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); - a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check( + a, + &mut self.machine, + tmp_in, + -9223372036854777856.0, + 9223372036854775808.0, + ); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::I64TruncUF64 => { @@ -3553,54 +3712,79 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_gpr().unwrap(); - let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 - Self::emit_relaxed_binop( - a, - &mut self.machine, - Assembler::emit_mov, - Size::S64, - loc, - Location::XMM(tmp_in), - ); - Self::emit_f64_int_conv_check( - a, - &mut self.machine, - tmp_in, - -1.0, - 18446744073709551616.0, - ); + if a.arch_has_itruncf() { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + a.arch_emit_i64_trunc_uf64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::GPR(tmp_out), + ret, + ); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_gpr().unwrap(); + let tmp_in = self.machine.acquire_temp_xmm().unwrap(); // xmm2 - let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 - let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 - let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::XMM(tmp_in), + ); + Self::emit_f64_int_conv_check( + a, + &mut self.machine, + tmp_in, + -1.0, + 18446744073709551616.0, + ); - a.emit_mov( - Size::S64, - Location::Imm64(4890909195324358656u64), - Location::GPR(tmp), - ); //double 9.2233720368547758E+18 - a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x1)); - a.emit_mov(Size::S64, Location::XMM(tmp_in), Location::XMM(tmp_x2)); - a.emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); - a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); - a.emit_mov( - Size::S64, - Location::Imm64(0x8000000000000000u64), - Location::GPR(tmp), - ); - a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); - a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); - a.emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2); - a.emit_cmovae_gpr_64(tmp, tmp_out); - a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + let tmp = self.machine.acquire_temp_gpr().unwrap(); // r15 + let tmp_x1 = self.machine.acquire_temp_xmm().unwrap(); // xmm1 + let tmp_x2 = self.machine.acquire_temp_xmm().unwrap(); // xmm3 - self.machine.release_temp_xmm(tmp_x2); - self.machine.release_temp_xmm(tmp_x1); - self.machine.release_temp_gpr(tmp); - self.machine.release_temp_xmm(tmp_in); - self.machine.release_temp_gpr(tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(4890909195324358656u64), + Location::GPR(tmp), + ); //double 9.2233720368547758E+18 + a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x1)); + a.emit_mov(Size::S64, Location::XMM(tmp_in), Location::XMM(tmp_x2)); + a.emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out); + a.emit_mov( + Size::S64, + Location::Imm64(0x8000000000000000u64), + Location::GPR(tmp), + ); + a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp)); + a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out); + a.emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2); + a.emit_cmovae_gpr_64(tmp, tmp_out); + a.emit_mov(Size::S64, Location::GPR(tmp_out), ret); + + self.machine.release_temp_xmm(tmp_x2); + self.machine.release_temp_xmm(tmp_x1); + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_xmm(tmp_in); + self.machine.release_temp_gpr(tmp_out); + } } Operator::F32ConvertSI32 => { @@ -3612,15 +3796,40 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2ss_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_si32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F32ConvertUI32 => { let loc = @@ -3631,15 +3840,39 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_ui32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F32ConvertSI64 => { let loc = @@ -3650,15 +3883,39 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_si64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F32ConvertUI64 => { let loc = @@ -3669,31 +3926,55 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - let tmp = self.machine.acquire_temp_gpr().unwrap(); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f32_convert_ui64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + let tmp = self.machine.acquire_temp_gpr().unwrap(); - let do_convert = a.get_label(); - let end_convert = a.get_label(); + let do_convert = a.get_label(); + let end_convert = a.get_label(); - a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); - a.emit_test_gpr_64(tmp_in); - a.emit_jmp(Condition::Signed, do_convert); - a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_jmp(Condition::None, end_convert); - a.emit_label(do_convert); - a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); - a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); - a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); - a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); - a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_vaddss(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); - a.emit_label(end_convert); - a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_test_gpr_64(tmp_in); + a.emit_jmp(Condition::Signed, do_convert); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_jmp(Condition::None, end_convert); + a.emit_label(do_convert); + a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); + a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); + a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); + a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); + a.emit_vcvtsi2ss_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_vaddss(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); + a.emit_label(end_convert); + a.emit_mov(Size::S32, Location::XMM(tmp_out), ret); - self.machine.release_temp_gpr(tmp); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F64ConvertSI32 => { @@ -3705,15 +3986,40 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2sd_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_si32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_32(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F64ConvertUI32 => { let loc = @@ -3724,15 +4030,40 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S32, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_ui32(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + a.emit_mov(Size::S32, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F64ConvertSI64 => { let loc = @@ -3743,15 +4074,40 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); - a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_si64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::F64ConvertUI64 => { let loc = @@ -3762,31 +4118,56 @@ impl FunctionCodeGenerator for X64FunctionCode { false, )[0]; self.value_stack.push(ret); - let tmp_out = self.machine.acquire_temp_xmm().unwrap(); - let tmp_in = self.machine.acquire_temp_gpr().unwrap(); - let tmp = self.machine.acquire_temp_gpr().unwrap(); - let do_convert = a.get_label(); - let end_convert = a.get_label(); + if a.arch_has_fconverti() { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + loc, + Location::GPR(tmp_in), + ); + a.arch_emit_f64_convert_ui64(tmp_in, tmp_out); + Self::emit_relaxed_binop( + a, + &mut self.machine, + Assembler::emit_mov, + Size::S64, + Location::XMM(tmp_out), + ret, + ); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } else { + let tmp_out = self.machine.acquire_temp_xmm().unwrap(); + let tmp_in = self.machine.acquire_temp_gpr().unwrap(); + let tmp = self.machine.acquire_temp_gpr().unwrap(); - a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); - a.emit_test_gpr_64(tmp_in); - a.emit_jmp(Condition::Signed, do_convert); - a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_jmp(Condition::None, end_convert); - a.emit_label(do_convert); - a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); - a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); - a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); - a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); - a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); - a.emit_vaddsd(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); - a.emit_label(end_convert); - a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + let do_convert = a.get_label(); + let end_convert = a.get_label(); - self.machine.release_temp_gpr(tmp); - self.machine.release_temp_gpr(tmp_in); - self.machine.release_temp_xmm(tmp_out); + a.emit_mov(Size::S64, loc, Location::GPR(tmp_in)); + a.emit_test_gpr_64(tmp_in); + a.emit_jmp(Condition::Signed, do_convert); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_jmp(Condition::None, end_convert); + a.emit_label(do_convert); + a.emit_mov(Size::S64, Location::GPR(tmp_in), Location::GPR(tmp)); + a.emit_and(Size::S64, Location::Imm32(1), Location::GPR(tmp)); + a.emit_shr(Size::S64, Location::Imm8(1), Location::GPR(tmp_in)); + a.emit_or(Size::S64, Location::GPR(tmp), Location::GPR(tmp_in)); + a.emit_vcvtsi2sd_64(tmp_out, GPROrMemory::GPR(tmp_in), tmp_out); + a.emit_vaddsd(tmp_out, XMMOrMemory::XMM(tmp_out), tmp_out); + a.emit_label(end_convert); + a.emit_mov(Size::S64, Location::XMM(tmp_out), ret); + + self.machine.release_temp_gpr(tmp); + self.machine.release_temp_gpr(tmp_in); + self.machine.release_temp_xmm(tmp_out); + } } Operator::Call { function_index } => { diff --git a/lib/singlepass-backend/src/emitter_x64.rs b/lib/singlepass-backend/src/emitter_x64.rs index 1d0127cf7..f620e5f82 100644 --- a/lib/singlepass-backend/src/emitter_x64.rs +++ b/lib/singlepass-backend/src/emitter_x64.rs @@ -173,6 +173,26 @@ pub trait Emitter { fn emit_homomorphic_host_redirection(&mut self, target: GPR); fn emit_inline_breakpoint(&mut self, ty: InlineBreakpointType); + + fn arch_has_itruncf(&self) -> bool { false } + fn arch_emit_i32_trunc_sf32(&mut self, _src: XMM, _dst: GPR) { unimplemented!() } + fn arch_emit_i32_trunc_sf64(&mut self, _src: XMM, _dst: GPR) { unimplemented!() } + fn arch_emit_i32_trunc_uf32(&mut self, _src: XMM, _dst: GPR) { unimplemented!() } + fn arch_emit_i32_trunc_uf64(&mut self, _src: XMM, _dst: GPR) { unimplemented!() } + fn arch_emit_i64_trunc_sf32(&mut self, _src: XMM, _dst: GPR) { unimplemented!() } + fn arch_emit_i64_trunc_sf64(&mut self, _src: XMM, _dst: GPR) { unimplemented!() } + fn arch_emit_i64_trunc_uf32(&mut self, _src: XMM, _dst: GPR) { unimplemented!() } + fn arch_emit_i64_trunc_uf64(&mut self, _src: XMM, _dst: GPR) { unimplemented!() } + + fn arch_has_fconverti(&self) -> bool { false } + fn arch_emit_f32_convert_si32(&mut self, _src: GPR, _dst: XMM) { unimplemented!() } + fn arch_emit_f32_convert_si64(&mut self, _src: GPR, _dst: XMM) { unimplemented!() } + fn arch_emit_f32_convert_ui32(&mut self, _src: GPR, _dst: XMM) { unimplemented!() } + fn arch_emit_f32_convert_ui64(&mut self, _src: GPR, _dst: XMM) { unimplemented!() } + fn arch_emit_f64_convert_si32(&mut self, _src: GPR, _dst: XMM) { unimplemented!() } + fn arch_emit_f64_convert_si64(&mut self, _src: GPR, _dst: XMM) { unimplemented!() } + fn arch_emit_f64_convert_ui32(&mut self, _src: GPR, _dst: XMM) { unimplemented!() } + fn arch_emit_f64_convert_ui64(&mut self, _src: GPR, _dst: XMM) { unimplemented!() } } fn _dummy(a: &mut Assembler) { diff --git a/lib/singlepass-backend/src/translator_aarch64.rs b/lib/singlepass-backend/src/translator_aarch64.rs index d72d389ca..5df26714f 100644 --- a/lib/singlepass-backend/src/translator_aarch64.rs +++ b/lib/singlepass-backend/src/translator_aarch64.rs @@ -400,6 +400,57 @@ macro_rules! avx_fn { } } +macro_rules! avx_fn_bitwise_inv { + ($ins:ident, $width:ident, $width_int:ident, $name:ident) => { + fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match src2 { + XMMOrMemory::XMM(src2) => dynasm!(self ; $ins $width(map_xmm(dst).v()), $width(map_xmm(src1).v()), $width(map_xmm(src2).v())), + XMMOrMemory::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { + fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { + match src2 { + XMMOrMemory::XMM(src2) => dynasm!(self ; $ins $width(map_xmm(dst).v()), $width(map_xmm(src2).v()), $width(map_xmm(src1).v())), + XMMOrMemory::Memory(base, disp) => { + if disp >= 0 { + dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, { fn $name(&mut self, src1: XMM, _src2: XMMOrMemory, dst: XMM) { @@ -1312,20 +1363,6 @@ impl Emitter for Assembler { } } - // TODO: These instructions are only used in FP opcodes. Implement later. - fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) - } - fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) - } - fn emit_cmovae_gpr_32(&mut self, src: GPR, dst: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) - } - fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) - } - avx_fn!(fadd, S, W, emit_vaddss); avx_fn!(fsub, S, W, emit_vsubss); avx_fn!(fmul, S, W, emit_vmulss); @@ -1333,8 +1370,11 @@ impl Emitter for Assembler { avx_fn!(fmax, S, W, emit_vmaxss); avx_fn!(fmin, S, W, emit_vminss); avx_fn!(fcmgt, S, W, emit_vcmpgtss); + avx_fn_reversed!(fcmgt, S, W, emit_vcmpltss); // b gt a <=> a lt b avx_fn!(fcmge, S, W, emit_vcmpgess); + avx_fn_bitwise_inv!(fcmgt, S, W, emit_vcmpless); // a not gt b <=> a le b avx_fn!(fcmeq, S, W, emit_vcmpeqss); + avx_fn_bitwise_inv!(fcmeq, S, W, emit_vcmpneqss); // a not eq b <=> a neq b avx_fn_unop!(fsqrt, S, emit_vsqrtss); avx_fn_unop!(frintn, S, emit_vroundss_nearest); // to nearest with ties to even avx_fn_unop!(frintm, S, emit_vroundss_floor); // toward minus infinity @@ -1349,8 +1389,11 @@ impl Emitter for Assembler { avx_fn!(fmax, D, X, emit_vmaxsd); avx_fn!(fmin, D, X, emit_vminsd); avx_fn!(fcmgt, D, X, emit_vcmpgtsd); + avx_fn_reversed!(fcmgt, D, X, emit_vcmpltsd); // b gt a <=> a lt b avx_fn!(fcmge, D, X, emit_vcmpgesd); + avx_fn_bitwise_inv!(fcmgt, D, X, emit_vcmplesd); // a not gt b <=> a le b avx_fn!(fcmeq, D, X, emit_vcmpeqsd); + avx_fn_bitwise_inv!(fcmeq, D, X, emit_vcmpneqsd); // a not eq b <=> a neq b avx_fn_unop!(fsqrt, D, emit_vsqrtsd); avx_fn_unop!(frintn, D, emit_vroundsd_nearest); // to nearest with ties to even avx_fn_unop!(frintm, D, emit_vroundsd_floor); // toward minus infinity @@ -1358,63 +1401,103 @@ impl Emitter for Assembler { avx_fn_unop!(frintz, D, emit_vroundsd_trunc); // toward zero avx_fn_cvt!(fcvt, D, S, emit_vcvtsd2ss); - fn emit_vcmpneqss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + fn arch_has_itruncf(&self) -> bool { true } + fn arch_emit_i32_trunc_sf32(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzs W(map_gpr(dst).x()), S(map_xmm(src).v())); } - fn emit_vcmpneqsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + fn arch_emit_i32_trunc_sf64(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzs W(map_gpr(dst).x()), D(map_xmm(src).v())); + } + fn arch_emit_i32_trunc_uf32(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzu W(map_gpr(dst).x()), S(map_xmm(src).v())); + } + fn arch_emit_i32_trunc_uf64(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzu W(map_gpr(dst).x()), D(map_xmm(src).v())); + } + fn arch_emit_i64_trunc_sf32(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzs X(map_gpr(dst).x()), S(map_xmm(src).v())); + } + fn arch_emit_i64_trunc_sf64(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzs X(map_gpr(dst).x()), D(map_xmm(src).v())); + } + fn arch_emit_i64_trunc_uf32(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzu X(map_gpr(dst).x()), S(map_xmm(src).v())); + } + fn arch_emit_i64_trunc_uf64(&mut self, src: XMM, dst: GPR) { + dynasm!(self ; fcvtzu X(map_gpr(dst).x()), D(map_xmm(src).v())); } - fn emit_vcmpltss(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + fn arch_has_fconverti(&self) -> bool { true } + fn arch_emit_f32_convert_si32(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; scvtf S(map_xmm(dst).v()), W(map_gpr(src).x())); } - fn emit_vcmpltsd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + fn arch_emit_f32_convert_si64(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; scvtf S(map_xmm(dst).v()), X(map_gpr(src).x())); + } + fn arch_emit_f32_convert_ui32(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; ucvtf S(map_xmm(dst).v()), W(map_gpr(src).x())); + } + fn arch_emit_f32_convert_ui64(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; ucvtf S(map_xmm(dst).v()), X(map_gpr(src).x())); + } + fn arch_emit_f64_convert_si32(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; scvtf D(map_xmm(dst).v()), W(map_gpr(src).x())); + } + fn arch_emit_f64_convert_si64(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; scvtf D(map_xmm(dst).v()), X(map_gpr(src).x())); + } + fn arch_emit_f64_convert_ui32(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; ucvtf D(map_xmm(dst).v()), W(map_gpr(src).x())); + } + fn arch_emit_f64_convert_ui64(&mut self, src: GPR, dst: XMM) { + dynasm!(self ; ucvtf D(map_xmm(dst).v()), X(map_gpr(src).x())); } - fn emit_vcmpless(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + // These instructions are only used in itruncf-type/fconverti-type opcodes. + fn emit_btc_gpr_imm8_32(&mut self, src: u8, dst: GPR) { + unimplemented!(); } - fn emit_vcmplesd(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + fn emit_btc_gpr_imm8_64(&mut self, src: u8, dst: GPR) { + unimplemented!(); + } + fn emit_cmovae_gpr_32(&mut self, src: GPR, dst: GPR) { + unimplemented!(); + } + fn emit_cmovae_gpr_64(&mut self, src: GPR, dst: GPR) { + unimplemented!(); } - - fn emit_ucomiss(&mut self, src: XMMOrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } fn emit_ucomisd(&mut self, src: XMMOrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } - fn emit_cvttss2si_32(&mut self, src: XMMOrMemory, dst: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } fn emit_cvttss2si_64(&mut self, src: XMMOrMemory, dst: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } fn emit_cvttsd2si_32(&mut self, src: XMMOrMemory, dst: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } fn emit_cvttsd2si_64(&mut self, src: XMMOrMemory, dst: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } - fn emit_vcvtsi2ss_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } fn emit_vcvtsi2ss_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } fn emit_vcvtsi2sd_32(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } fn emit_vcvtsi2sd_64(&mut self, src1: XMM, src2: GPROrMemory, dst: XMM) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } - fn emit_test_gpr_64(&mut self, reg: GPR) { - dynasm!(self ; .dword 0 ; .dword 29) + unimplemented!(); } fn emit_ud2(&mut self) {