Add support for non-trapping float to int conversions in singlepass+AArch64.

commit b7929e6561
parent 32ed6f2c10
Author: Nick Lewycky
Date:   2019-12-16 15:15:49 -08:00

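Every hunk in this commit applies the same dispatch: if the assembler reports a native float-to-int truncation for the target (`arch_has_itruncf()`), a single `arch_emit_*_trunc_*` call is emitted; otherwise the existing x86-64 SSE sequence remains as the fallback. On AArch64 the arch hooks would presumably lower to the native `fcvtzs`/`fcvtzu` instructions. A minimal sketch of that shape, using placeholder types rather than the real emitter signatures:

```rust
// Sketch only: the dispatch pattern each truncation lowering below adopts.
// `Asm` is a stand-in trait and `u8` registers are placeholders; the method
// names match the diff, but the real signatures live on the singlepass emitter.
trait Asm {
    /// Does the target (e.g. AArch64) provide a native truncating convert?
    fn arch_has_itruncf(&self) -> bool;
    /// Native truncation; on AArch64 this would be a single fcvtzu (assumption).
    fn arch_emit_i32_trunc_uf32(&mut self, src: u8, dst: u8);
    /// Existing x86-64 SSE fallback (cvttss2si).
    fn emit_cvttss2si_64(&mut self, src: u8, dst: u8);
}

fn lower_i32_trunc_uf32(a: &mut dyn Asm, tmp_in: u8, tmp_out: u8) {
    if a.arch_has_itruncf() {
        a.arch_emit_i32_trunc_uf32(tmp_in, tmp_out);
    } else {
        a.emit_cvttss2si_64(tmp_in, tmp_out);
    }
}
```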

@@ -4763,7 +4763,11 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                         },
                         None::<fn(_a: &mut Assembler, _m: &mut Machine)>,
                         |a, _m| {
-                            a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            if a.arch_has_itruncf() {
+                                a.arch_emit_i32_trunc_uf32(tmp_in, tmp_out);
+                            } else {
+                                a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            }
                         },
                     );
@@ -4876,7 +4880,11 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                             a.emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out));
                         }),
                         |a, _m| {
-                            a.emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            if a.arch_has_itruncf() {
+                                a.arch_emit_i32_trunc_sf32(tmp_in, tmp_out);
+                            } else {
+                                a.emit_cvttss2si_32(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            }
                         },
                     );
@@ -4989,7 +4997,11 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                             a.emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out));
                         }),
                         |a, _m| {
-                            a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            if a.arch_has_itruncf() {
+                                a.arch_emit_i64_trunc_sf32(tmp_in, tmp_out);
+                            } else {
+                                a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            }
                         },
                     );
@@ -5120,32 +5132,36 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                         },
                         None::<fn(_a: &mut Assembler, _m: &mut Machine)>,
                         |a, m| {
-                            let tmp = m.acquire_temp_gpr().unwrap(); // r15
-                            let tmp_x1 = m.acquire_temp_xmm().unwrap(); // xmm1
-                            let tmp_x2 = m.acquire_temp_xmm().unwrap(); // xmm3
-                            a.emit_mov(
-                                Size::S32,
-                                Location::Imm32(1593835520u32),
-                                Location::GPR(tmp),
-                            ); //float 9.22337203E+18
-                            a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x1));
-                            a.emit_mov(Size::S32, Location::XMM(tmp_in), Location::XMM(tmp_x2));
-                            a.emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in);
-                            a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
-                            a.emit_mov(
-                                Size::S64,
-                                Location::Imm64(0x8000000000000000u64),
-                                Location::GPR(tmp),
-                            );
-                            a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp));
-                            a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out);
-                            a.emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2);
-                            a.emit_cmovae_gpr_64(tmp, tmp_out);
-                            m.release_temp_xmm(tmp_x2);
-                            m.release_temp_xmm(tmp_x1);
-                            m.release_temp_gpr(tmp);
+                            if a.arch_has_itruncf() {
+                                a.arch_emit_i64_trunc_uf32(tmp_in, tmp_out);
+                            } else {
+                                let tmp = m.acquire_temp_gpr().unwrap(); // r15
+                                let tmp_x1 = m.acquire_temp_xmm().unwrap(); // xmm1
+                                let tmp_x2 = m.acquire_temp_xmm().unwrap(); // xmm3
+                                a.emit_mov(
+                                    Size::S32,
+                                    Location::Imm32(1593835520u32),
+                                    Location::GPR(tmp),
+                                ); //float 9.22337203E+18
+                                a.emit_mov(Size::S32, Location::GPR(tmp), Location::XMM(tmp_x1));
+                                a.emit_mov(Size::S32, Location::XMM(tmp_in), Location::XMM(tmp_x2));
+                                a.emit_vsubss(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in);
+                                a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                                a.emit_mov(
+                                    Size::S64,
+                                    Location::Imm64(0x8000000000000000u64),
+                                    Location::GPR(tmp),
+                                );
+                                a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp));
+                                a.emit_cvttss2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out);
+                                a.emit_ucomiss(XMMOrMemory::XMM(tmp_x1), tmp_x2);
+                                a.emit_cmovae_gpr_64(tmp, tmp_out);
+                                m.release_temp_xmm(tmp_x2);
+                                m.release_temp_xmm(tmp_x1);
+                                m.release_temp_gpr(tmp);
+                            }
                         },
                     );
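The unsigned f32-to-i64 case has no single SSE instruction, so the x86-64 fallback keeps its 2^63-bias sequence: both the direct conversion and a biased one are computed, and `cmovae` selects the biased result when the input is at or above 2^63 (the range/NaN checks have already run before this closure). A rough Rust equivalent of the arithmetic, as a sketch rather than the emitted code:

```rust
// Sketch of what the SSE fallback above computes for f32 -> u64.
// 1593835520u32 is the bit pattern of 2^63 as an f32 (the "//float 9.22337203E+18"
// immediate). Inputs are assumed in range and non-NaN, which the surrounding
// conversion checks guarantee before this code runs.
fn f32_to_u64_trunc(x: f32) -> u64 {
    let two_pow_63 = f32::from_bits(1593835520); // 2^63 as f32
    if x < two_pow_63 {
        // Fits in a signed 64-bit convert (cvttss2si).
        x as i64 as u64
    } else {
        // Bias down by 2^63, convert as signed, then restore the top bit with
        // an XOR; this is the value cmovae selects when x >= 2^63.
        ((x - two_pow_63) as i64 as u64) ^ 0x8000_0000_0000_0000
    }
}
```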
@@ -5253,7 +5269,11 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                         },
                         None::<fn(_a: &mut Assembler, _m: &mut Machine)>,
                         |a, _m| {
-                            a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            if a.arch_has_itruncf() {
+                                a.arch_emit_i32_trunc_uf64(tmp_in, tmp_out);
+                            } else {
+                                a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            }
                         },
                     );
@@ -5377,7 +5397,11 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                             a.emit_mov(Size::S32, Location::Imm32(0), Location::GPR(tmp_out));
                         }),
                         |a, _m| {
-                            a.emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out);
+                            if a.arch_has_itruncf() {
+                                a.arch_emit_i32_trunc_sf64(tmp_in, tmp_out);
+                            } else {
+                                a.emit_cvttsd2si_32(XMMOrMemory::XMM(real_in), tmp_out);
+                            }
                         },
                     );
@@ -5491,7 +5515,11 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                             a.emit_mov(Size::S64, Location::Imm64(0), Location::GPR(tmp_out));
                         }),
                         |a, _m| {
-                            a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            if a.arch_has_itruncf() {
+                                a.arch_emit_i64_trunc_sf64(tmp_in, tmp_out);
+                            } else {
+                                a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                            }
                         },
                     );
@@ -5623,32 +5651,36 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                         },
                         None::<fn(_a: &mut Assembler, _m: &mut Machine)>,
                         |a, m| {
-                            let tmp = m.acquire_temp_gpr().unwrap(); // r15
-                            let tmp_x1 = m.acquire_temp_xmm().unwrap(); // xmm1
-                            let tmp_x2 = m.acquire_temp_xmm().unwrap(); // xmm3
-                            a.emit_mov(
-                                Size::S64,
-                                Location::Imm64(4890909195324358656u64),
-                                Location::GPR(tmp),
-                            ); //double 9.2233720368547758E+18
-                            a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x1));
-                            a.emit_mov(Size::S64, Location::XMM(tmp_in), Location::XMM(tmp_x2));
-                            a.emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in);
-                            a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
-                            a.emit_mov(
-                                Size::S64,
-                                Location::Imm64(0x8000000000000000u64),
-                                Location::GPR(tmp),
-                            );
-                            a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp));
-                            a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out);
-                            a.emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2);
-                            a.emit_cmovae_gpr_64(tmp, tmp_out);
-                            m.release_temp_xmm(tmp_x2);
-                            m.release_temp_xmm(tmp_x1);
-                            m.release_temp_gpr(tmp);
+                            if a.arch_has_itruncf() {
+                                a.arch_emit_i64_trunc_uf64(tmp_in, tmp_out);
+                            } else {
+                                let tmp = m.acquire_temp_gpr().unwrap(); // r15
+                                let tmp_x1 = m.acquire_temp_xmm().unwrap(); // xmm1
+                                let tmp_x2 = m.acquire_temp_xmm().unwrap(); // xmm3
+                                a.emit_mov(
+                                    Size::S64,
+                                    Location::Imm64(4890909195324358656u64),
+                                    Location::GPR(tmp),
+                                ); //double 9.2233720368547758E+18
+                                a.emit_mov(Size::S64, Location::GPR(tmp), Location::XMM(tmp_x1));
+                                a.emit_mov(Size::S64, Location::XMM(tmp_in), Location::XMM(tmp_x2));
+                                a.emit_vsubsd(tmp_in, XMMOrMemory::XMM(tmp_x1), tmp_in);
+                                a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_in), tmp_out);
+                                a.emit_mov(
+                                    Size::S64,
+                                    Location::Imm64(0x8000000000000000u64),
+                                    Location::GPR(tmp),
+                                );
+                                a.emit_xor(Size::S64, Location::GPR(tmp_out), Location::GPR(tmp));
+                                a.emit_cvttsd2si_64(XMMOrMemory::XMM(tmp_x2), tmp_out);
+                                a.emit_ucomisd(XMMOrMemory::XMM(tmp_x1), tmp_x2);
+                                a.emit_cmovae_gpr_64(tmp, tmp_out);
+                                m.release_temp_xmm(tmp_x2);
+                                m.release_temp_xmm(tmp_x1);
+                                m.release_temp_gpr(tmp);
+                            }
                         },
                     );
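The f64-to-u64 path mirrors the f32 one, swapping in `vsubsd`/`cvttsd2si`/`ucomisd` and a double-precision bias constant. Both magic immediates are simply the IEEE-754 encodings of 2^63, which is easy to verify:

```rust
// Quick check (not compiler code): the immediates used by the two unsigned
// fallbacks are the bit patterns of 2^63 in f32 and f64 respectively.
fn main() {
    assert_eq!((9.22337203e18f32).to_bits(), 1593835520u32); // 0x5F000000
    assert_eq!(
        (9.2233720368547758e18f64).to_bits(),
        4890909195324358656u64 // 0x43E0000000000000
    );
    assert_eq!(f64::from_bits(4890909195324358656), (1u128 << 63) as f64);
}
```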