Fix missing canonicalizations.

2025-04-25 02:12:13 +00:00 · 2020-03-17 13:52:11 +08:00 · 2020-03-17 13:52:11 +08:00 · 4357c15046
commit 4357c15046
parent 3ee7f43b1c
1 changed files with 438 additions and 396 deletions
--- a/lib/singlepass-backend/src/codegen_x64.rs
+++ b/lib/singlepass-backend/src/codegen_x64.rs
@ -294,6 +294,15 @@ enum CanonicalizeType {
    F64,
 }

+impl CanonicalizeType {
+    fn to_size(&self) -> Size {
+        match self {
+            CanonicalizeType::F32 => Size::S32,
+            CanonicalizeType::F64 => Size::S64,
+        }
+    }
+}
+
 trait PopMany<T> {
    fn peek1(&self) -> Result<&T, CodegenError>;
    fn pop1(&mut self) -> Result<T, CodegenError>;
@ -330,6 +339,19 @@ impl<T> PopMany<T> for Vec<T> {
    }
 }

+trait WpTypeExt {
+    fn is_float(&self) -> bool;
+}
+
+impl WpTypeExt for WpType {
+    fn is_float(&self) -> bool {
+        match self {
+            WpType::F32 | WpType::F64 => true,
+            _ => false,
+        }
+    }
+}
+
 enum FuncPtrInner {}
 #[repr(transparent)]
 #[derive(Copy, Clone, Debug)]
@ -2784,6 +2806,8 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
        module_info: &ModuleInfo,
        _source_loc: u32,
    ) -> Result<(), CodegenError> {
+        assert!(self.fp_stack.len() <= self.value_stack.len());
+
        let a = self.assembler.as_mut().unwrap();

        match ev {
@ -2951,13 +2975,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                            Location::GPR(tmp),
                        );
                        let ty = type_to_wp_type(module_info.globals[local_index].desc.ty);
-                        match ty {
-                            WpType::F32 | WpType::F64 => {
+                        if ty.is_float() {
                            self.fp_stack
                                .push(FloatValue::new(self.value_stack.len() - 1));
                        }
-                            _ => {}
-                        }
                        self.machine.acquire_locations(
                            a,
                            &[(ty, MachineValue::WasmStack(self.value_stack.len()))],
@ -2979,13 +3000,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                            Location::GPR(tmp),
                        );
                        let ty = type_to_wp_type(module_info.imported_globals[import_index].1.ty);
-                        match ty {
-                            WpType::F32 | WpType::F64 => {
+                        if ty.is_float() {
                            self.fp_stack
                                .push(FloatValue::new(self.value_stack.len() - 1));
                        }
-                            _ => {}
-                        }
                        self.machine.acquire_locations(
                            a,
                            &[(ty, MachineValue::WasmStack(self.value_stack.len()))],
@ -3053,8 +3071,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    Location::Memory(tmp, (global_index as i32) * 8),
                    Location::GPR(tmp),
                );
-                match ty {
-                    WpType::F32 | WpType::F64 => {
+                if ty.is_float() {
                    let fp = self.fp_stack.pop1()?;
                    if a.arch_supports_canonicalize_nan()
                        && self.config.nan_canonicalization
@ -3081,8 +3098,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                            Location::Memory(tmp, LocalGlobal::offset_data() as i32),
                        );
                    }
-                    }
-                    _ => {
+                } else {
                    Self::emit_relaxed_binop(
                        a,
                        &mut self.machine,
@ -3092,7 +3108,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        Location::Memory(tmp, LocalGlobal::offset_data() as i32),
                    );
                }
-                }
                self.machine.release_temp_gpr(tmp);
            }
            Operator::LocalGet { local_index } => {
@ -3111,21 +3126,17 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    ret,
                );
                self.value_stack.push(ret);
-                match self.local_types[local_index] {
-                    WpType::F32 | WpType::F64 => {
+                if self.local_types[local_index].is_float() {
                    self.fp_stack
                        .push(FloatValue::new(self.value_stack.len() - 1));
                }
-                    _ => {}
-                }
            }
            Operator::LocalSet { local_index } => {
                let local_index = local_index as usize;
                let loc =
                    get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());

-                match self.local_types[local_index] {
-                    WpType::F32 | WpType::F64 => {
+                if self.local_types[local_index].is_float() {
                    let fp = self.fp_stack.pop1()?;
                    if a.arch_supports_canonicalize_nan()
                        && self.config.nan_canonicalization
@ -3152,8 +3163,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                            self.locals[local_index],
                        );
                    }
-                    }
-                    _ => {
+                } else {
                    Self::emit_relaxed_binop(
                        a,
                        &mut self.machine,
@ -3164,13 +3174,11 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    );
                }
            }
-            }
            Operator::LocalTee { local_index } => {
                let local_index = local_index as usize;
                let loc = *self.value_stack.last().unwrap();

-                match self.local_types[local_index] {
-                    WpType::F32 | WpType::F64 => {
+                if self.local_types[local_index].is_float() {
                    let fp = self.fp_stack.peek1()?;
                    if a.arch_supports_canonicalize_nan()
                        && self.config.nan_canonicalization
@ -3197,8 +3205,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                            self.locals[local_index],
                        );
                    }
-                    }
-                    _ => {
+                } else {
                    Self::emit_relaxed_binop(
                        a,
                        &mut self.machine,
@ -3209,7 +3216,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    );
                }
            }
-            }
            Operator::I32Const { value } => {
                self.value_stack.push(Location::Imm32(value as u32));
                self.machine
@ -4600,8 +4606,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
            }

            Operator::F32Copysign => {
-                // Preserve canonicalization state.
-
                let loc_b =
                    get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
                let loc_a =
@ -4613,10 +4617,31 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                )[0];
                self.value_stack.push(ret);

+                let (fp_src1, fp_src2) = self.fp_stack.pop2()?;
+                self.fp_stack
+                    .push(FloatValue::new(self.value_stack.len() - 1));
+
                let tmp1 = self.machine.acquire_temp_gpr().unwrap();
                let tmp2 = self.machine.acquire_temp_gpr().unwrap();
-                a.emit_mov(Size::S32, loc_a, Location::GPR(tmp1));
-                a.emit_mov(Size::S32, loc_b, Location::GPR(tmp2));
+
+                // Load both operands into tmp1/tmp2 unconditionally; otherwise they are never
+                // written when NaN canonicalization is disabled or unsupported.
+                let canonicalize =
+                    a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization;
+                for (fp, loc, tmp) in [(fp_src1, loc_a, tmp1), (fp_src2, loc_b, tmp2)].iter() {
+                    if canonicalize && fp.canonicalization.is_some() {
+                        // Operand is flagged for canonicalization: canonicalize while copying.
+                        Self::canonicalize_nan(
+                            a,
+                            &mut self.machine,
+                            Size::S32,
+                            *loc,
+                            Location::GPR(*tmp),
+                        );
+                    } else {
+                        a.emit_mov(Size::S32, *loc, Location::GPR(*tmp));
+                    }
+                }
                a.emit_and(
                    Size::S32,
                    Location::Imm32(0x7fffffffu32),
@ -5120,8 +5145,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
            }

            Operator::F64Copysign => {
-                // Preserve canonicalization state.
-
                let loc_b =
                    get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
                let loc_a =
@ -5133,12 +5156,33 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                )[0];
                self.value_stack.push(ret);

+                let (fp_src1, fp_src2) = self.fp_stack.pop2()?;
+                self.fp_stack
+                    .push(FloatValue::new(self.value_stack.len() - 1));
+
                let tmp1 = self.machine.acquire_temp_gpr().unwrap();
                let tmp2 = self.machine.acquire_temp_gpr().unwrap();
-                let c = self.machine.acquire_temp_gpr().unwrap();

-                a.emit_mov(Size::S64, loc_a, Location::GPR(tmp1));
-                a.emit_mov(Size::S64, loc_b, Location::GPR(tmp2));
+                // Load both operands into tmp1/tmp2 unconditionally; otherwise they are never
+                // written when NaN canonicalization is disabled or unsupported.
+                let canonicalize =
+                    a.arch_supports_canonicalize_nan() && self.config.nan_canonicalization;
+                for (fp, loc, tmp) in [(fp_src1, loc_a, tmp1), (fp_src2, loc_b, tmp2)].iter() {
+                    if canonicalize && fp.canonicalization.is_some() {
+                        // Operand is flagged for canonicalization: canonicalize while copying.
+                        Self::canonicalize_nan(
+                            a,
+                            &mut self.machine,
+                            Size::S64,
+                            *loc,
+                            Location::GPR(*tmp),
+                        );
+                    } else {
+                        a.emit_mov(Size::S64, *loc, Location::GPR(*tmp));
+                    }
+                }
+
+                let c = self.machine.acquire_temp_gpr().unwrap();

                a.emit_mov(
                    Size::S64,
@ -6861,18 +6905,15 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        false,
                    )[0];
                    self.value_stack.push(ret);
-                    match return_types[0] {
-                        WpType::F32 | WpType::F64 => {
+                    if return_types[0].is_float() {
                        a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret);
                        self.fp_stack
                            .push(FloatValue::new(self.value_stack.len() - 1));
-                        }
-                        _ => {
+                    } else {
                        a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret);
                    }
                }
            }
-            }
            Operator::CallIndirect { index, table_index } => {
                if table_index != 0 {
                    return Err(CodegenError {
@ -7017,18 +7058,15 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        false,
                    )[0];
                    self.value_stack.push(ret);
-                    match return_types[0] {
-                        WpType::F32 | WpType::F64 => {
+                    if return_types[0].is_float() {
                        a.emit_mov(Size::S64, Location::XMM(XMM::XMM0), ret);
                        self.fp_stack
                            .push(FloatValue::new(self.value_stack.len() - 1));
-                        }
-                        _ => {
+                    } else {
                        a.emit_mov(Size::S64, Location::GPR(GPR::RAX), ret);
                    }
                }
            }
-            }
            Operator::If { ty } => {
                let label_end = a.get_label();
                let label_else = a.get_label();
@ -7074,8 +7112,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {

                if !was_unreachable && frame.returns.len() > 0 {
                    let loc = *self.value_stack.last().unwrap();
-                    match frame.returns[0] {
-                        WpType::F32 | WpType::F64 => {
+                    if frame.returns[0].is_float() {
                        let fp = self.fp_stack.peek1()?;
                        if a.arch_supports_canonicalize_nan()
                            && self.config.nan_canonicalization
@ -7102,8 +7139,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                Location::GPR(GPR::RAX),
                            );
                        }
-                        }
-                        _ => {
+                    } else {
                        Self::emit_relaxed_binop(
                            a,
                            &mut self.machine,
@ -7114,7 +7150,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        );
                    }
                }
-                }

                let released: &[Location] = &self.value_stack[frame.value_stack_depth..];
                self.machine.release_locations(a, released);
@ -7177,11 +7212,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        if a.arch_supports_canonicalize_nan()
                            && self.config.nan_canonicalization =>
                    {
-                        let sz = match fp {
-                            CanonicalizeType::F32 => Size::S32,
-                            CanonicalizeType::F64 => Size::S64,
-                        };
-                        Self::canonicalize_nan(a, &mut self.machine, sz, v_a, ret);
+                        Self::canonicalize_nan(a, &mut self.machine, fp.to_size(), v_a, ret);
                    }
                    _ => {
                        if v_a != ret {
@ -7203,11 +7234,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        if a.arch_supports_canonicalize_nan()
                            && self.config.nan_canonicalization =>
                    {
-                        let sz = match fp {
-                            CanonicalizeType::F32 => Size::S32,
-                            CanonicalizeType::F64 => Size::S64,
-                        };
-                        Self::canonicalize_nan(a, &mut self.machine, sz, v_b, ret);
+                        Self::canonicalize_nan(a, &mut self.machine, fp.to_size(), v_b, ret);
                    }
                    _ => {
                        if v_b != ret {
@ -7630,6 +7657,8 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
                let target_addr =
                    get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
+                let fp = self.fp_stack.pop1()?;
+                let config_nan_canonicalization = self.config.nan_canonicalization;

                Self::emit_memory_op(
                    module_info,
@ -7642,6 +7671,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    false,
                    4,
                    |a, m, addr| {
+                        if !a.arch_supports_canonicalize_nan()
+                            || !config_nan_canonicalization
+                            || fp.canonicalization.is_none()
+                        {
                            Self::emit_relaxed_binop(
                                a,
                                m,
@ -7650,6 +7683,16 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                target_value,
                                Location::Memory(addr, 0),
                            );
+                        } else {
+                            Self::canonicalize_nan(
+                                a,
+                                m,
+                                Size::S32,
+                                target_value,
+                                Location::Memory(addr, 0),
+                            );
+                        }
+
                        Ok(())
                    },
                )?;
@ -8032,6 +8075,8 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
                let target_addr =
                    get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
+                let fp = self.fp_stack.pop1()?;
+                let config_nan_canonicalization = self.config.nan_canonicalization;

                Self::emit_memory_op(
                    module_info,
@ -8044,6 +8089,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    false,
                    8,
                    |a, m, addr| {
+                        if !a.arch_supports_canonicalize_nan()
+                            || !config_nan_canonicalization
+                            || fp.canonicalization.is_none()
+                        {
                            Self::emit_relaxed_binop(
                                a,
                                m,
@ -8052,6 +8101,15 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                target_value,
                                Location::Memory(addr, 0),
                            );
+                        } else {
+                            Self::canonicalize_nan(
+                                a,
+                                m,
+                                Size::S64,
+                                target_value,
+                                Location::Memory(addr, 0),
+                            );
+                        }
                        Ok(())
                    },
                )?;
@ -8162,8 +8220,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        });
                    }
                    let loc = *self.value_stack.last().unwrap();
-                    match frame.returns[0] {
-                        WpType::F32 | WpType::F64 => {
+                    if frame.returns[0].is_float() {
                        let fp = self.fp_stack.peek1()?;
                        if a.arch_supports_canonicalize_nan()
                            && self.config.nan_canonicalization
@ -8190,8 +8247,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                Location::GPR(GPR::RAX),
                            );
                        }
-                        }
-                        _ => {
+                    } else {
                        Self::emit_relaxed_binop(
                            a,
                            &mut self.machine,
@ -8202,7 +8258,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        );
                    }
                }
-                }
                let released = &self.value_stack[frame.value_stack_depth..];
                self.machine.release_locations_keep_state(a, released);
                a.emit_jmp(Condition::None, frame.label);
@ -8219,8 +8274,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                    }
                    let loc = *self.value_stack.last().unwrap();

-                    match frame.returns[0] {
-                        WpType::F32 | WpType::F64 => {
+                    if frame.returns[0].is_float() {
                        let fp = self.fp_stack.peek1()?;
                        if a.arch_supports_canonicalize_nan()
                            && self.config.nan_canonicalization
@ -8247,12 +8301,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                Location::GPR(GPR::RAX),
                            );
                        }
-                        }
-                        _ => {
+                    } else {
                        a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX));
                    }
                }
-                }

                let released = &self.value_stack[frame.value_stack_depth..];
                self.machine.release_locations_keep_state(a, released);
@ -8282,8 +8334,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        });
                    }
                    let loc = *self.value_stack.last().unwrap();
-                    match frame.returns[0] {
-                        WpType::F32 | WpType::F64 => {
+                    if frame.returns[0].is_float() {
                        let fp = self.fp_stack.peek1()?;
                        if a.arch_supports_canonicalize_nan()
                            && self.config.nan_canonicalization
@ -8310,12 +8361,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                Location::GPR(GPR::RAX),
                            );
                        }
-                        }
-                        _ => {
+                    } else {
                        a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX));
                    }
                }
-                }
                let released = &self.value_stack[frame.value_stack_depth..];
                self.machine.release_locations_keep_state(a, released);
                a.emit_jmp(Condition::None, frame.label);
@ -8365,8 +8414,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                            });
                        }
                        let loc = *self.value_stack.last().unwrap();
-                        match frame.returns[0] {
-                            WpType::F32 | WpType::F64 => {
+                        if frame.returns[0].is_float() {
                            let fp = self.fp_stack.peek1()?;
                            if a.arch_supports_canonicalize_nan()
                                && self.config.nan_canonicalization
@ -8393,12 +8441,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                    Location::GPR(GPR::RAX),
                                );
                            }
-                            }
-                            _ => {
+                        } else {
                            a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX));
                        }
                    }
-                    }
                    let released = &self.value_stack[frame.value_stack_depth..];
                    self.machine.release_locations_keep_state(a, released);
                    a.emit_jmp(Condition::None, frame.label);
@ -8415,8 +8461,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                            });
                        }
                        let loc = *self.value_stack.last().unwrap();
-                        match frame.returns[0] {
-                            WpType::F32 | WpType::F64 => {
+                        if frame.returns[0].is_float() {
                            let fp = self.fp_stack.peek1()?;
                            if a.arch_supports_canonicalize_nan()
                                && self.config.nan_canonicalization
@ -8443,12 +8488,10 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                    Location::GPR(GPR::RAX),
                                );
                            }
-                            }
-                            _ => {
+                        } else {
                            a.emit_mov(Size::S64, loc, Location::GPR(GPR::RAX));
                        }
                    }
-                    }
                    let released = &self.value_stack[frame.value_stack_depth..];
                    self.machine.release_locations_keep_state(a, released);
                    a.emit_jmp(Condition::None, frame.label);
@ -8462,14 +8505,18 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
            }
            Operator::Drop => {
                get_location_released(a, &mut self.machine, self.value_stack.pop().unwrap());
+                if let Some(x) = self.fp_stack.last() {
+                    if x.depth == self.value_stack.len() {
+                        self.fp_stack.pop1()?;
+                    }
+                }
            }
            Operator::End => {
                let frame = self.control_stack.pop().unwrap();

                if !was_unreachable && frame.returns.len() > 0 {
                    let loc = *self.value_stack.last().unwrap();
-                    match frame.returns[0] {
-                        WpType::F32 | WpType::F64 => {
+                    if frame.returns[0].is_float() {
                        let fp = self.fp_stack.peek1()?;
                        if a.arch_supports_canonicalize_nan()
                            && self.config.nan_canonicalization
@ -8496,8 +8543,7 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                                Location::GPR(GPR::RAX),
                            );
                        }
-                        }
-                        _ => {
+                    } else {
                        Self::emit_relaxed_binop(
                            a,
                            &mut self.machine,
@ -8508,7 +8554,6 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        );
                    }
                }
-                }

                if self.control_stack.len() == 0 {
                    a.emit_label(frame.label);
@ -8558,14 +8603,11 @@ impl FunctionCodeGenerator<CodegenError> for X64FunctionCode {
                        )[0];
                        a.emit_mov(Size::S64, Location::GPR(GPR::RAX), loc);
                        self.value_stack.push(loc);
-                        match frame.returns[0] {
-                            WpType::F32 | WpType::F64 => {
+                        if frame.returns[0].is_float() {
                            self.fp_stack
                                .push(FloatValue::new(self.value_stack.len() - 1));
                            // we already canonicalized at the `Br*` instruction or here previously.
                        }
-                            _ => {}
-                        }
                    }
                }
            }