From 5d2ee4fcc84fdd4d94b49488bf8857746c777633 Mon Sep 17 00:00:00 2001
From: losfair <zhy20000919@hotmail.com>
Date: Mon, 18 Nov 2019 01:39:44 +0800
Subject: [PATCH] Fix floating point comparison involving NaNs.

---
 lib/singlepass-backend/src/codegen_x64.rs     |  5 +-
 .../src/translator_aarch64.rs                 | 70 +++++++------------
 2 files changed, 30 insertions(+), 45 deletions(-)
diff --git a/lib/singlepass-backend/src/codegen_x64.rs b/lib/singlepass-backend/src/codegen_x64.rs
index c06217964..63d08ecff 100644
--- a/lib/singlepass-backend/src/codegen_x64.rs
+++ b/lib/singlepass-backend/src/codegen_x64.rs
@@ -1336,7 +1336,10 @@ impl X64FunctionCode {
         value_stack.push(ret);
 
         Self::emit_relaxed_avx(a, m, f, loc_a, loc_b, ret);
-        a.emit_and(Size::S32, Location::Imm32(1), ret); // FIXME: Why?
+
+        // Normalize the comparison result to 0 or 1. Backing implementations are inconsistent:
+        // a true result may have all bits set to one, or only the least significant bit.
+        a.emit_and(Size::S32, Location::Imm32(1), ret);
     }
 
     /// Floating point (AVX) binary operation with both operands popped from the virtual stack.
diff --git a/lib/singlepass-backend/src/translator_aarch64.rs b/lib/singlepass-backend/src/translator_aarch64.rs
index 7070eea4e..d6361fc1a 100644
--- a/lib/singlepass-backend/src/translator_aarch64.rs
+++ b/lib/singlepass-backend/src/translator_aarch64.rs
@@ -408,39 +408,18 @@ macro_rules! avx_fn {
     }
 }
 
-macro_rules! avx_fn_bitwise_inv {
-    ($ins:ident, $width:ident, $width_int:ident, $name:ident) => {
+macro_rules! avx_cmp {
+    ($cmpty:tt, $width:ident, $width_int:ident, $name:ident) => {
         fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) {
             match src2 {
-                XMMOrMemory::XMM(src2) => dynasm!(self ; $ins $width(map_xmm(dst).v()), $width(map_xmm(src1).v()), $width(map_xmm(src2).v())),
-                XMMOrMemory::Memory(base, disp) => {
-                    if disp >= 0 {
-                        dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, <disp ; add x_tmp3, x_tmp3, X(map_gpr(base).x()));
-                    } else {
-                        dynasm!(self ; b >after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, <disp ; sub x_tmp3, X(map_gpr(base).x()), x_tmp3);
-                    }
-
-                    dynasm!(self
-                        ; ldr $width_int(X_TMP1), [x_tmp3]
-                        ; mov v_tmp1.$width[0], $width_int(X_TMP1)
-                        ; $ins $width(map_xmm(dst).v()), $width(map_xmm(src1).v()), $width(V_TMP1)
+                XMMOrMemory::XMM(src2) => {
+                    dynasm!(
+                        self
+                        ; fcmpe $width(map_xmm(src1).v()), $width(map_xmm(src2).v())
+                        ; cset w_tmp1, $cmpty
+                        ; mov V(map_xmm(dst).v()).$width[0], $width_int(X_TMP1)
                     );
-                }
-            }
-            dynasm!(self
-                ; mov $width_int(X_TMP1), V(map_xmm(dst).v()).$width[0]
-                ; mvn $width_int(X_TMP1), $width_int(X_TMP1)
-                ; mov V(map_xmm(dst).v()).$width[0], $width_int(X_TMP1)
-            );
-        }
-    }
-}
-
-macro_rules! avx_fn_reversed {
-    ($ins:ident, $width:ident, $width_int:ident, $name:ident) => {
-        fn $name(&mut self, src1: XMM, src2: XMMOrMemory, dst: XMM) {
-            match src2 {
-                XMMOrMemory::XMM(src2) => dynasm!(self ; $ins $width(map_xmm(dst).v()), $width(map_xmm(src2).v()), $width(map_xmm(src1).v())),
+                },
                 XMMOrMemory::Memory(base, disp) => {
                     if disp >= 0 {
                         dynasm!(self ; b >after ; disp: ; .dword disp ; after: ; ldr w_tmp3, <disp ; add x_tmp3, x_tmp3, X(map_gpr(base).x()));
@@ -448,10 +427,13 @@ macro_rules! avx_fn_reversed {
                         dynasm!(self ; b >after ; disp: ; .dword -disp ; after: ; ldr w_tmp3, <disp ; sub x_tmp3, X(map_gpr(base).x()), x_tmp3);
                     }
 
-                    dynasm!(self
+                    dynasm!(
+                        self
                         ; ldr $width_int(X_TMP1), [x_tmp3]
                         ; mov v_tmp1.$width[0], $width_int(X_TMP1)
-                        ; $ins $width(map_xmm(dst).v()), $width(V_TMP1), $width(map_xmm(src1).v())
+                        ; fcmpe $width(map_xmm(src1).v()), $width(V_TMP1)
+                        ; cset w_tmp1, $cmpty
+                        ; mov V(map_xmm(dst).v()).$width[0], $width_int(X_TMP1)
                     );
                 }
             }
@@ -1438,12 +1420,12 @@ impl Emitter for Assembler {
     avx_fn!(fdiv, S, W, emit_vdivss);
     avx_fn!(fmax, S, W, emit_vmaxss);
     avx_fn!(fmin, S, W, emit_vminss);
-    avx_fn!(fcmgt, S, W, emit_vcmpgtss);
-    avx_fn_reversed!(fcmgt, S, W, emit_vcmpltss); // b gt a <=> a lt b
-    avx_fn!(fcmge, S, W, emit_vcmpgess);
-    avx_fn_bitwise_inv!(fcmgt, S, W, emit_vcmpless); // a not gt b <=> a le b
-    avx_fn!(fcmeq, S, W, emit_vcmpeqss);
-    avx_fn_bitwise_inv!(fcmeq, S, W, emit_vcmpneqss); // a not eq b <=> a neq b
+    avx_cmp!(gt, S, W, emit_vcmpgtss);
+    avx_cmp!(ge, S, W, emit_vcmpgess);
+    avx_cmp!(mi, S, W, emit_vcmpltss);
+    avx_cmp!(ls, S, W, emit_vcmpless);
+    avx_cmp!(eq, S, W, emit_vcmpeqss);
+    avx_cmp!(ne, S, W, emit_vcmpneqss);
     avx_fn_unop!(fsqrt, S, emit_vsqrtss);
     avx_fn_unop!(frintn, S, emit_vroundss_nearest); // to nearest with ties to even
     avx_fn_unop!(frintm, S, emit_vroundss_floor); // toward minus infinity
@@ -1457,12 +1439,12 @@ impl Emitter for Assembler {
     avx_fn!(fdiv, D, X, emit_vdivsd);
     avx_fn!(fmax, D, X, emit_vmaxsd);
     avx_fn!(fmin, D, X, emit_vminsd);
-    avx_fn!(fcmgt, D, X, emit_vcmpgtsd);
-    avx_fn_reversed!(fcmgt, D, X, emit_vcmpltsd); // b gt a <=> a lt b
-    avx_fn!(fcmge, D, X, emit_vcmpgesd);
-    avx_fn_bitwise_inv!(fcmgt, D, X, emit_vcmplesd); // a not gt b <=> a le b
-    avx_fn!(fcmeq, D, X, emit_vcmpeqsd);
-    avx_fn_bitwise_inv!(fcmeq, D, X, emit_vcmpneqsd); // a not eq b <=> a neq b
+    avx_cmp!(gt, D, X, emit_vcmpgtsd);
+    avx_cmp!(ge, D, X, emit_vcmpgesd);
+    avx_cmp!(mi, D, X, emit_vcmpltsd);
+    avx_cmp!(ls, D, X, emit_vcmplesd);
+    avx_cmp!(eq, D, X, emit_vcmpeqsd);
+    avx_cmp!(ne, D, X, emit_vcmpneqsd);
     avx_fn_unop!(fsqrt, D, emit_vsqrtsd);
     avx_fn_unop!(frintn, D, emit_vroundsd_nearest); // to nearest with ties to even
     avx_fn_unop!(frintm, D, emit_vroundsd_floor); // toward minus infinity