863: Rewrite Min/Max to handle all cases correctly. Fixes 545 spectest failures. r=nlewycky a=nlewycky

# Description
The LLVM backend was not quite following the Wasm spec for {F32,F64,F32x4,F64x2}{Min,Max}. We used the `@llvm.minnum` and `@llvm.maxnum` intrinsics, which do not handle the corner cases the same way. When we tried to use `@llvm.minimum` and `@llvm.maximum`, which do, we got an internal error from the x86 backend. I was hoping that crash would go away with the upgrade to LLVM 9, but it does not.

Reimplement these operations using plain LLVM instructions.

# Review

- [x] Add a short description of the change to the CHANGELOG.md file


Co-authored-by: Nick Lewycky <nick@wasmer.io>
This commit is contained in:
bors[bot]
2019-10-09 22:24:07 +00:00
committed by GitHub
4 changed files with 369 additions and 589 deletions

View File

@ -2681,86 +2681,404 @@ impl FunctionCodeGenerator<CodegenError> for LLVMFunctionCodeGenerator {
state.push1(bits);
}
Operator::F32Min => {
    // Open-coded equivalent of LLVM's @llvm.minimum intrinsic. The
    // intrinsic itself is deliberately not called: its x86 lowering
    // encounters a fatal error in LLVM 8 and LLVM 9.
    let (v1, v2) = state.pop2()?;
    let v1 = canonicalize_nans(builder, intrinsics, v1);
    let v2 = canonicalize_nans(builder, intrinsics, v2);
    let (v1, v2) = (v1.into_float_value(), v2.into_float_value());

    // Zero is never NaN, so an unordered/ordered compare against it
    // tests only whether the other operand is NaN.
    let v1_is_nan =
        builder.build_float_compare(FloatPredicate::UNO, v1, intrinsics.f32_zero, "nan");
    let v2_is_not_nan =
        builder.build_float_compare(FloatPredicate::ORD, v2, intrinsics.f32_zero, "notnan");

    // Operands that compare equal as floats yet differ in their bit
    // patterns are a +0.0 / -0.0 pair.
    let v1_repr = builder
        .build_bitcast(v1, intrinsics.i32_ty, "")
        .into_int_value();
    let v2_repr = builder
        .build_bitcast(v2, intrinsics.i32_ty, "")
        .into_int_value();
    let repr_ne = builder.build_int_compare(IntPredicate::NE, v1_repr, v2_repr, "");
    let float_eq = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, "");
    let min_cmp = builder.build_float_compare(FloatPredicate::OLT, v1, v2, "");

    // For a mixed-sign zero pair the minimum is -0.0; substitute it for
    // v2 so the final select (which falls through to v2) yields it.
    let mixed_zeros = builder.build_and(
        builder.build_and(float_eq, repr_ne, ""),
        v2_is_not_nan,
        "",
    );
    let negative_zero = intrinsics.f32_ty.const_float(-0.0);
    let v2 = builder
        .build_select(mixed_zeros, negative_zero, v2, "")
        .into_float_value();

    // Take v1 when it is NaN or strictly smaller; otherwise take v2,
    // which also covers the case where only v2 is NaN.
    let take_v1 = builder.build_or(v1_is_nan, min_cmp, "");
    let res = builder.build_select(take_v1, v1, v2, "");
    state.push1(res);
}
Operator::F64Min => {
    // Open-coded equivalent of LLVM's @llvm.minimum intrinsic. The
    // intrinsic itself is deliberately not called: its x86 lowering
    // encounters a fatal error in LLVM 8 and LLVM 9.
    let (v1, v2) = state.pop2()?;
    let v1 = canonicalize_nans(builder, intrinsics, v1);
    let v2 = canonicalize_nans(builder, intrinsics, v2);
    let (v1, v2) = (v1.into_float_value(), v2.into_float_value());

    // Zero is never NaN, so an unordered/ordered compare against it
    // tests only whether the other operand is NaN.
    let v1_is_nan =
        builder.build_float_compare(FloatPredicate::UNO, v1, intrinsics.f64_zero, "nan");
    let v2_is_not_nan =
        builder.build_float_compare(FloatPredicate::ORD, v2, intrinsics.f64_zero, "notnan");

    // Operands that compare equal as floats yet differ in their bit
    // patterns are a +0.0 / -0.0 pair.
    let v1_repr = builder
        .build_bitcast(v1, intrinsics.i64_ty, "")
        .into_int_value();
    let v2_repr = builder
        .build_bitcast(v2, intrinsics.i64_ty, "")
        .into_int_value();
    let repr_ne = builder.build_int_compare(IntPredicate::NE, v1_repr, v2_repr, "");
    let float_eq = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, "");
    let min_cmp = builder.build_float_compare(FloatPredicate::OLT, v1, v2, "");

    // For a mixed-sign zero pair the minimum is -0.0; substitute it for
    // v2 so the final select (which falls through to v2) yields it.
    let mixed_zeros = builder.build_and(
        builder.build_and(float_eq, repr_ne, ""),
        v2_is_not_nan,
        "",
    );
    let negative_zero = intrinsics.f64_ty.const_float(-0.0);
    let v2 = builder
        .build_select(mixed_zeros, negative_zero, v2, "")
        .into_float_value();

    // Take v1 when it is NaN or strictly smaller; otherwise take v2,
    // which also covers the case where only v2 is NaN.
    let take_v1 = builder.build_or(v1_is_nan, min_cmp, "");
    let res = builder.build_select(take_v1, v1, v2, "");
    state.push1(res);
}
Operator::F32x4Min => {
    // Open-coded, lane-wise equivalent of LLVM's @llvm.minimum
    // intrinsic. The intrinsic itself is deliberately not called: its
    // x86 lowering encounters a fatal error in LLVM 8 and LLVM 9.
    let (v1, v2) = state.pop2()?;
    // Reinterpret the popped operands as four f32 lanes.
    let v1 = builder.build_bitcast(v1, intrinsics.f32x4_ty, "");
    let v2 = builder.build_bitcast(v2, intrinsics.f32x4_ty, "");
    let v1 = canonicalize_nans(builder, intrinsics, v1);
    let v2 = canonicalize_nans(builder, intrinsics, v2);
    let (v1, v2) = (v1.into_vector_value(), v2.into_vector_value());

    // Zero is never NaN, so an unordered/ordered compare against it
    // tests only whether the other operand's lane is NaN.
    let v1_is_nan =
        builder.build_float_compare(FloatPredicate::UNO, v1, intrinsics.f32x4_zero, "nan");
    let v2_is_not_nan = builder.build_float_compare(
        FloatPredicate::ORD,
        v2,
        intrinsics.f32x4_zero,
        "notnan",
    );

    // Lanes that compare equal as floats yet differ in their bit
    // patterns are a +0.0 / -0.0 pair.
    let v1_repr = builder
        .build_bitcast(v1, intrinsics.i32x4_ty, "")
        .into_vector_value();
    let v2_repr = builder
        .build_bitcast(v2, intrinsics.i32x4_ty, "")
        .into_vector_value();
    let repr_ne = builder.build_int_compare(IntPredicate::NE, v1_repr, v2_repr, "");
    let float_eq = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, "");
    let min_cmp = builder.build_float_compare(FloatPredicate::OLT, v1, v2, "");

    // For a mixed-sign zero pair the minimum is -0.0; substitute it for
    // the v2 lane so the final select (which falls through to v2)
    // yields it.
    let mixed_zeros = builder.build_and(
        builder.build_and(float_eq, repr_ne, ""),
        v2_is_not_nan,
        "",
    );
    let negative_zero = splat_vector(
        builder,
        intrinsics,
        intrinsics.f32_ty.const_float(-0.0).as_basic_value_enum(),
        intrinsics.f32x4_ty,
        "",
    );
    let v2 = builder
        .build_select(mixed_zeros, negative_zero, v2, "")
        .into_vector_value();

    // Per lane: take v1 when it is NaN or strictly smaller; otherwise
    // take v2, which also covers the case where only v2 is NaN.
    let take_v1 = builder.build_or(v1_is_nan, min_cmp, "");
    let res = builder.build_select(take_v1, v1, v2, "");
    // The result is pushed back in i128 form.
    let res = builder.build_bitcast(res, intrinsics.i128_ty, "");
    state.push1(res);
}
Operator::F64x2Min => {
    // Open-coded, lane-wise equivalent of LLVM's @llvm.minimum
    // intrinsic. The intrinsic itself is deliberately not called: its
    // x86 lowering encounters a fatal error in LLVM 8 and LLVM 9.
    let (v1, v2) = state.pop2()?;
    // Reinterpret the popped operands as two f64 lanes.
    let v1 = builder.build_bitcast(v1, intrinsics.f64x2_ty, "");
    let v2 = builder.build_bitcast(v2, intrinsics.f64x2_ty, "");
    let v1 = canonicalize_nans(builder, intrinsics, v1);
    let v2 = canonicalize_nans(builder, intrinsics, v2);
    let (v1, v2) = (v1.into_vector_value(), v2.into_vector_value());

    // Zero is never NaN, so an unordered/ordered compare against it
    // tests only whether the other operand's lane is NaN.
    let v1_is_nan =
        builder.build_float_compare(FloatPredicate::UNO, v1, intrinsics.f64x2_zero, "nan");
    let v2_is_not_nan = builder.build_float_compare(
        FloatPredicate::ORD,
        v2,
        intrinsics.f64x2_zero,
        "notnan",
    );

    // Lanes that compare equal as floats yet differ in their bit
    // patterns are a +0.0 / -0.0 pair.
    let v1_repr = builder
        .build_bitcast(v1, intrinsics.i64x2_ty, "")
        .into_vector_value();
    let v2_repr = builder
        .build_bitcast(v2, intrinsics.i64x2_ty, "")
        .into_vector_value();
    let repr_ne = builder.build_int_compare(IntPredicate::NE, v1_repr, v2_repr, "");
    let float_eq = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, "");
    let min_cmp = builder.build_float_compare(FloatPredicate::OLT, v1, v2, "");

    // For a mixed-sign zero pair the minimum is -0.0; substitute it for
    // the v2 lane so the final select (which falls through to v2)
    // yields it.
    let mixed_zeros = builder.build_and(
        builder.build_and(float_eq, repr_ne, ""),
        v2_is_not_nan,
        "",
    );
    let negative_zero = splat_vector(
        builder,
        intrinsics,
        intrinsics.f64_ty.const_float(-0.0).as_basic_value_enum(),
        intrinsics.f64x2_ty,
        "",
    );
    let v2 = builder
        .build_select(mixed_zeros, negative_zero, v2, "")
        .into_vector_value();

    // Per lane: take v1 when it is NaN or strictly smaller; otherwise
    // take v2, which also covers the case where only v2 is NaN.
    let take_v1 = builder.build_or(v1_is_nan, min_cmp, "");
    let res = builder.build_select(take_v1, v1, v2, "");
    // The result is pushed back in i128 form.
    let res = builder.build_bitcast(res, intrinsics.i128_ty, "");
    state.push1(res);
}
Operator::F32Max => {
    // Open-coded equivalent of LLVM's @llvm.maximum intrinsic. The
    // intrinsic itself is deliberately not called: its x86 lowering
    // encounters a fatal error in LLVM 8 and LLVM 9.
    let (v1, v2) = state.pop2()?;
    let v1 = canonicalize_nans(builder, intrinsics, v1);
    let v2 = canonicalize_nans(builder, intrinsics, v2);
    let (v1, v2) = (v1.into_float_value(), v2.into_float_value());

    // Zero is never NaN, so an unordered/ordered compare against it
    // tests only whether the other operand is NaN.
    let v1_is_nan =
        builder.build_float_compare(FloatPredicate::UNO, v1, intrinsics.f32_zero, "nan");
    let v2_is_not_nan =
        builder.build_float_compare(FloatPredicate::ORD, v2, intrinsics.f32_zero, "notnan");

    // Operands that compare equal as floats yet differ in their bit
    // patterns are a +0.0 / -0.0 pair.
    let v1_repr = builder
        .build_bitcast(v1, intrinsics.i32_ty, "")
        .into_int_value();
    let v2_repr = builder
        .build_bitcast(v2, intrinsics.i32_ty, "")
        .into_int_value();
    let repr_ne = builder.build_int_compare(IntPredicate::NE, v1_repr, v2_repr, "");
    let float_eq = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, "");
    let max_cmp = builder.build_float_compare(FloatPredicate::OGT, v1, v2, "");

    // For a mixed-sign zero pair the maximum is +0.0; substitute it for
    // v2 so the final select (which falls through to v2) yields it.
    let mixed_zeros = builder.build_and(
        builder.build_and(float_eq, repr_ne, ""),
        v2_is_not_nan,
        "",
    );
    let v2 = builder
        .build_select(mixed_zeros, intrinsics.f32_zero, v2, "")
        .into_float_value();

    // Take v1 when it is NaN or strictly greater; otherwise take v2,
    // which also covers the case where only v2 is NaN.
    let take_v1 = builder.build_or(v1_is_nan, max_cmp, "");
    let res = builder.build_select(take_v1, v1, v2, "");
    state.push1(res);
}
Operator::F64Max => {
    // Open-coded equivalent of LLVM's @llvm.maximum intrinsic. The
    // intrinsic itself is deliberately not called: its x86 lowering
    // encounters a fatal error in LLVM 8 and LLVM 9.
    let (v1, v2) = state.pop2()?;
    let v1 = canonicalize_nans(builder, intrinsics, v1);
    let v2 = canonicalize_nans(builder, intrinsics, v2);
    let (v1, v2) = (v1.into_float_value(), v2.into_float_value());

    // Zero is never NaN, so an unordered/ordered compare against it
    // tests only whether the other operand is NaN.
    let v1_is_nan =
        builder.build_float_compare(FloatPredicate::UNO, v1, intrinsics.f64_zero, "nan");
    let v2_is_not_nan =
        builder.build_float_compare(FloatPredicate::ORD, v2, intrinsics.f64_zero, "notnan");

    // Operands that compare equal as floats yet differ in their bit
    // patterns are a +0.0 / -0.0 pair.
    let v1_repr = builder
        .build_bitcast(v1, intrinsics.i64_ty, "")
        .into_int_value();
    let v2_repr = builder
        .build_bitcast(v2, intrinsics.i64_ty, "")
        .into_int_value();
    let repr_ne = builder.build_int_compare(IntPredicate::NE, v1_repr, v2_repr, "");
    let float_eq = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, "");
    let max_cmp = builder.build_float_compare(FloatPredicate::OGT, v1, v2, "");

    // For a mixed-sign zero pair the maximum is +0.0; substitute it for
    // v2 so the final select (which falls through to v2) yields it.
    let mixed_zeros = builder.build_and(
        builder.build_and(float_eq, repr_ne, ""),
        v2_is_not_nan,
        "",
    );
    let v2 = builder
        .build_select(mixed_zeros, intrinsics.f64_zero, v2, "")
        .into_float_value();

    // Take v1 when it is NaN or strictly greater; otherwise take v2,
    // which also covers the case where only v2 is NaN.
    let take_v1 = builder.build_or(v1_is_nan, max_cmp, "");
    let res = builder.build_select(take_v1, v1, v2, "");
    state.push1(res);
}
Operator::F32x4Max => {
    // Open-coded, lane-wise equivalent of LLVM's @llvm.maximum
    // intrinsic. The intrinsic itself is deliberately not called: its
    // x86 lowering encounters a fatal error in LLVM 8 and LLVM 9.
    let (v1, v2) = state.pop2()?;
    // Reinterpret the popped operands as four f32 lanes.
    let v1 = builder.build_bitcast(v1, intrinsics.f32x4_ty, "");
    let v2 = builder.build_bitcast(v2, intrinsics.f32x4_ty, "");
    let v1 = canonicalize_nans(builder, intrinsics, v1);
    let v2 = canonicalize_nans(builder, intrinsics, v2);
    let (v1, v2) = (v1.into_vector_value(), v2.into_vector_value());

    // Zero is never NaN, so an unordered/ordered compare against it
    // tests only whether the other operand's lane is NaN.
    let v1_is_nan =
        builder.build_float_compare(FloatPredicate::UNO, v1, intrinsics.f32x4_zero, "nan");
    let v2_is_not_nan = builder.build_float_compare(
        FloatPredicate::ORD,
        v2,
        intrinsics.f32x4_zero,
        "notnan",
    );

    // Lanes that compare equal as floats yet differ in their bit
    // patterns are a +0.0 / -0.0 pair.
    let v1_repr = builder
        .build_bitcast(v1, intrinsics.i32x4_ty, "")
        .into_vector_value();
    let v2_repr = builder
        .build_bitcast(v2, intrinsics.i32x4_ty, "")
        .into_vector_value();
    let repr_ne = builder.build_int_compare(IntPredicate::NE, v1_repr, v2_repr, "");
    let float_eq = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, "");
    let max_cmp = builder.build_float_compare(FloatPredicate::OGT, v1, v2, "");

    // For a mixed-sign zero pair the maximum is +0.0; substitute it for
    // the v2 lane so the final select (which falls through to v2)
    // yields it.
    let mixed_zeros = builder.build_and(
        builder.build_and(float_eq, repr_ne, ""),
        v2_is_not_nan,
        "",
    );
    let zero = splat_vector(
        builder,
        intrinsics,
        intrinsics.f32_zero.as_basic_value_enum(),
        intrinsics.f32x4_ty,
        "",
    );
    let v2 = builder
        .build_select(mixed_zeros, zero, v2, "")
        .into_vector_value();

    // Per lane: take v1 when it is NaN or strictly greater; otherwise
    // take v2, which also covers the case where only v2 is NaN.
    let take_v1 = builder.build_or(v1_is_nan, max_cmp, "");
    let res = builder.build_select(take_v1, v1, v2, "");
    // The result is pushed back in i128 form.
    let res = builder.build_bitcast(res, intrinsics.i128_ty, "");
    state.push1(res);
}
Operator::F64x2Max => {
    // Open-coded, lane-wise equivalent of LLVM's @llvm.maximum
    // intrinsic. The intrinsic itself is deliberately not called: its
    // x86 lowering encounters a fatal error in LLVM 8 and LLVM 9.
    let (v1, v2) = state.pop2()?;
    // Reinterpret the popped operands as two f64 lanes.
    let v1 = builder.build_bitcast(v1, intrinsics.f64x2_ty, "");
    let v2 = builder.build_bitcast(v2, intrinsics.f64x2_ty, "");
    let v1 = canonicalize_nans(builder, intrinsics, v1);
    let v2 = canonicalize_nans(builder, intrinsics, v2);
    let (v1, v2) = (v1.into_vector_value(), v2.into_vector_value());

    // Zero is never NaN, so an unordered/ordered compare against it
    // tests only whether the other operand's lane is NaN.
    let v1_is_nan =
        builder.build_float_compare(FloatPredicate::UNO, v1, intrinsics.f64x2_zero, "nan");
    let v2_is_not_nan = builder.build_float_compare(
        FloatPredicate::ORD,
        v2,
        intrinsics.f64x2_zero,
        "notnan",
    );

    // Lanes that compare equal as floats yet differ in their bit
    // patterns are a +0.0 / -0.0 pair.
    let v1_repr = builder
        .build_bitcast(v1, intrinsics.i64x2_ty, "")
        .into_vector_value();
    let v2_repr = builder
        .build_bitcast(v2, intrinsics.i64x2_ty, "")
        .into_vector_value();
    let repr_ne = builder.build_int_compare(IntPredicate::NE, v1_repr, v2_repr, "");
    let float_eq = builder.build_float_compare(FloatPredicate::OEQ, v1, v2, "");
    let max_cmp = builder.build_float_compare(FloatPredicate::OGT, v1, v2, "");

    // For a mixed-sign zero pair the maximum is +0.0; substitute it for
    // the v2 lane so the final select (which falls through to v2)
    // yields it.
    let mixed_zeros = builder.build_and(
        builder.build_and(float_eq, repr_ne, ""),
        v2_is_not_nan,
        "",
    );
    let zero = splat_vector(
        builder,
        intrinsics,
        intrinsics.f64_zero.as_basic_value_enum(),
        intrinsics.f64x2_ty,
        "",
    );
    let v2 = builder
        .build_select(mixed_zeros, zero, v2, "")
        .into_vector_value();

    // Per lane: take v1 when it is NaN or strictly greater; otherwise
    // take v2, which also covers the case where only v2 is NaN.
    let take_v1 = builder.build_or(v1_is_nan, max_cmp, "");
    let res = builder.build_select(take_v1, v1, v2, "");
    // The result is pushed back in i128 form.
    let res = builder.build_bitcast(res, intrinsics.i128_ty, "");
    state.push1(res);
}

View File

@ -6,7 +6,9 @@ use inkwell::{
types::{
BasicType, FloatType, FunctionType, IntType, PointerType, StructType, VectorType, VoidType,
},
values::{BasicValue, BasicValueEnum, FloatValue, FunctionValue, IntValue, PointerValue},
values::{
BasicValue, BasicValueEnum, FloatValue, FunctionValue, IntValue, PointerValue, VectorValue,
},
AddressSpace,
};
use std::collections::HashMap;
@ -125,6 +127,8 @@ pub struct Intrinsics {
pub i128_zero: IntValue,
pub f32_zero: FloatValue,
pub f64_zero: FloatValue,
pub f32x4_zero: VectorValue,
pub f64x2_zero: VectorValue,
pub trap_unreachable: BasicValueEnum,
pub trap_call_indirect_sig: BasicValueEnum,
@ -191,6 +195,8 @@ impl Intrinsics {
let i128_zero = i128_ty.const_int(0, false);
let f32_zero = f32_ty.const_float(0.0);
let f64_zero = f64_ty.const_float(0.0);
let f32x4_zero = f32x4_ty.const_zero();
let f64x2_zero = f64x2_ty.const_zero();
let i1_ty_basic = i1_ty.as_basic_type_enum();
let i32_ty_basic = i32_ty.as_basic_type_enum();
@ -455,6 +461,8 @@ impl Intrinsics {
i128_zero,
f32_zero,
f64_zero,
f32x4_zero,
f64x2_zero,
trap_unreachable: i32_zero.as_basic_value_enum(),
trap_call_indirect_sig: i32_ty.const_int(1, false).as_basic_value_enum(),