diff --git a/libdevice/imf_rounding_op.hpp b/libdevice/imf_rounding_op.hpp index 2edd99ed10c07..8c5fdf0d88f9d 100644 --- a/libdevice/imf_rounding_op.hpp +++ b/libdevice/imf_rounding_op.hpp @@ -627,4 +627,236 @@ template Ty __fp_mul(Ty x, Ty y, int rd) { Ty, (z_sig << (sizeof(Ty) * 8 - 1)) | (z_exp << (std::numeric_limits::digits - 1)) | z_fra); } + +template static UTy fra_uint_div(UTy x, UTy y, unsigned nbits) { + UTy res = 0; + unsigned iters = 0; + if (x == 0) + return 0x0; + while (iters < nbits) { + res = res << 1; + x = x << 1; + if (x > y) { + x = x - y; + res = res | 0x1; + } else if (x == y) { + res = res | 0x1; + res = res << (nbits - iters - 1); + return res; + } else { + } + iters++; + } + res = res | 0x1; + return res; +} + +template Ty __fp_div(Ty x, Ty y, int rd) { + typedef typename __iml_fp_config::utype UTy; + typedef typename __iml_fp_config::stype STy; + UTy x_bit = __builtin_bit_cast(UTy, x); + UTy y_bit = __builtin_bit_cast(UTy, y); + UTy x_exp = (x_bit >> (std::numeric_limits::digits - 1)) & + __iml_fp_config::exp_mask; + UTy y_exp = (y_bit >> (std::numeric_limits::digits - 1)) & + __iml_fp_config::exp_mask; + UTy x_fra = x_bit & __iml_fp_config::fra_mask; + UTy y_fra = y_bit & __iml_fp_config::fra_mask; + UTy x_sig = x_bit >> ((sizeof(Ty) * 8) - 1); + UTy y_sig = y_bit >> ((sizeof(Ty) * 8) - 1); + UTy z_sig = x_sig ^ y_sig; + UTy z_exp = 0x0, z_fra = 0x0; + const UTy one_bits = 0x1; + const UTy sig_off_mask = (one_bits << (sizeof(UTy) * 8 - 1)) - 1; + + if (((x_exp == __iml_fp_config::exp_mask) && (x_fra != 0x0)) || + ((y_exp == __iml_fp_config::exp_mask) && (y_fra != 0x0)) || + ((y_bit & sig_off_mask) == 0x0)) { + UTy tmp = __iml_fp_config::nan_bits; + return __builtin_bit_cast(Ty, tmp); + } + + if ((x_exp == __iml_fp_config::exp_mask) && (x_fra == 0x0)) { + if ((y_exp == __iml_fp_config::exp_mask) && (y_fra == 0x0)) { + UTy tmp = __iml_fp_config::nan_bits; + return __builtin_bit_cast(Ty, tmp); + } else { + UTy tmp = + (z_sig << (sizeof(Ty) * 8 - 1)) | __iml_fp_config::pos_inf_bits; + return __builtin_bit_cast(Ty, tmp); + } + } + + if ((x_bit & sig_off_mask) == 0x0) + return __builtin_bit_cast(Ty, (z_sig << (sizeof(UTy) * 8 - 1)) | 0x0); + + if ((y_exp == __iml_fp_config::exp_mask) && (y_fra == 0x0)) + return __builtin_bit_cast(Ty, (z_sig << (sizeof(UTy) * 8 - 1)) | 0x0); + + int sx_exp = x_exp, sy_exp = y_exp; + sx_exp = (sx_exp == 0) ? (1 - __iml_fp_config::bias) + : (sx_exp - __iml_fp_config::bias); + sy_exp = (sy_exp == 0) ? (1 - __iml_fp_config::bias) + : (sy_exp - __iml_fp_config::bias); + int exp_diff = sx_exp - sy_exp; + if (x_exp != 0x0) + x_fra = (one_bits << (std::numeric_limits::digits - 1)) | x_fra; + if (y_exp != 0x0) + y_fra = (one_bits << (std::numeric_limits::digits - 1)) | y_fra; + + if (x_fra >= y_fra) { + // x_fra / y_fra max value for fp32 is 0xFFFFFF when x is normal + // and y is subnormal, so msb_pos max value is 23 + UTy tmp = x_fra / y_fra; + UTy fra_rem = x_fra - y_fra * tmp; + int msb_pos = get_msb_pos(tmp); + int tmp2 = exp_diff + msb_pos; + if (tmp2 > __iml_fp_config::bias) + return __handling_fp_overflow(z_sig, rd); + + if (tmp2 >= (1 - __iml_fp_config::bias)) { + // Fall into normal floating point range + z_exp = tmp2 + __iml_fp_config::bias; + // For fp32, starting msb_pos bits in fra comes from tmp and we need + // 23 - msb_pos( + grs) more bits from fraction division. + z_fra = ((one_bits << msb_pos) - 1) & tmp; + z_fra = z_fra << ((std::numeric_limits::digits - 1) - msb_pos); + UTy fra_bits_quo = fra_uint_div( + fra_rem, y_fra, std::numeric_limits::digits - msb_pos + 2); + z_fra = z_fra | (fra_bits_quo >> 3); + int rb = __handling_rounding(z_sig, z_fra, fra_bits_quo & 0x7, rd); + if (rb != 0) { + z_fra++; + if (z_fra > __iml_fp_config::fra_mask) { + z_exp++; + if (z_exp == __iml_fp_config::exp_mask) + return __handling_fp_overflow(z_sig, rd); + } + } + return __builtin_bit_cast( + Ty, (z_sig << (sizeof(Ty) * 8 - 1)) | + (z_exp << (std::numeric_limits::digits - 1)) | z_fra); + } + + // orignal value can be represented as (0.1xxxx.... * 2^tmp2) + // which is equivalent to 0.00000...1xxxxx * 2^(-126) + tmp2 = tmp2 + 1; + if ((tmp2 + std::numeric_limits::digits - 1) <= + (1 - __iml_fp_config::bias)) { + bool above_half = false; + if ((tmp2 + std::numeric_limits::digits - 1) == + (1 - __iml_fp_config::bias)) + above_half = + !((x_fra == y_fra * tmp) && (tmp == (one_bits << msb_pos))); + return __handling_fp_underflow(z_sig, rd, above_half); + } else { + int rb; + // Fall into subnormal floating point range. For fp32, there are -126 - + // tmp2 leading zeros in final fra and we need get 23 + 126 + tmp2( + grs) + // bits from fraction division. + if (msb_pos >= (std::numeric_limits::digits + + __iml_fp_config::bias + tmp2)) { + unsigned fra_discard_bits = msb_pos + 3 - __iml_fp_config::bias - + std::numeric_limits::digits - tmp2; + z_fra = tmp >> fra_discard_bits; + int grs_bits = (tmp >> (fra_discard_bits - 3)) & 0x7; + if ((grs_bits & 0x1) == 0x0) { + if ((tmp & ((0x1 << (fra_discard_bits - 3)) - 0x1)) || (fra_rem != 0)) + grs_bits = grs_bits | 0x1; + } + rb = __handling_rounding(z_sig, z_fra, grs_bits, rd); + } else { + // For fp32, we need to get (23 + 126 + tmp2 + 3) - (msb_pos + 1) bits + // from fra division and the last bit is sticky bit. + z_fra = tmp; + unsigned fra_get_bits = std::numeric_limits::digits + + __iml_fp_config::bias + tmp2 - msb_pos; + z_fra = z_fra << fra_get_bits; + UTy fra_bits_quo = fra_uint_div(fra_rem, y_fra, fra_get_bits); + z_fra = z_fra | fra_bits_quo; + int grs_bits = z_fra & 0x7; + z_fra = z_fra >> 3; + rb = __handling_rounding(z_sig, z_fra, grs_bits, rd); + } + if (rb != 0) { + z_fra++; + if (z_fra > __iml_fp_config::fra_mask) { + z_exp++; + z_fra = 0x0; + } + } + return __builtin_bit_cast( + Ty, (z_sig << (sizeof(Ty) * 8 - 1)) | + (z_exp << (std::numeric_limits::digits - 1)) | z_fra); + } + } else { + // x_fra < y_fra, the final result can be represented as + // (2^exp_diff) * 0.000...01xxxxx + unsigned lz = 0; + UTy x_tmp = x_fra; + x_tmp = x_tmp << 1; + while (x_tmp < y_fra) { + lz++; + x_tmp = x_tmp << 1; + } + // x_fra < y_fra, the final result can be represented as + // (2^exp_diff) * 0.000...01xxxxx... which is equivalent to + // 2 ^ (exp_diff - lz - 1) * 1.xxxxx... + int nor_exp = exp_diff - lz - 1; + if (nor_exp > __iml_fp_config::bias) + return __handling_fp_overflow(z_sig, rd); + + if (nor_exp >= (1 - __iml_fp_config::bias)) { + z_exp = nor_exp + __iml_fp_config::bias; + x_fra = x_fra << lz; + UTy fra_bits_quo = + fra_uint_div(x_fra, y_fra, 3 + std::numeric_limits::digits); + z_fra = (fra_bits_quo >> 3) & __iml_fp_config::fra_mask; + int grs_bits = fra_bits_quo & 0x7; + int rb = __handling_rounding(z_sig, z_fra, grs_bits, rd); + if (rb != 0x0) { + z_fra++; + if (z_fra > __iml_fp_config::fra_mask) { + z_exp++; + z_fra = 0x0; + if (z_exp == __iml_fp_config::exp_mask) + return __handling_fp_overflow(z_sig, rd); + } + } + return __builtin_bit_cast( + Ty, (z_sig << (sizeof(Ty) * 8 - 1)) | + (z_exp << (std::numeric_limits::digits - 1)) | z_fra); + } + + // Fall into subnormal range or underflow happens. For fp32, + // nor_exp < -126, so (-126 - exp_diff + lz + 1) > 0 which means + // (lz - exp_diff - 126) >= 0 + unsigned lzs = lz - __iml_fp_config::bias - exp_diff + 1; + if (lzs >= (std::numeric_limits::digits - 1)) { + bool above_half = false; + if (lzs == (std::numeric_limits::digits - 1)) { + if ((x_fra << (lz + 1)) > y_fra) + above_half = true; + } + return __handling_fp_underflow(z_sig, rd, above_half); + } else { + x_fra = x_fra << lz; + UTy fra_bits_quo = + fra_uint_div(x_fra, y_fra, std::numeric_limits::digits - lzs + 2); + z_fra = fra_bits_quo >> 3; + int grs_bits = fra_bits_quo & 0x7; + int rb = __handling_rounding(z_sig, z_fra, grs_bits, rd); + if (rb != 0x0) { + z_fra++; + if (z_fra > __iml_fp_config::fra_mask) { + z_exp++; + z_fra = 0x0; + } + } + return __builtin_bit_cast( + Ty, (z_sig << (sizeof(Ty) * 8 - 1)) | + (z_exp << (std::numeric_limits::digits - 1)) | z_fra); + } + } +} #endif diff --git a/libdevice/imf_utils/fp32_round.cpp b/libdevice/imf_utils/fp32_round.cpp index 1acb1d62fd6ba..10df70f3b13ed 100644 --- a/libdevice/imf_utils/fp32_round.cpp +++ b/libdevice/imf_utils/fp32_round.cpp @@ -68,4 +68,24 @@ DEVICE_EXTERN_C_INLINE float __devicelib_imf_fmul_rz(float x, float y) { return __fp_mul(x, y, __IML_RTZ); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fdiv_rd(float x, float y) { + return __fp_div(x, y, __IML_RTN); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fdiv_rn(float x, float y) { + return __fp_div(x, y, __IML_RTE); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fdiv_ru(float x, float y) { + return __fp_div(x, y, __IML_RTP); +} + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fdiv_rz(float x, float y) { + return __fp_div(x, y, __IML_RTZ); +} #endif diff --git a/libdevice/imf_utils/fp64_round.cpp b/libdevice/imf_utils/fp64_round.cpp index 7a3743df6cd06..ff3e3ac6babcd 100644 --- a/libdevice/imf_utils/fp64_round.cpp +++ b/libdevice/imf_utils/fp64_round.cpp @@ -68,4 +68,24 @@ DEVICE_EXTERN_C_INLINE double __devicelib_imf_dmul_rz(double x, double y) { return __fp_mul(x, y, __IML_RTZ); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ddiv_rd(double x, double y) { + return __fp_div(x, y, __IML_RTN); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ddiv_rn(double x, double y) { + return __fp_div(x, y, __IML_RTE); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ddiv_ru(double x, double y) { + return __fp_div(x, y, __IML_RTP); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ddiv_rz(double x, double y) { + return __fp_div(x, y, __IML_RTZ); +} #endif diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index c2002d93af073..e01b518af43fc 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -1924,4 +1924,28 @@ float __devicelib_imf_fmul_rz(float, float); DEVICE_EXTERN_C_INLINE float __imf_fmul_rz(float x, float y) { return __devicelib_imf_fmul_rz(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fdiv_rd(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fdiv_rd(float x, float y) { return __devicelib_imf_fdiv_rd(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fdiv_rn(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fdiv_rn(float x, float y) { return __devicelib_imf_fdiv_rn(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fdiv_ru(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fdiv_ru(float x, float y) { return __devicelib_imf_fdiv_ru(x, y); } + +DEVICE_EXTERN_C_INLINE +float __devicelib_imf_fdiv_rz(float, float); + +DEVICE_EXTERN_C_INLINE +float __imf_fdiv_rz(float x, float y) { return __devicelib_imf_fdiv_rz(x, y); } #endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp index 596ad69cf5eea..ee75e2c10ea50 100644 --- a/libdevice/imf_wrapper_fp64.cpp +++ b/libdevice/imf_wrapper_fp64.cpp @@ -473,4 +473,36 @@ DEVICE_EXTERN_C_INLINE double __imf_dmul_rz(double x, double y) { return __devicelib_imf_dmul_rz(x, y); } + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ddiv_rd(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_ddiv_rd(double x, double y) { + return __devicelib_imf_ddiv_rd(x, y); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ddiv_rn(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_ddiv_rn(double x, double y) { + return __devicelib_imf_ddiv_rn(x, y); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ddiv_ru(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_ddiv_ru(double x, double y) { + return __devicelib_imf_ddiv_ru(x, y); +} + +DEVICE_EXTERN_C_INLINE +double __devicelib_imf_ddiv_rz(double, double); + +DEVICE_EXTERN_C_INLINE +double __imf_ddiv_rz(double x, double y) { + return __devicelib_imf_ddiv_rz(x, y); +} #endif // __LIBDEVICE_IMF_ENABLED__ diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp index 3d389497363e4..1f7a961b012d1 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp @@ -235,6 +235,10 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_imf_fmul_rn", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_fmul_ru", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_fmul_rz", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fdiv_rd", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fdiv_rn", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fdiv_ru", DeviceLibExt::cl_intel_devicelib_imf}, + {"__devicelib_imf_fdiv_rz", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf}, {"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf}, @@ -452,6 +456,10 @@ SYCLDeviceLibFuncMap SDLMap = { {"__devicelib_imf_dmul_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_dmul_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_dmul_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ddiv_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ddiv_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ddiv_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64}, + {"__devicelib_imf_ddiv_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_double2float_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64}, {"__devicelib_imf_double2float_rn", diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp index f4610c817fe55..87788b73a4420 100644 --- a/sycl/include/sycl/builtins.hpp +++ b/sycl/include/sycl/builtins.hpp @@ -103,6 +103,10 @@ extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rd(float x, float y); extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rn(float x, float y); extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_ru(float x, float y); extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rz(float x, float y); +extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rd(float x, float y); +extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rn(float x, float y); +extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_ru(float x, float y); +extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rz(float x, float y); extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float x); extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float x); extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float x); @@ -328,6 +332,10 @@ extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rd(double x, double y); extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rn(double x, double y); extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_ru(double x, double y); extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rz(double x, double y); +extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rd(double x, double y); +extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rn(double x, double y); +extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_ru(double x, double y); +extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rz(double x, double y); extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rd(double x); extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rn(double x); extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_ru(double x); diff --git a/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp b/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp index bcdc39773bfd7..435de7c895b19 100644 --- a/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp +++ b/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp @@ -23,6 +23,10 @@ float __imf_fmul_rz(float, float); float __imf_fmul_rn(float, float); float __imf_fmul_ru(float, float); float __imf_fmul_rd(float, float); +float __imf_fdiv_rz(float, float); +float __imf_fdiv_rn(float, float); +float __imf_fdiv_ru(float, float); +float __imf_fdiv_rd(float, float); double __imf_dadd_rz(double, double); double __imf_dadd_rn(double, double); @@ -36,6 +40,10 @@ double __imf_dmul_rz(double, double); double __imf_dmul_rn(double, double); double __imf_dmul_ru(double, double); double __imf_dmul_rd(double, double); +double __imf_ddiv_rz(double, double); +double __imf_ddiv_rn(double, double); +double __imf_ddiv_ru(double, double); +double __imf_ddiv_rd(double, double); }; namespace sycl { @@ -90,6 +98,22 @@ template Tp fmul_rz(Tp x, Tp y) { return __imf_fmul_rz(x, y); } +template Tp fdiv_rd(Tp x, Tp y) { + return __imf_fdiv_rd(x, y); +} + +template Tp fdiv_rn(Tp x, Tp y) { + return __imf_fdiv_rn(x, y); +} + +template Tp fdiv_ru(Tp x, Tp y) { + return __imf_fdiv_ru(x, y); +} + +template Tp fdiv_rz(Tp x, Tp y) { + return __imf_fdiv_rz(x, y); +} + template Tp dadd_rd(Tp x, Tp y) { return __imf_dadd_rd(x, y); } @@ -137,6 +161,22 @@ template Tp dmul_ru(Tp x, Tp y) { template Tp dmul_rz(Tp x, Tp y) { return __imf_dmul_rz(x, y); } + +template Tp ddiv_rd(Tp x, Tp y) { + return __imf_ddiv_rd(x, y); +} + +template Tp ddiv_rn(Tp x, Tp y) { + return __imf_ddiv_rn(x, y); +} + +template Tp ddiv_ru(Tp x, Tp y) { + return __imf_ddiv_ru(x, y); +} + +template Tp ddiv_rz(Tp x, Tp y) { + return __imf_ddiv_rz(x, y); +} } // namespace ext::intel::math } // namespace _V1 } // namespace sycl diff --git a/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp b/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp index 9da54ef0aa00d..0c8872713d656 100644 --- a/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp @@ -94,5 +94,32 @@ int main(int, char **) { std::cout << "sycl::ext::intel::math::fmul_rz passes." << std::endl; } + { + std::initializer_list input_vals1 = {0x1p-1, 0x1.8bd054p+6, + 0x1.fcd686p+0, -0x1.7f9abp+3}; + std::initializer_list input_vals2 = {-0x1.a8p+2, -0x1.674a3cp+5, + 0x1.f3d6aep+10, 0x1.d6bf48p+10}; + std::initializer_list ref_vals_rd = {0xbd9a90e8, 0xc00d030d, + 0x3a824df9, 0xbbd09c3a}; + std::initializer_list ref_vals_rn = {0xbd9a90e8, 0xc00d030c, + 0x3a824df9, 0xbbd09c39}; + std::initializer_list ref_vals_ru = {0xbd9a90e7, 0xc00d030c, + 0x3a824dfa, 0xbbd09c39}; + std::initializer_list ref_vals_rz = {0xbd9a90e7, 0xc00d030c, + 0x3a824df9, 0xbbd09c39}; + test2(device_queue, input_vals1, input_vals2, ref_vals_rd, + F2T(unsigned, sycl::ext::intel::math::fdiv_rd)); + std::cout << "sycl::ext::intel::math::fdiv_rd passes." << std::endl; + test2(device_queue, input_vals1, input_vals2, ref_vals_rn, + F2T(unsigned, sycl::ext::intel::math::fdiv_rn)); + std::cout << "sycl::ext::intel::math::fdiv_rn passes." << std::endl; + test2(device_queue, input_vals1, input_vals2, ref_vals_ru, + F2T(unsigned, sycl::ext::intel::math::fdiv_ru)); + std::cout << "sycl::ext::intel::math::fdiv_ru passes." << std::endl; + test2(device_queue, input_vals1, input_vals2, ref_vals_rz, + F2T(unsigned, sycl::ext::intel::math::fdiv_rz)); + std::cout << "sycl::ext::intel::math::fdiv_rz passes." << std::endl; + } + return 0; } diff --git a/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp b/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp index 18ae31af4c915..a23d1b30c35c4 100644 --- a/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp +++ b/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp @@ -113,5 +113,38 @@ int main(int, char **) { std::cout << "sycl::ext::intel::math::dmul_rz passes." << std::endl; } + { + std::initializer_list input_vals1 = { + 0x1.5ef3da7bf609ap+4, 0x1.fbd37afb0f8edp-1, 0x1.9238e38e38e35p+6, + 0x1.7p+3}; + std::initializer_list input_vals2 = { + -0x1.bc7db6de6d33fp+9, 0x1.2f638fa4e71a6p+10, 0x1.08e38e38e38e3p+4, + -0x1.94p+3}; + std::initializer_list ref_vals_rd = { + 0xbf994414312c26ab, 0x3f4ac811fc63acd9, 0x40184b98e9aa180a, + 0xbfed260511be1959}; + std::initializer_list ref_vals_rn = { + 0xbf994414312c26ab, 0x3f4ac811fc63acd9, 0x40184b98e9aa180b, + 0xbfed260511be1959}; + std::initializer_list ref_vals_ru = { + 0xbf994414312c26aa, 0x3f4ac811fc63acda, 0x40184b98e9aa180b, + 0xbfed260511be1958}; + std::initializer_list ref_vals_rz = { + 0xbf994414312c26aa, 0x3f4ac811fc63acd9, 0x40184b98e9aa180a, + 0xbfed260511be1958}; + test2(device_queue, input_vals1, input_vals2, ref_vals_rd, + F2T(unsigned long long, sycl::ext::intel::math::ddiv_rd)); + std::cout << "sycl::ext::intel::math::ddiv_rd passes." << std::endl; + test2(device_queue, input_vals1, input_vals2, ref_vals_rn, + F2T(unsigned long long, sycl::ext::intel::math::ddiv_rn)); + std::cout << "sycl::ext::intel::math::ddiv_rn passes." << std::endl; + test2(device_queue, input_vals1, input_vals2, ref_vals_ru, + F2T(unsigned long long, sycl::ext::intel::math::ddiv_ru)); + std::cout << "sycl::ext::intel::math::ddiv_ru passes." << std::endl; + test2(device_queue, input_vals1, input_vals2, ref_vals_rz, + F2T(unsigned long long, sycl::ext::intel::math::ddiv_rz)); + std::cout << "sycl::ext::intel::math::ddiv_rz passes." << std::endl; + } + return 0; }