diff --git a/libdevice/imf_rounding_op.hpp b/libdevice/imf_rounding_op.hpp
index 2edd99ed10c07..8c5fdf0d88f9d 100644
--- a/libdevice/imf_rounding_op.hpp
+++ b/libdevice/imf_rounding_op.hpp
@@ -627,4 +627,236 @@ template <typename Ty> Ty __fp_mul(Ty x, Ty y, int rd) {
       Ty, (z_sig << (sizeof(Ty) * 8 - 1)) |
               (z_exp << (std::numeric_limits<Ty>::digits - 1)) | z_fra);
 }
+
+template <typename UTy> static UTy fra_uint_div(UTy x, UTy y, unsigned nbits) {
+  UTy res = 0;
+  unsigned iters = 0;
+  if (x == 0)
+    return 0x0;
+  while (iters < nbits) {
+    res = res << 1;
+    x = x << 1;
+    if (x > y) {
+      x = x - y;
+      res = res | 0x1;
+    } else if (x == y) {
+      res = res | 0x1;
+      res = res << (nbits - iters - 1);
+      return res;
+    } else {
+    }
+    iters++;
+  }
+  res = res | 0x1;
+  return res;
+}
+
+template <typename Ty> Ty __fp_div(Ty x, Ty y, int rd) {
+  typedef typename __iml_fp_config<Ty>::utype UTy;
+  typedef typename __iml_fp_config<Ty>::stype STy;
+  UTy x_bit = __builtin_bit_cast(UTy, x);
+  UTy y_bit = __builtin_bit_cast(UTy, y);
+  UTy x_exp = (x_bit >> (std::numeric_limits<Ty>::digits - 1)) &
+              __iml_fp_config<Ty>::exp_mask;
+  UTy y_exp = (y_bit >> (std::numeric_limits<Ty>::digits - 1)) &
+              __iml_fp_config<Ty>::exp_mask;
+  UTy x_fra = x_bit & __iml_fp_config<Ty>::fra_mask;
+  UTy y_fra = y_bit & __iml_fp_config<Ty>::fra_mask;
+  UTy x_sig = x_bit >> ((sizeof(Ty) * 8) - 1);
+  UTy y_sig = y_bit >> ((sizeof(Ty) * 8) - 1);
+  UTy z_sig = x_sig ^ y_sig;
+  UTy z_exp = 0x0, z_fra = 0x0;
+  const UTy one_bits = 0x1;
+  const UTy sig_off_mask = (one_bits << (sizeof(UTy) * 8 - 1)) - 1;
+
+  if (((x_exp == __iml_fp_config<Ty>::exp_mask) && (x_fra != 0x0)) ||
+      ((y_exp == __iml_fp_config<Ty>::exp_mask) && (y_fra != 0x0)) ||
+      ((y_bit & sig_off_mask) == 0x0)) {
+    UTy tmp = __iml_fp_config<Ty>::nan_bits;
+    return __builtin_bit_cast(Ty, tmp);
+  }
+
+  if ((x_exp == __iml_fp_config<Ty>::exp_mask) && (x_fra == 0x0)) {
+    if ((y_exp == __iml_fp_config<Ty>::exp_mask) && (y_fra == 0x0)) {
+      UTy tmp = __iml_fp_config<Ty>::nan_bits;
+      return __builtin_bit_cast(Ty, tmp);
+    } else {
+      UTy tmp =
+          (z_sig << (sizeof(Ty) * 8 - 1)) | __iml_fp_config<Ty>::pos_inf_bits;
+      return __builtin_bit_cast(Ty, tmp);
+    }
+  }
+
+  if ((x_bit & sig_off_mask) == 0x0)
+    return __builtin_bit_cast(Ty, (z_sig << (sizeof(UTy) * 8 - 1)) | 0x0);
+
+  if ((y_exp == __iml_fp_config<Ty>::exp_mask) && (y_fra == 0x0))
+    return __builtin_bit_cast(Ty, (z_sig << (sizeof(UTy) * 8 - 1)) | 0x0);
+
+  int sx_exp = x_exp, sy_exp = y_exp;
+  sx_exp = (sx_exp == 0) ? (1 - __iml_fp_config<Ty>::bias)
+                         : (sx_exp - __iml_fp_config<Ty>::bias);
+  sy_exp = (sy_exp == 0) ? (1 - __iml_fp_config<Ty>::bias)
+                         : (sy_exp - __iml_fp_config<Ty>::bias);
+  int exp_diff = sx_exp - sy_exp;
+  if (x_exp != 0x0)
+    x_fra = (one_bits << (std::numeric_limits<Ty>::digits - 1)) | x_fra;
+  if (y_exp != 0x0)
+    y_fra = (one_bits << (std::numeric_limits<Ty>::digits - 1)) | y_fra;
+
+  if (x_fra >= y_fra) {
+    // x_fra / y_fra max value for fp32 is 0xFFFFFF when x is normal
+    // and y is subnormal, so msb_pos max value is 23
+    UTy tmp = x_fra / y_fra;
+    UTy fra_rem = x_fra - y_fra * tmp;
+    int msb_pos = get_msb_pos(tmp);
+    int tmp2 = exp_diff + msb_pos;
+    if (tmp2 > __iml_fp_config<Ty>::bias)
+      return __handling_fp_overflow<Ty>(z_sig, rd);
+
+    if (tmp2 >= (1 - __iml_fp_config<Ty>::bias)) {
+      // Fall into normal floating point range
+      z_exp = tmp2 + __iml_fp_config<Ty>::bias;
+      // For fp32, starting msb_pos bits in fra comes from tmp and we need
+      // 23 - msb_pos( + grs) more bits from fraction division.
+      z_fra = ((one_bits << msb_pos) - 1) & tmp;
+      z_fra = z_fra << ((std::numeric_limits<Ty>::digits - 1) - msb_pos);
+      UTy fra_bits_quo = fra_uint_div(
+          fra_rem, y_fra, std::numeric_limits<Ty>::digits - msb_pos + 2);
+      z_fra = z_fra | (fra_bits_quo >> 3);
+      int rb = __handling_rounding(z_sig, z_fra, fra_bits_quo & 0x7, rd);
+      if (rb != 0) {
+        z_fra++;
+        if (z_fra > __iml_fp_config<Ty>::fra_mask) {
+          z_exp++;
+          if (z_exp == __iml_fp_config<Ty>::exp_mask)
+            return __handling_fp_overflow<Ty>(z_sig, rd);
+        }
+      }
+      return __builtin_bit_cast(
+          Ty, (z_sig << (sizeof(Ty) * 8 - 1)) |
+                  (z_exp << (std::numeric_limits<Ty>::digits - 1)) | z_fra);
+    }
+
+    // orignal value can be represented as (0.1xxxx.... * 2^tmp2)
+    // which is equivalent to 0.00000...1xxxxx * 2^(-126)
+    tmp2 = tmp2 + 1;
+    if ((tmp2 + std::numeric_limits<Ty>::digits - 1) <=
+        (1 - __iml_fp_config<Ty>::bias)) {
+      bool above_half = false;
+      if ((tmp2 + std::numeric_limits<Ty>::digits - 1) ==
+          (1 - __iml_fp_config<Ty>::bias))
+        above_half =
+            !((x_fra == y_fra * tmp) && (tmp == (one_bits << msb_pos)));
+      return __handling_fp_underflow<Ty, UTy>(z_sig, rd, above_half);
+    } else {
+      int rb;
+      // Fall into subnormal floating point range. For fp32, there are -126 -
+      // tmp2 leading zeros in final fra and we need get 23 + 126 + tmp2( + grs)
+      // bits from fraction division.
+      if (msb_pos >= (std::numeric_limits<Ty>::digits +
+                      __iml_fp_config<Ty>::bias + tmp2)) {
+        unsigned fra_discard_bits = msb_pos + 3 - __iml_fp_config<Ty>::bias -
+                                    std::numeric_limits<Ty>::digits - tmp2;
+        z_fra = tmp >> fra_discard_bits;
+        int grs_bits = (tmp >> (fra_discard_bits - 3)) & 0x7;
+        if ((grs_bits & 0x1) == 0x0) {
+          if ((tmp & ((0x1 << (fra_discard_bits - 3)) - 0x1)) || (fra_rem != 0))
+            grs_bits = grs_bits | 0x1;
+        }
+        rb = __handling_rounding(z_sig, z_fra, grs_bits, rd);
+      } else {
+        // For fp32, we need to get (23 + 126 + tmp2 + 3) - (msb_pos + 1) bits
+        // from fra division and the last bit is sticky bit.
+        z_fra = tmp;
+        unsigned fra_get_bits = std::numeric_limits<Ty>::digits +
+                                __iml_fp_config<Ty>::bias + tmp2 - msb_pos;
+        z_fra = z_fra << fra_get_bits;
+        UTy fra_bits_quo = fra_uint_div(fra_rem, y_fra, fra_get_bits);
+        z_fra = z_fra | fra_bits_quo;
+        int grs_bits = z_fra & 0x7;
+        z_fra = z_fra >> 3;
+        rb = __handling_rounding(z_sig, z_fra, grs_bits, rd);
+      }
+      if (rb != 0) {
+        z_fra++;
+        if (z_fra > __iml_fp_config<Ty>::fra_mask) {
+          z_exp++;
+          z_fra = 0x0;
+        }
+      }
+      return __builtin_bit_cast(
+          Ty, (z_sig << (sizeof(Ty) * 8 - 1)) |
+                  (z_exp << (std::numeric_limits<Ty>::digits - 1)) | z_fra);
+    }
+  } else {
+    // x_fra < y_fra, the final result can be represented as
+    // (2^exp_diff) * 0.000...01xxxxx
+    unsigned lz = 0;
+    UTy x_tmp = x_fra;
+    x_tmp = x_tmp << 1;
+    while (x_tmp < y_fra) {
+      lz++;
+      x_tmp = x_tmp << 1;
+    }
+    // x_fra < y_fra, the final result can be represented as
+    // (2^exp_diff) * 0.000...01xxxxx... which is equivalent to
+    // 2 ^ (exp_diff - lz - 1) * 1.xxxxx...
+    int nor_exp = exp_diff - lz - 1;
+    if (nor_exp > __iml_fp_config<Ty>::bias)
+      return __handling_fp_overflow<Ty>(z_sig, rd);
+
+    if (nor_exp >= (1 - __iml_fp_config<Ty>::bias)) {
+      z_exp = nor_exp + __iml_fp_config<Ty>::bias;
+      x_fra = x_fra << lz;
+      UTy fra_bits_quo =
+          fra_uint_div(x_fra, y_fra, 3 + std::numeric_limits<Ty>::digits);
+      z_fra = (fra_bits_quo >> 3) & __iml_fp_config<Ty>::fra_mask;
+      int grs_bits = fra_bits_quo & 0x7;
+      int rb = __handling_rounding(z_sig, z_fra, grs_bits, rd);
+      if (rb != 0x0) {
+        z_fra++;
+        if (z_fra > __iml_fp_config<Ty>::fra_mask) {
+          z_exp++;
+          z_fra = 0x0;
+          if (z_exp == __iml_fp_config<Ty>::exp_mask)
+            return __handling_fp_overflow<Ty>(z_sig, rd);
+        }
+      }
+      return __builtin_bit_cast(
+          Ty, (z_sig << (sizeof(Ty) * 8 - 1)) |
+                  (z_exp << (std::numeric_limits<Ty>::digits - 1)) | z_fra);
+    }
+
+    // Fall into subnormal range or underflow happens. For fp32,
+    // nor_exp < -126, so (-126 - exp_diff + lz + 1) > 0 which means
+    // (lz - exp_diff - 126) >= 0
+    unsigned lzs = lz - __iml_fp_config<Ty>::bias - exp_diff + 1;
+    if (lzs >= (std::numeric_limits<Ty>::digits - 1)) {
+      bool above_half = false;
+      if (lzs == (std::numeric_limits<Ty>::digits - 1)) {
+        if ((x_fra << (lz + 1)) > y_fra)
+          above_half = true;
+      }
+      return __handling_fp_underflow<Ty>(z_sig, rd, above_half);
+    } else {
+      x_fra = x_fra << lz;
+      UTy fra_bits_quo =
+          fra_uint_div(x_fra, y_fra, std::numeric_limits<Ty>::digits - lzs + 2);
+      z_fra = fra_bits_quo >> 3;
+      int grs_bits = fra_bits_quo & 0x7;
+      int rb = __handling_rounding(z_sig, z_fra, grs_bits, rd);
+      if (rb != 0x0) {
+        z_fra++;
+        if (z_fra > __iml_fp_config<Ty>::fra_mask) {
+          z_exp++;
+          z_fra = 0x0;
+        }
+      }
+      return __builtin_bit_cast(
+          Ty, (z_sig << (sizeof(Ty) * 8 - 1)) |
+                  (z_exp << (std::numeric_limits<Ty>::digits - 1)) | z_fra);
+    }
+  }
+}
 #endif
diff --git a/libdevice/imf_utils/fp32_round.cpp b/libdevice/imf_utils/fp32_round.cpp
index 1acb1d62fd6ba..10df70f3b13ed 100644
--- a/libdevice/imf_utils/fp32_round.cpp
+++ b/libdevice/imf_utils/fp32_round.cpp
@@ -68,4 +68,24 @@ DEVICE_EXTERN_C_INLINE
 float __devicelib_imf_fmul_rz(float x, float y) {
   return __fp_mul(x, y, __IML_RTZ);
 }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fdiv_rd(float x, float y) {
+  return __fp_div(x, y, __IML_RTN);
+}
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fdiv_rn(float x, float y) {
+  return __fp_div(x, y, __IML_RTE);
+}
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fdiv_ru(float x, float y) {
+  return __fp_div(x, y, __IML_RTP);
+}
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fdiv_rz(float x, float y) {
+  return __fp_div(x, y, __IML_RTZ);
+}
 #endif
diff --git a/libdevice/imf_utils/fp64_round.cpp b/libdevice/imf_utils/fp64_round.cpp
index 7a3743df6cd06..ff3e3ac6babcd 100644
--- a/libdevice/imf_utils/fp64_round.cpp
+++ b/libdevice/imf_utils/fp64_round.cpp
@@ -68,4 +68,24 @@ DEVICE_EXTERN_C_INLINE
 double __devicelib_imf_dmul_rz(double x, double y) {
   return __fp_mul(x, y, __IML_RTZ);
 }
+
+DEVICE_EXTERN_C_INLINE
+double __devicelib_imf_ddiv_rd(double x, double y) {
+  return __fp_div(x, y, __IML_RTN);
+}
+
+DEVICE_EXTERN_C_INLINE
+double __devicelib_imf_ddiv_rn(double x, double y) {
+  return __fp_div(x, y, __IML_RTE);
+}
+
+DEVICE_EXTERN_C_INLINE
+double __devicelib_imf_ddiv_ru(double x, double y) {
+  return __fp_div(x, y, __IML_RTP);
+}
+
+DEVICE_EXTERN_C_INLINE
+double __devicelib_imf_ddiv_rz(double x, double y) {
+  return __fp_div(x, y, __IML_RTZ);
+}
 #endif
diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp
index c2002d93af073..e01b518af43fc 100644
--- a/libdevice/imf_wrapper.cpp
+++ b/libdevice/imf_wrapper.cpp
@@ -1924,4 +1924,28 @@ float __devicelib_imf_fmul_rz(float, float);
 
 DEVICE_EXTERN_C_INLINE
 float __imf_fmul_rz(float x, float y) { return __devicelib_imf_fmul_rz(x, y); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fdiv_rd(float, float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fdiv_rd(float x, float y) { return __devicelib_imf_fdiv_rd(x, y); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fdiv_rn(float, float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fdiv_rn(float x, float y) { return __devicelib_imf_fdiv_rn(x, y); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fdiv_ru(float, float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fdiv_ru(float x, float y) { return __devicelib_imf_fdiv_ru(x, y); }
+
+DEVICE_EXTERN_C_INLINE
+float __devicelib_imf_fdiv_rz(float, float);
+
+DEVICE_EXTERN_C_INLINE
+float __imf_fdiv_rz(float x, float y) { return __devicelib_imf_fdiv_rz(x, y); }
 #endif // __LIBDEVICE_IMF_ENABLED__
diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp
index 596ad69cf5eea..ee75e2c10ea50 100644
--- a/libdevice/imf_wrapper_fp64.cpp
+++ b/libdevice/imf_wrapper_fp64.cpp
@@ -473,4 +473,36 @@ DEVICE_EXTERN_C_INLINE
 double __imf_dmul_rz(double x, double y) {
   return __devicelib_imf_dmul_rz(x, y);
 }
+
+DEVICE_EXTERN_C_INLINE
+double __devicelib_imf_ddiv_rd(double, double);
+
+DEVICE_EXTERN_C_INLINE
+double __imf_ddiv_rd(double x, double y) {
+  return __devicelib_imf_ddiv_rd(x, y);
+}
+
+DEVICE_EXTERN_C_INLINE
+double __devicelib_imf_ddiv_rn(double, double);
+
+DEVICE_EXTERN_C_INLINE
+double __imf_ddiv_rn(double x, double y) {
+  return __devicelib_imf_ddiv_rn(x, y);
+}
+
+DEVICE_EXTERN_C_INLINE
+double __devicelib_imf_ddiv_ru(double, double);
+
+DEVICE_EXTERN_C_INLINE
+double __imf_ddiv_ru(double x, double y) {
+  return __devicelib_imf_ddiv_ru(x, y);
+}
+
+DEVICE_EXTERN_C_INLINE
+double __devicelib_imf_ddiv_rz(double, double);
+
+DEVICE_EXTERN_C_INLINE
+double __imf_ddiv_rz(double x, double y) {
+  return __devicelib_imf_ddiv_rz(x, y);
+}
 #endif // __LIBDEVICE_IMF_ENABLED__
diff --git a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
index 3d389497363e4..1f7a961b012d1 100644
--- a/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
+++ b/llvm/tools/sycl-post-link/SYCLDeviceLibReqMask.cpp
@@ -235,6 +235,10 @@ SYCLDeviceLibFuncMap SDLMap = {
     {"__devicelib_imf_fmul_rn", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_fmul_ru", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_fmul_rz", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fdiv_rd", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fdiv_rn", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fdiv_ru", DeviceLibExt::cl_intel_devicelib_imf},
+    {"__devicelib_imf_fdiv_rz", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_float2int_rd", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_float2int_rn", DeviceLibExt::cl_intel_devicelib_imf},
     {"__devicelib_imf_float2int_ru", DeviceLibExt::cl_intel_devicelib_imf},
@@ -452,6 +456,10 @@ SYCLDeviceLibFuncMap SDLMap = {
     {"__devicelib_imf_dmul_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64},
     {"__devicelib_imf_dmul_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64},
     {"__devicelib_imf_dmul_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64},
+    {"__devicelib_imf_ddiv_rd", DeviceLibExt::cl_intel_devicelib_imf_fp64},
+    {"__devicelib_imf_ddiv_rn", DeviceLibExt::cl_intel_devicelib_imf_fp64},
+    {"__devicelib_imf_ddiv_ru", DeviceLibExt::cl_intel_devicelib_imf_fp64},
+    {"__devicelib_imf_ddiv_rz", DeviceLibExt::cl_intel_devicelib_imf_fp64},
     {"__devicelib_imf_double2float_rd",
      DeviceLibExt::cl_intel_devicelib_imf_fp64},
     {"__devicelib_imf_double2float_rn",
diff --git a/sycl/include/sycl/builtins.hpp b/sycl/include/sycl/builtins.hpp
index f4610c817fe55..87788b73a4420 100644
--- a/sycl/include/sycl/builtins.hpp
+++ b/sycl/include/sycl/builtins.hpp
@@ -103,6 +103,10 @@ extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rd(float x, float y);
 extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rn(float x, float y);
 extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_ru(float x, float y);
 extern __DPCPP_SYCL_EXTERNAL float __imf_fmul_rz(float x, float y);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rd(float x, float y);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rn(float x, float y);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_ru(float x, float y);
+extern __DPCPP_SYCL_EXTERNAL float __imf_fdiv_rz(float x, float y);
 extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rd(float x);
 extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_rn(float x);
 extern __DPCPP_SYCL_EXTERNAL int __imf_float2int_ru(float x);
@@ -328,6 +332,10 @@ extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rd(double x, double y);
 extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rn(double x, double y);
 extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_ru(double x, double y);
 extern __DPCPP_SYCL_EXTERNAL double __imf_dmul_rz(double x, double y);
+extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rd(double x, double y);
+extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rn(double x, double y);
+extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_ru(double x, double y);
+extern __DPCPP_SYCL_EXTERNAL double __imf_ddiv_rz(double x, double y);
 extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rd(double x);
 extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_rn(double x);
 extern __DPCPP_SYCL_EXTERNAL float __imf_double2float_ru(double x);
diff --git a/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp b/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp
index bcdc39773bfd7..435de7c895b19 100644
--- a/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp
+++ b/sycl/include/sycl/ext/intel/math/imf_rounding_math.hpp
@@ -23,6 +23,10 @@ float __imf_fmul_rz(float, float);
 float __imf_fmul_rn(float, float);
 float __imf_fmul_ru(float, float);
 float __imf_fmul_rd(float, float);
+float __imf_fdiv_rz(float, float);
+float __imf_fdiv_rn(float, float);
+float __imf_fdiv_ru(float, float);
+float __imf_fdiv_rd(float, float);
 
 double __imf_dadd_rz(double, double);
 double __imf_dadd_rn(double, double);
@@ -36,6 +40,10 @@ double __imf_dmul_rz(double, double);
 double __imf_dmul_rn(double, double);
 double __imf_dmul_ru(double, double);
 double __imf_dmul_rd(double, double);
+double __imf_ddiv_rz(double, double);
+double __imf_ddiv_rn(double, double);
+double __imf_ddiv_ru(double, double);
+double __imf_ddiv_rd(double, double);
 };
 
 namespace sycl {
@@ -90,6 +98,22 @@ template <typename Tp = float> Tp fmul_rz(Tp x, Tp y) {
   return __imf_fmul_rz(x, y);
 }
 
+template <typename Tp = float> Tp fdiv_rd(Tp x, Tp y) {
+  return __imf_fdiv_rd(x, y);
+}
+
+template <typename Tp = float> Tp fdiv_rn(Tp x, Tp y) {
+  return __imf_fdiv_rn(x, y);
+}
+
+template <typename Tp = float> Tp fdiv_ru(Tp x, Tp y) {
+  return __imf_fdiv_ru(x, y);
+}
+
+template <typename Tp = float> Tp fdiv_rz(Tp x, Tp y) {
+  return __imf_fdiv_rz(x, y);
+}
+
 template <typename Tp = double> Tp dadd_rd(Tp x, Tp y) {
   return __imf_dadd_rd(x, y);
 }
@@ -137,6 +161,22 @@ template <typename Tp = double> Tp dmul_ru(Tp x, Tp y) {
 template <typename Tp = double> Tp dmul_rz(Tp x, Tp y) {
   return __imf_dmul_rz(x, y);
 }
+
+template <typename Tp = double> Tp ddiv_rd(Tp x, Tp y) {
+  return __imf_ddiv_rd(x, y);
+}
+
+template <typename Tp = double> Tp ddiv_rn(Tp x, Tp y) {
+  return __imf_ddiv_rn(x, y);
+}
+
+template <typename Tp = double> Tp ddiv_ru(Tp x, Tp y) {
+  return __imf_ddiv_ru(x, y);
+}
+
+template <typename Tp = double> Tp ddiv_rz(Tp x, Tp y) {
+  return __imf_ddiv_rz(x, y);
+}
 } // namespace ext::intel::math
 } // namespace _V1
 } // namespace sycl
diff --git a/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp b/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp
index 9da54ef0aa00d..0c8872713d656 100644
--- a/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp
+++ b/sycl/test-e2e/DeviceLib/imf_fp32_rounding_test.cpp
@@ -94,5 +94,32 @@ int main(int, char **) {
     std::cout << "sycl::ext::intel::math::fmul_rz passes." << std::endl;
   }
 
+  {
+    std::initializer_list<float> input_vals1 = {0x1p-1, 0x1.8bd054p+6,
+                                                0x1.fcd686p+0, -0x1.7f9abp+3};
+    std::initializer_list<float> input_vals2 = {-0x1.a8p+2, -0x1.674a3cp+5,
+                                                0x1.f3d6aep+10, 0x1.d6bf48p+10};
+    std::initializer_list<unsigned> ref_vals_rd = {0xbd9a90e8, 0xc00d030d,
+                                                   0x3a824df9, 0xbbd09c3a};
+    std::initializer_list<unsigned> ref_vals_rn = {0xbd9a90e8, 0xc00d030c,
+                                                   0x3a824df9, 0xbbd09c39};
+    std::initializer_list<unsigned> ref_vals_ru = {0xbd9a90e7, 0xc00d030c,
+                                                   0x3a824dfa, 0xbbd09c39};
+    std::initializer_list<unsigned> ref_vals_rz = {0xbd9a90e7, 0xc00d030c,
+                                                   0x3a824df9, 0xbbd09c39};
+    test2(device_queue, input_vals1, input_vals2, ref_vals_rd,
+          F2T(unsigned, sycl::ext::intel::math::fdiv_rd));
+    std::cout << "sycl::ext::intel::math::fdiv_rd passes." << std::endl;
+    test2(device_queue, input_vals1, input_vals2, ref_vals_rn,
+          F2T(unsigned, sycl::ext::intel::math::fdiv_rn));
+    std::cout << "sycl::ext::intel::math::fdiv_rn passes." << std::endl;
+    test2(device_queue, input_vals1, input_vals2, ref_vals_ru,
+          F2T(unsigned, sycl::ext::intel::math::fdiv_ru));
+    std::cout << "sycl::ext::intel::math::fdiv_ru passes." << std::endl;
+    test2(device_queue, input_vals1, input_vals2, ref_vals_rz,
+          F2T(unsigned, sycl::ext::intel::math::fdiv_rz));
+    std::cout << "sycl::ext::intel::math::fdiv_rz passes." << std::endl;
+  }
+
   return 0;
 }
diff --git a/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp b/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp
index 18ae31af4c915..a23d1b30c35c4 100644
--- a/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp
+++ b/sycl/test-e2e/DeviceLib/imf_fp64_rounding_test.cpp
@@ -113,5 +113,38 @@ int main(int, char **) {
     std::cout << "sycl::ext::intel::math::dmul_rz passes." << std::endl;
   }
 
+  {
+    std::initializer_list<double> input_vals1 = {
+        0x1.5ef3da7bf609ap+4, 0x1.fbd37afb0f8edp-1, 0x1.9238e38e38e35p+6,
+        0x1.7p+3};
+    std::initializer_list<double> input_vals2 = {
+        -0x1.bc7db6de6d33fp+9, 0x1.2f638fa4e71a6p+10, 0x1.08e38e38e38e3p+4,
+        -0x1.94p+3};
+    std::initializer_list<unsigned long long> ref_vals_rd = {
+        0xbf994414312c26ab, 0x3f4ac811fc63acd9, 0x40184b98e9aa180a,
+        0xbfed260511be1959};
+    std::initializer_list<unsigned long long> ref_vals_rn = {
+        0xbf994414312c26ab, 0x3f4ac811fc63acd9, 0x40184b98e9aa180b,
+        0xbfed260511be1959};
+    std::initializer_list<unsigned long long> ref_vals_ru = {
+        0xbf994414312c26aa, 0x3f4ac811fc63acda, 0x40184b98e9aa180b,
+        0xbfed260511be1958};
+    std::initializer_list<unsigned long long> ref_vals_rz = {
+        0xbf994414312c26aa, 0x3f4ac811fc63acd9, 0x40184b98e9aa180a,
+        0xbfed260511be1958};
+    test2(device_queue, input_vals1, input_vals2, ref_vals_rd,
+          F2T(unsigned long long, sycl::ext::intel::math::ddiv_rd));
+    std::cout << "sycl::ext::intel::math::ddiv_rd passes." << std::endl;
+    test2(device_queue, input_vals1, input_vals2, ref_vals_rn,
+          F2T(unsigned long long, sycl::ext::intel::math::ddiv_rn));
+    std::cout << "sycl::ext::intel::math::ddiv_rn passes." << std::endl;
+    test2(device_queue, input_vals1, input_vals2, ref_vals_ru,
+          F2T(unsigned long long, sycl::ext::intel::math::ddiv_ru));
+    std::cout << "sycl::ext::intel::math::ddiv_ru passes." << std::endl;
+    test2(device_queue, input_vals1, input_vals2, ref_vals_rz,
+          F2T(unsigned long long, sycl::ext::intel::math::ddiv_rz));
+    std::cout << "sycl::ext::intel::math::ddiv_rz passes." << std::endl;
+  }
+
   return 0;
 }