From ee6e9e8dc8435aa506c7297d78063d9f373a9664 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 28 Jun 2021 11:49:20 -0700 Subject: [PATCH 01/40] [SYCL][ESIMD][EMU] ESIMD_CPU Kernel launch and Emulated Intrinsics * This PR is for enabling kernel launching for ESIMD_CPU * Also contains emulated intrinsics for memory operations * esimd_cpu backend is loaded in SYCL runtime * Base PR : https://github.com/intel/llvm/pull/4011 --- sycl/CMakeLists.txt | 9 + sycl/include/CL/sycl/accessor.hpp | 5 + sycl/include/CL/sycl/detail/cg_types.hpp | 2 +- sycl/include/CL/sycl/handler.hpp | 101 ++- .../esimd/detail/atomic_intrin.hpp | 19 + .../esimd/detail/emu/esimd_emu_functions_v1.h | 0 .../detail/emu/esimdcpu_device_interface.hpp | 0 .../esimd/detail/memory_intrin.hpp | 817 ++++++++++++++++-- .../ext/intel/experimental/esimd/memory.hpp | 2 +- sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp | 3 +- sycl/source/detail/config.hpp | 9 +- sycl/source/detail/device_filter.cpp | 3 +- sycl/source/detail/pi.cpp | 13 + sycl/source/detail/scheduler/commands.cpp | 17 +- 14 files changed, 904 insertions(+), 96 deletions(-) create mode 100644 sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp rename sycl/include/{CL/sycl/INTEL => sycl/ext/intel/experimental}/esimd/detail/emu/esimd_emu_functions_v1.h (100%) rename sycl/include/{CL/sycl/INTEL => sycl/ext/intel/experimental}/esimd/detail/emu/esimdcpu_device_interface.hpp (100%) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 3801f7ce88321..c21cae8ecb65e 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -285,6 +285,15 @@ if(SYCL_BUILD_PI_ROCM) list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_rocm) endif() +if (SYCL_BUILD_PI_ESIMD_CPU) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-headers) + if (MSVC) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-libs libcmrt-dlls) + else() + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-sos) + endif() +endif() + # Use it as fake dependency in order to force another command(s) to execute. add_custom_command(OUTPUT __force_it COMMAND "${CMAKE_COMMAND}" -E echo diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index abae9c107b130..872c56813efb3 100755 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -671,6 +671,11 @@ class image_accessor AccessorBaseHost::getPtr() /*Ptr to Image*/); #endif } + +#ifndef __SYCL_DEVICE_ONLY__ +public: + void *get_pointer() const { return detail::AccessorBaseHost::getPtr(); } +#endif // __SYCL_DEVICE_ONLY__ }; template class HostKernel : public HostKernelBase { using IDBuilder = sycl::detail::Builder; - KernelType MKernel; public: + KernelType MKernel; HostKernel(KernelType Kernel) : MKernel(Kernel) {} void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override { // adjust ND range for serial host: diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index ad9b003c51a3f..e71d7770fd6b0 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -514,6 +514,98 @@ class __SYCL_EXPORT handler { return {z, y, x}; } + /* The kernel passed to StoreLambda can take an id, an item or an nd_item as + * its argument. Since esimd plugin directly invokes the kernel (doesn’t use + * piKernelSetArg), the kernel argument type must be known to the plugin. + * However, passing kernel argument type to the plugin requires changing ABI + * in HostKernel class. To overcome this problem, helpers below wrap the + * “original” kernel with a functor that always takes an nd_item as argument. + * A functor is used instead of a lambda because extractArgsAndReqsFromLambda + * needs access to the “original” kernel and keeps references to its internal + * data, i.e. the kernel passed as argument cannot be local in scope. The + * functor itself is again encapsulated in a std::function since functor’s + * type is unknown to the plugin. + */ + + template + KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { + NormalizedKernelType NormalizedKernel(KernelFunc); + auto NormalizedKernelFunc = + std::function &)>(NormalizedKernel); + auto HostKernelPtr = + new detail::HostKernel, Dims, KernelType>( + NormalizedKernelFunc); + MHostKernel.reset(HostKernelPtr); + return &HostKernelPtr->MKernel.template target() + ->MKernelFunc; + } + + template + typename std::enable_if>::value, + KernelType *>::type + ResetHostKernel(const KernelType &KernelFunc) { + struct NormalizedKernelType { + KernelType MKernelFunc; + NormalizedKernelType(const KernelType &KernelFunc) + : MKernelFunc(KernelFunc) {} + void operator()(const nd_item &Arg) { + MKernelFunc(Arg.get_global_id()); + } + }; + return ResetHostKernelHelper( + KernelFunc); + } + template + typename std::enable_if>::value, + KernelType *>::type + ResetHostKernel(const KernelType &KernelFunc) { + struct NormalizedKernelType { + KernelType MKernelFunc; + NormalizedKernelType(const KernelType &KernelFunc) + : MKernelFunc(KernelFunc) {} + void operator()(const nd_item &Arg) { MKernelFunc(Arg); } + }; + return ResetHostKernelHelper( + KernelFunc); + } + + template + typename std::enable_if>::value, + KernelType *>::type + ResetHostKernel(const KernelType &KernelFunc) { + struct NormalizedKernelType { + KernelType MKernelFunc; + NormalizedKernelType(const KernelType &KernelFunc) + : MKernelFunc(KernelFunc) {} + void operator()(const nd_item &Arg) { + sycl::item Item = detail::Builder::createItem( + Arg.get_global_range(), Arg.get_global_id()); + MKernelFunc(Item); + } + }; + return ResetHostKernelHelper( + KernelFunc); + } + + template + typename std::enable_if>::value, + KernelType *>::type + ResetHostKernel(const KernelType &KernelFunc) { + struct NormalizedKernelType { + KernelType MKernelFunc; + NormalizedKernelType(const KernelType &KernelFunc) + : MKernelFunc(KernelFunc) {} + void operator()(const nd_item &Arg) { + sycl::item Item = detail::Builder::createItem( + Arg.get_global_range(), Arg.get_global_id(), Arg.get_offset()); + MKernelFunc(Item); + } + }; + return ResetHostKernelHelper( + KernelFunc); + } + /// Stores lambda to the template-free object /// /// Also initializes kernel name, list of arguments and requirements using @@ -530,9 +622,8 @@ class __SYCL_EXPORT handler { "kernel_handler is not yet supported by host device.", PI_INVALID_OPERATION); } - MHostKernel.reset( - new detail::HostKernel( - KernelFunc)); + KernelType *KernelPtr = + ResetHostKernel(KernelFunc); using KI = sycl::detail::KernelInfo; // Empty name indicates that the compilation happens without integration @@ -540,8 +631,8 @@ class __SYCL_EXPORT handler { if (KI::getName() != nullptr && KI::getName()[0] != '\0') { // TODO support ESIMD in no-integration-header case too. MArgs.clear(); - extractArgsAndReqsFromLambda(MHostKernel->getPtr(), KI::getNumParams(), - &KI::getParamDesc(0), KI::isESIMD()); + extractArgsAndReqsFromLambda(reinterpret_cast(KernelPtr), + KI::getNumParams(), &KI::getParamDesc(0)); MKernelName = KI::getName(); MOSModuleHandle = detail::OSUtil::getOSModuleHandle(KI::getName()); } else { diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp new file mode 100644 index 0000000000000..21cb6ad5c0826 --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp @@ -0,0 +1,19 @@ +//==-------- atomic_intrin.hpp - Atomic intrinsic definition file ----------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include + +template Ty atomic_add_fetch(Ty *ptr, Ty val) { +#ifdef _WIN32 + // TODO: Windows will be supported soon + throw cl::sycl::feature_not_supported(); +#else + return __atomic_add_fetch(ptr, val, __ATOMIC_RELAXED); +#endif +} diff --git a/sycl/include/CL/sycl/INTEL/esimd/detail/emu/esimd_emu_functions_v1.h b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h similarity index 100% rename from sycl/include/CL/sycl/INTEL/esimd/detail/emu/esimd_emu_functions_v1.h rename to sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h diff --git a/sycl/include/CL/sycl/INTEL/esimd/detail/emu/esimdcpu_device_interface.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp similarity index 100% rename from sycl/include/CL/sycl/INTEL/esimd/detail/emu/esimdcpu_device_interface.hpp rename to sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 03ec0ab45b171..9225944df8581 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -521,6 +521,155 @@ __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, __SEIEED::vector_type_t msgSrc0); #ifndef __SYCL_DEVICE_ONLY__ +#define __SYCL_EXPLICIT_SIMD_PLUGIN__ + +// Header files required for accessing CM-managed memory - Surface, +// buffer, etc +namespace cm_support { +#include +} // namespace cm_support +#include "cmrt_if_defs.hpp" + +#include +#include +#include +#include + +namespace raw_send { + +enum class msgField : short { + OP, + VNNI, + ADDRSIZE, + DATASIZE, + VECTSIZE, + TRANSPOSE, + CACHE, + DSTLEN, + SRC0LEN, + ADDRTYPE +}; + +enum class msgOp : short { + DP_LOAD = 0x0, // scatter/vector load + LOAD_2D = 0x3, + DP_STORE = 0x4, // scatter/vector store + STORE_2D = 0x7, + OP_MAX = 0x3F +}; + +typedef struct _bitfields_ { + uint32_t offset; + uint32_t mask; +} bitfields; + +const bitfields BIT_FIELDS[10] = { + {0, 0x3F}, // OP / 6 bits + {7, 0x1}, // VNNI -> LOAD only + {7, 0x3}, // Address size + {9, 0x7}, // DATASIZE + {12, 0x7}, // VECTSIZE + {15, 0x1}, // TRANSPOSE -> LOAD only + {17, 0x7}, // CACHE + {20, 0x1F}, // DSTLEN + {25, 0xF}, // SRC0LEN, + {29, 0x3} // ADDRTYPE +}; +uint32_t inline getMsgField(uint32_t msg, msgField field) { + uint32_t idx = static_cast(field); + return ((msg >> BIT_FIELDS[idx].offset) & BIT_FIELDS[idx].mask); +} + +auto inline getMsgOp(uint32_t msg) { + msgOp ret; + ret = static_cast(getMsgField((uint32_t)msg, msgField::OP)); + return ret; +} + +template +uint64_t inline getSurfaceBaseAddr(__SEIEED::vector_type_t addrMsg) { + constexpr int sizeofT = sizeof(T); + uint64_t Ret = 0; + + if constexpr (sizeofT == 4) { + Ret = (uint64_t)addrMsg[1] << 32; + Ret |= (uint64_t)addrMsg[0]; + } else if constexpr (sizeofT == 8) { + Ret = addrMsg[0]; + } + + return Ret; +} + +template +uint64_t inline getLaneAddr(__SEIEED::vector_type_t addrMsg, + unsigned lane_id) { + // (matrix_ref addrMsg) + // vector_ref addr_ref = addrMsg.template select<1, 1, 2, 1>(0, 2 + // * lane_id).template format(); return addr_ref(0); + throw cl::sycl::feature_not_supported(); +} + +template +auto inline getSurfaceDim(__SEIEED::vector_type_t addrMsg) { + __SEIEED::vector_type_t Ret; + constexpr int sizeofT = sizeof(T); + + static_assert(sizeofT == 4, "Unsupported addrMsg format!!"); + + if constexpr (sizeofT == 4) { + for (int idx = 0; idx < 4; idx++) { + Ret[idx] = addrMsg[idx + 2]; + } + } + + return Ret; +} + +template +auto inline getBlockOffsets(__SEIEED::vector_type_t addrMsg) { + __SEIEED::vector_type_t Ret; + constexpr int sizeofT = sizeof(T); + + static_assert(sizeofT == 4, "Unsupported addrMsg format!!"); + + if constexpr (sizeofT == 4) { + for (int idx = 0; idx < 4; idx++) { + Ret[idx] = static_cast(addrMsg[idx + 5]); + } + } + + return Ret; +} + +template +auto inline getBlockDim(__SEIEED::vector_type_t addrMsg) { + __SEIEED::vector_type_t Ret; + constexpr int sizeofT = sizeof(T); + T RawValue = 0; + + static_assert(sizeofT == 4, "Unsupported addrMsg format!!"); + + if constexpr (sizeofT == 4) { + RawValue = addrMsg[7]; + Ret[0] = (unsigned char)(RawValue & 0xFF); // width + Ret[1] = (unsigned char)((RawValue >> 8) & 0xFF); // height + Ret[2] = (unsigned char)((RawValue >> 24) & 0xFF); // For ArrayLen + } + + assert(RawValue != 0); + + return Ret; +} + +template +auto inline getArrayLen(__SEIEED::vector_type_t addrMsg) { + auto blkDim = getBlockDim(addrMsg); + return (blkDim[2] >> 4); +} + +} // namespace raw_send + template inline __SEIEED::vector_type_t @@ -693,48 +842,105 @@ template inline __SEIEED::vector_type_t __esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, unsigned width, unsigned x, unsigned y) { - // On host the input surface is modeled as sycl image 2d object, - // and the read/write access is done through accessor, - // which is passed in as the handle argument. - auto range = __SEIEED::AccessorPrivateProxy::getImageRange(handle); - unsigned bpp = __SEIEED::AccessorPrivateProxy::getElemSize(handle); - unsigned vpp = bpp / sizeof(Ty); - unsigned int i = x / bpp; - unsigned int j = y; - - assert(x % bpp == 0); - unsigned int xbound = range[0] - 1; - unsigned int ybound = range[1] - 1; - __SEIEED::vector_type_t vals; - for (int row = 0; row < M; row++) { - for (int col = 0; col < N; col += vpp) { - unsigned int xoff = (i > xbound) ? xbound : i; - unsigned int yoff = (j > ybound) ? ybound : j; - auto coords = cl::sycl::cl_int2(xoff, yoff); - cl::sycl::cl_uint4 data = handle.read(coords); - - __SEIEED::vector_type_t res; - for (int idx = 0; idx < 4; idx++) { - res[idx] = data[idx]; + + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + _pi_image *img = + static_cast<_pi_image *>(static_cast(handle.get_pointer())); + std::unique_lock lock(img->mutexLock); + + char *readBase = I->sycl_get_surface_base_addr_ptr(img->SurfaceIndex); + + uint32_t bpp = static_cast(img->BytesPerPixel); + uint32_t imgWidth = static_cast(img->Width) * bpp; + uint32_t imgHeight = static_cast(img->Height); + int x_pos_a, y_pos_a, offset, index; + + // TODO : Remove intermediate 'in' matrix + std::vector> in(M, std::vector(N)); + int R = M; + int C = N; + for (int i = 0; i < R; i++) { + for (int j = 0; j < C; j++) { + x_pos_a = x + j * sizeof(Ty); + { y_pos_a = y + i; } + // We should check the boundary condition based on sizeof(Ty), x_pos_a is + // 0-based Note: Use a signed variable; otherwise sizeof(Ty) is unsigned + if ((x_pos_a + sizeof(Ty)) > imgWidth) { + // If we're trying to read outside the boundary, limit the value of + // x_pos_a Assumption -- We don't this situation: + // x_pos_a width's boundary + // | | + // <---type(Ty)---> + // At most x_pos_a+sizeof(Ty) is exactly at the boundary. + x_pos_a = imgWidth; + } + if (y_pos_a > imgHeight - 1) { + y_pos_a = imgHeight - 1; + } + if (y_pos_a < 0) { + y_pos_a = 0; + } + { + if (x_pos_a < 0) { + // Need to align x position to bbp + int offset = x % bpp; + x_pos_a -= offset; + } + while (x_pos_a < 0) { + // If we're trying to read outside the left boundary, increase x_pos_a + x_pos_a += bpp; + } } - constexpr int refN = sizeof(cl::sycl::cl_uint4) / sizeof(Ty); - unsigned int stride = sizeof(cl::sycl::cl_uint4) / bpp; - using refTy = __SEIEED::vector_type_t; - auto ref = reinterpret_cast(res); - - unsigned int offset1 = col + row * N; - unsigned int offset2 = 0; - for (int idx = 0; idx < vpp; idx++) { - vals[offset1] = ref[offset2]; - offset1++; - offset2 += stride; + if (x_pos_a >= imgWidth) { + { + x_pos_a = x_pos_a - bpp; + for (uint byte_count = 0; byte_count < sizeof(Ty); byte_count++) { + if (x_pos_a >= imgWidth) { + x_pos_a = x_pos_a - bpp; + } + offset = y_pos_a * imgWidth + x_pos_a; + + /* + If destination size per element is less then or equal pixel size + of the surface move the pixel value accross the destination + elements. If destination size per element is greater then pixel + size of the surface replicate pixel value in the destination + element. + */ + if (sizeof(Ty) <= bpp) { + for (uint bpp_count = 0; j < C && bpp_count < bpp; + j++, bpp_count += sizeof(Ty)) { + in[i][j] = *((Ty *)(readBase + offset + bpp_count)); + } + j--; + break; + } else { + // ((unsigned char*)in.get_addr(i*C+j))[byte_count] = *((unsigned + // char*)((char*)buff_iter->p + offset)); + unsigned char *pTempBase = + ((unsigned char *)in[i].data()) + j * sizeof(Ty); + pTempBase[byte_count] = *((unsigned char *)(readBase + offset)); + } + + x_pos_a = x_pos_a + 1; + } + x_pos_a = imgWidth; + } + } else { + offset = y_pos_a * imgWidth + x_pos_a; + { in[i][j] = *((Ty *)(readBase + offset)); } } - i++; } - i = x / bpp; - j++; + } + + for (auto i = 0, k = 0; i < M; i++) { + for (auto j = 0; j < N; j++) { + vals[k++] = in[i][j]; + } } return vals; @@ -745,45 +951,57 @@ inline void __esimd_media_block_store(unsigned modififer, TACC handle, unsigned plane, unsigned width, unsigned x, unsigned y, __SEIEED::vector_type_t vals) { - unsigned bpp = __SEIEED::AccessorPrivateProxy::getElemSize(handle); - unsigned vpp = bpp / sizeof(Ty); - auto range = __SEIEED::AccessorPrivateProxy::getImageRange(handle); - unsigned int i = x / bpp; - unsigned int j = y; - - assert(x % bpp == 0); - - for (int row = 0; row < M; row++) { - for (int col = 0; col < N; col += vpp) { - constexpr int Sz = sizeof(cl::sycl::cl_uint4) / sizeof(Ty); - __SEIEED::vector_type_t res = 0; - - unsigned int offset1 = col + row * N; - unsigned int offset2 = 0; - unsigned int stride = sizeof(cl::sycl::cl_uint4) / bpp; - for (int idx = 0; idx < vpp; idx++) { - res[offset2] = vals[offset1]; - offset1++; - offset2 += stride; - } + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + _pi_image *img = + static_cast<_pi_image *>(static_cast(handle.get_pointer())); + + char *writeBase = I->sycl_get_surface_base_addr_ptr(img->SurfaceIndex); + + uint32_t bpp = static_cast(img->BytesPerPixel); + uint32_t imgWidth = static_cast(img->Width) * bpp; + uint32_t imgHeight = static_cast(img->Height); + int x_pos_a, y_pos_a, offset; + + assert((x % 4) == 0); + assert((N * sizeof(Ty)) % 4 == 0); - using refTy = __SEIEED::vector_type_t; - auto ref = reinterpret_cast(res); + // TODO : Remove intermediate 'out' matrix + std::vector> out(M, std::vector(N)); - cl::sycl::cl_uint4 data; - for (int idx = 0; idx < 4; idx++) { - data[idx] = ref[idx]; + std::unique_lock lock(img->mutexLock); + + for (int i = 0, k = 0; i < M; i++) { + for (int j = 0; j < N; j++) { + out[i][j] = vals[k++]; + } + } + + for (int i = 0; i < M; i++) { + for (int j = 0; j < N; j++) { + x_pos_a = x + j * sizeof(Ty); + { y_pos_a = y + i; } + if ((int)x_pos_a < 0) { + continue; + } + if ((int)y_pos_a < 0) { + continue; + } + if ((int)(x_pos_a + sizeof(Ty)) > imgWidth) { + continue; } - if (i < range[0] && j < range[1]) { - auto coords = cl::sycl::cl_int2(i, j); - handle.write(coords, data); + if ((int)y_pos_a > imgHeight - 1) { + continue; } - i++; + offset = y_pos_a * imgWidth + x_pos_a; + *((Ty *)(writeBase + offset)) = out[i][j]; } - i = x / bpp; - j++; } + + /// TODO : Optimize + I->cm_fence_ptr(); } template @@ -820,10 +1038,27 @@ __esimd_dp4(__SEIEED::vector_type_t v1, return retv; } -/// TODO -inline void __esimd_barrier() {} +inline void __esimd_slm_init(size_t size) { + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + I->cm_slm_init_ptr(size); +} -inline void __esimd_sbarrier(__SEIEE::split_barrier_action flag) {} +inline void __esimd_barrier() { + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + I->cm_barrier_ptr(); +} + +inline void __esimd_sbarrier( + sycl::ext::intel::experimental::esimd::EsimdSbarrierType flag) { + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + I->cm_sbarrier_ptr((uint32_t)flag); +} inline void __esimd_slm_fence(uint8_t cntl) {} @@ -832,6 +1067,17 @@ inline __SEIEED::vector_type_t __esimd_slm_read(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t pred) { __SEIEED::vector_type_t retv; + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + char *SlmBase = I->__cm_emu_get_slm_ptr(); + for (int i = 0; i < N; ++i) { + if (pred[i]) { + Ty *addr = reinterpret_cast(addrs[i] + SlmBase); + retv[i] = *addr; + } + } + return retv; } @@ -839,19 +1085,49 @@ __esimd_slm_read(__SEIEED::vector_type_t addrs, template inline void __esimd_slm_write(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) {} + __SEIEED::vector_type_t pred) { + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + + char *SlmBase = I->__cm_emu_get_slm_ptr(); + for (int i = 0; i < N; ++i) { + if (pred[i]) { + Ty *addr = reinterpret_cast(addrs[i] + SlmBase); + *addr = vals[i]; + } + } +} // slm_block_read reads a block of data from SLM template inline __SEIEED::vector_type_t __esimd_slm_block_read(uint32_t addr) { __SEIEED::vector_type_t retv; + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + char *SlmBase = I->__cm_emu_get_slm_ptr(); + addr <<= 4; + for (int i = 0; i < N; ++i) { + Ty *SlmAddr = reinterpret_cast(addr + SlmBase); + retv[i] = *SlmAddr; + addr += sizeof(Ty); + } return retv; } // slm_block_write writes a block of data to SLM template inline void __esimd_slm_block_write(uint32_t addr, - __SEIEED::vector_type_t vals) {} + __SEIEED::vector_type_t vals) { + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + char *SlmBase = I->__cm_emu_get_slm_ptr(); + addr <<= 4; + for (int i = 0; i < N; ++i) { + Ty *SlmAddr = reinterpret_cast(addr + SlmBase); + *SlmAddr = vals[i]; + addr += sizeof(Ty); + } +} // slm_read4 does SLM gather4 template @@ -859,6 +1135,52 @@ inline __SEIEED::vector_type_t __esimd_slm_read4(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t pred) { __SEIEED::vector_type_t retv; + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + char *ReadBase = I->__cm_emu_get_slm_ptr(); + + unsigned int Next = 0; + if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::R)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *addr = reinterpret_cast(addrs[I] + ReadBase); + retv[Next] = *addr; + } + } + } + + ReadBase += sizeof(Ty); + + if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::G)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *addr = reinterpret_cast(addrs[I] + ReadBase); + retv[Next] = *addr; + } + } + } + + ReadBase += sizeof(Ty); + + if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::B)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *addr = reinterpret_cast(addrs[I] + ReadBase); + retv[Next] = *addr; + } + } + } + + ReadBase += sizeof(Ty); + + if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::A)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *addr = reinterpret_cast(addrs[I] + ReadBase); + retv[Next] = *addr; + } + } + } return retv; } @@ -867,7 +1189,55 @@ template inline void __esimd_slm_write4( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) {} + __SEIEED::vector_type_t pred) { + + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + char *WriteBase = I->__cm_emu_get_slm_ptr(); + + unsigned int Next = 0; + if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::R)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *addr = reinterpret_cast(addrs[I] + WriteBase); + *addr = vals[Next]; + } + } + } + + WriteBase += sizeof(Ty); + + if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::G)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *addr = reinterpret_cast(addrs[I] + WriteBase); + *addr = vals[Next]; + } + } + } + + WriteBase += sizeof(Ty); + + if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::B)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *addr = reinterpret_cast(addrs[I] + WriteBase); + *addr = vals[Next]; + } + } + } + + WriteBase += sizeof(Ty); + + if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::A)) { + for (int I = 0; I < N; I++, Next++) { + if (pred[I]) { + Ty *addr = reinterpret_cast(addrs[I] + WriteBase); + *addr = vals[Next]; + } + } + } +} // slm_atomic: SLM atomic template <__SEIEE::atomic_op Op, typename Ty, int N> @@ -875,6 +1245,23 @@ inline __SEIEED::vector_type_t __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t pred) { __SEIEED::vector_type_t retv; + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + char *WriteBase = I->__cm_emu_get_slm_ptr(); + + for (int i = 0; i < N; i++) { + if (pred[i]) { + Ty *p = reinterpret_cast(addrs[i] + WriteBase); + + switch (Op) { + case __SEIEE::EsimdAtomicOpType::ATOMIC_INC: + retv[i] = atomic_add_fetch(p, 1); + break; + default: + throw cl::sycl::feature_not_supported(); + } + } + } return retv; } @@ -884,6 +1271,21 @@ __esimd_slm_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t pred) { __SEIEED::vector_type_t retv; + + for (int i = 0; i < N; i++) { + if (pred[i]) { + Ty *p = reinterpret_cast(addrs[i]); + + switch (Op) { + case __SEIEE::EsimdAtomicOpType::ATOMIC_ADD: + retv[i] = atomic_add_fetch(p, src0[i]); + break; + default: + throw cl::sycl::feature_not_supported(); + } + } + } + return retv; } @@ -930,15 +1332,59 @@ __esimd_flat_atomic2(__SEIEED::vector_type_t addrs, template inline __SEIEED::vector_type_t __esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset) { - throw cl::sycl::feature_not_supported(); - return __SEIEED::vector_type_t(); + __SEIEED::vector_type_t retv; + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + _pi_buffer *buf = + static_cast<_pi_buffer *>(static_cast(surf_ind.get_pointer())); + + char *readBase = I->sycl_get_surface_base_addr_ptr(buf->SurfaceIndex); + + uint32_t width = static_cast(buf->Size); + uint32_t pos = offset; + + std::unique_lock lock(buf->mutexLock); + + for (int idx = 0; idx < N; idx++) { + if (pos >= width) { + retv[idx] = 0; + } else { + retv[idx] = *((Ty *)(readBase + pos)); + } + pos += (uint32_t)sizeof(Ty); + } + + return retv; } template inline void __esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, __SEIEED::vector_type_t vals) { + sycl::detail::ESIMDDeviceInterface *I = + sycl::detail::getESIMDDeviceInterface(); + _pi_buffer *buf = + static_cast<_pi_buffer *>(static_cast(surf_ind.get_pointer())); - throw cl::sycl::feature_not_supported(); + char *writeBase = I->sycl_get_surface_base_addr_ptr(buf->SurfaceIndex); + + uint32_t width = static_cast(buf->Size); + assert(buf->Size == width); + + uint32_t pos = offset << 4; + + std::unique_lock lock(buf->mutexLock); + + for (int idx = 0; idx < N; idx++) { + if (pos < width) { + *((Ty *)(writeBase + pos)) = vals[idx]; + } else { + break; + } + pos += (uint32_t)sizeof(Ty); + } + + /// TODO : Optimize + I->cm_fence_ptr(); } /// \brief esimd_get_value @@ -1032,8 +1478,173 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgDst) { - throw cl::sycl::feature_not_supported(); - return 0; + assert(sfid == 0xF); // UGM type only + + __SEIEED::vector_type_t retv; + + auto op = raw_send::getMsgOp(msgDesc); + assert(op == raw_send::msgOp::LOAD_2D); + uint64_t surfaceBase = raw_send::getSurfaceBaseAddr(msgSrc0); + auto surfaceDim = raw_send::getSurfaceDim(msgSrc0); + auto blockOffset = raw_send::getBlockOffsets(msgSrc0); + auto blockDim = raw_send::getBlockDim(msgSrc0); + auto arrayLen = raw_send::getArrayLen(msgSrc0); + + unsigned SurfaceWidth = surfaceDim[0] + 1; + unsigned SurfaceHeight = surfaceDim[1] + 1; + unsigned SurfacePitch = surfaceDim[2] + 1; + + int X = blockOffset[0]; + int Y = blockOffset[1]; + int Width = blockDim[0] + 1; + int Height = blockDim[1] + 1; + int NBlks = arrayLen + 1; + + bool Transposed = + raw_send::getMsgField(msgDesc, raw_send::msgField::TRANSPOSE); + bool Transformed = raw_send::getMsgField(msgDesc, raw_send::msgField::VNNI); + + constexpr unsigned sizeofT = sizeof(Ty1); + + char *buffBase = (char *)surfaceBase; + + // TODO : Acquire mutex for the surface pointed to by 'surfaceBase' + int vecIdx = 0; + int blkCount = 0; + + for (int xBase = X * sizeofT; blkCount < NBlks; xBase += sizeofT * Width) { + if (Transformed == true) { + constexpr int elems_per_DW = (sizeofT == 1) ? 4 : 2; /// VNNI_pack + if (Transposed == false) { /// Transform only load + int yRead = Y * SurfacePitch; + for (int u = 0; u < Height; + u += elems_per_DW, yRead += SurfacePitch * elems_per_DW) { + if ((yRead < 0) || (yRead >= SurfacePitch * SurfaceHeight)) { + /// Vertically out-of-bound, padding zero on out of boundary + for (int v = 0; v < Width; v += 1) { + for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { + retv[vecIdx] = (Ty1)(0); + } // k loop + } + // vecIdx += Width * elems_per_DW;; + continue; + } + + int xRead = xBase; + for (int v = 0; v < Width; v += 1, xRead += sizeofT) { + if ((xRead < 0) || (xRead >= SurfaceWidth)) { + /// Horizontally out-of-bound + for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { + retv[vecIdx] = (Ty1)(0); + } // k loop + // vecIdx += elems_per_DW; + continue; + } + + char *base = buffBase + yRead + xRead; + int offset = 0; + for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { + retv[vecIdx] = *((Ty1 *)(base + offset)); + // Increasing in Y-direction + offset += SurfacePitch; + } // k loop + } // v loop + } /// u loop + } // Transposed = false + else // Transposed == true + { /// Transform & Transpose load + int xRead = xBase; + for (int v = 0; v < Width; + v += elems_per_DW, xRead += sizeofT * elems_per_DW) { + if ((xRead < 0) || (xRead >= SurfaceWidth)) { + // Horizontally out-of-bound + for (int u = 0; u < Height; u += 1) { + for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { + retv[vecIdx] = (Ty1)(0); + } // k loop + } + // vecIdx += Height * elems_per_DW; + continue; + } + + int yRead = Y * SurfacePitch; + for (int u = 0; u < Height; u += 1, yRead += SurfacePitch) { + if ((yRead < 0) || (yRead >= SurfacePitch * SurfaceHeight)) { + /// Vertically out-of-bound + for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { + retv[vecIdx] = (Ty1)(0); + } // k loop + // vecIdx += elems_per_DW; + continue; + } + + char *base = buffBase + yRead + xRead; + int offset = 0; + for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { + retv[vecIdx] = *((Ty1 *)(base + offset)); + // Increasing in X-direction + offset += sizeofT; + } // k loop + } // u loop + } // v loop + } // Transposed == true + } // Transformed == true + else // (Transformed == false) + { + if (Transposed == false) { /// Linear load - no transform, no transpose + int yRead = Y * SurfacePitch; + for (int u = 0; u < Height; u += 1, yRead += SurfacePitch) { + if ((yRead < 0) || (yRead >= SurfacePitch * SurfaceHeight)) { + // Vertically Out-of-bound + for (int v = 0; v < Width; v += 1, vecIdx += 1) { + retv[vecIdx] = (Ty1)(0); + } + // vecIdx += Width; + continue; + } + + int xRead = xBase; + for (int v = 0; v < Width; v += 1, xRead += sizeofT, vecIdx += 1) { + if ((xRead >= 0) && (xRead < SurfaceWidth)) { + retv[vecIdx] = *((Ty1 *)(buffBase + yRead + xRead)); + } else { + // Horizontally out of bound + retv[vecIdx] = (Ty1)(0); + } + } // v loop + } // u loop + } /// Transposed == false + else // Transposed = true + { /// Transpose load - no transform + int xRead = xBase; + for (int v = 0; v < Width; v += 1, xRead += sizeofT) { + if ((xRead < 0) || (xRead > SurfaceWidth)) { + // Horizontally out-of-bound + for (int u = 0; u < Height; u += 1, vecIdx += 1) { + retv[vecIdx] = (Ty1)(0); + } + // vecIdx += Height; + continue; + } + + int yRead = Y * SurfacePitch; + for (int u = 0; u < Height; + u += 1, yRead += SurfacePitch, vecIdx += 1) { + if ((yRead >= 0) && (yRead < SurfacePitch * SurfaceHeight)) { + retv[vecIdx] = *((Ty1 *)(buffBase + yRead + xRead)); + } else { + // Vertically out of bound + retv[vecIdx] = (Ty1)(0); + } + } // u loop + } // v loop + } // Transposed == true + } // Transformed == false + blkCount += 1; + vecIdx = blkCount * Width * Height; + } // xBase loop + + return retv; } /// \brief Raw sends store. @@ -1068,7 +1679,44 @@ inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgSrc1) { - throw cl::sycl::feature_not_supported(); + assert(sfid == 0xF); // UGM type only + auto op = raw_send::getMsgOp(msgDesc); + assert(op == raw_send::msgOp::STORE_2D); + uint64_t surfaceBase = raw_send::getSurfaceBaseAddr(msgSrc0); + auto surfaceDim = raw_send::getSurfaceDim(msgSrc0); + auto blockOffset = raw_send::getBlockOffsets(msgSrc0); + auto blockDim = raw_send::getBlockDim(msgSrc0); + + unsigned SurfaceWidth = surfaceDim[0] + 1; + unsigned SurfaceHeight = surfaceDim[1] + 1; + unsigned SurfacePitch = surfaceDim[2] + 1; + + int X = blockOffset[0]; + int Y = blockOffset[1]; + int Width = blockDim[0] + 1; + int Height = blockDim[1] + 1; + + constexpr unsigned sizeofT = sizeof(Ty2); + + char *buffBase = (char *)surfaceBase; + + int vecIdx = 0; + int rowCount = 0; + for (int yWrite = Y * SurfacePitch; rowCount < Height; + yWrite += SurfacePitch) { + if (yWrite == SurfacePitch * SurfaceHeight) { + // Vertically Out-of-bound + break; + } + int writeCount = 0; + for (int xWrite = X * sizeofT; writeCount < Width; + xWrite += sizeofT, vecIdx += 1, writeCount += 1) { + if (xWrite >= 0 && xWrite < SurfaceWidth) { + *((Ty2 *)(buffBase + yWrite + xWrite)) = msgSrc1[vecIdx]; + } + } // xWrite loop + rowCount += 1; + } // yWrite loop } /// \brief Raw send store. @@ -1096,6 +1744,13 @@ inline void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0) { + auto op = raw_send::getMsgOp(msgDesc); + + if (op == raw_send::msgOp::LOAD_2D) { + // Prefetch? + return; + } + throw cl::sycl::feature_not_supported(); } diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index dadcc437d0e5d..95dfdc9ab249b 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -803,7 +803,7 @@ media_block_store(AccessorTy acc, unsigned x, unsigned y, simd vals) { #ifndef __SYCL_DEVICE_ONLY__ -inline void slm_init(uint32_t size) {} +inline void slm_init(uint32_t size) { __esimd_slm_init(size); } #endif diff --git a/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp b/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp index e6b17be78db34..b110c88c374dd 100755 --- a/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp +++ b/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp @@ -43,4 +43,5 @@ template <> uint32_t pi_cast(uint64_t Value) { std::terminate(); } -#include +#include +#include diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index dd91d24eba642..158a540771a1c 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -131,11 +131,12 @@ template <> class SYCLConfig { return BackendPtr; const char *ValStr = BaseT::getRawValue(); - const std::array, 5> SyclBeMap = { + const std::array, 6> SyclBeMap = { {{"PI_OPENCL", backend::opencl}, {"PI_LEVEL_ZERO", backend::level_zero}, {"PI_LEVEL0", backend::level_zero}, // for backward compatibility {"PI_CUDA", backend::cuda}, + {"PI_ESIMD_CPU", backend::esimd_cpu}, {"PI_ROCM", backend::rocm}}}; if (ValStr) { auto It = std::find_if( @@ -145,7 +146,8 @@ template <> class SYCLConfig { }); if (It == SyclBeMap.end()) pi::die("Invalid backend. " - "Valid values are PI_OPENCL/PI_LEVEL_ZERO/PI_CUDA/PI_ROCM"); + "Valid values are " + "PI_OPENCL/PI_LEVEL_ZERO/PI_CUDA/PI_ESIMD_CPU/PI_ROCM"); static backend Backend = It->second; BackendPtr = &Backend; } @@ -185,11 +187,12 @@ static const std::array, 5> {"*", info::device_type::all}}}; // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST -static const std::array, 5> SyclBeMap = { +static const std::array, 6> SyclBeMap = { {{"host", backend::host}, {"opencl", backend::opencl}, {"level_zero", backend::level_zero}, {"cuda", backend::cuda}, + {"esimd_cpu", backend::esimd_cpu}, {"*", backend::all}}}; template <> class SYCLConfig { diff --git a/sycl/source/detail/device_filter.cpp b/sycl/source/detail/device_filter.cpp index 8558d0d9e4f94..a438911fe4696 100644 --- a/sycl/source/detail/device_filter.cpp +++ b/sycl/source/detail/device_filter.cpp @@ -75,7 +75,8 @@ device_filter::device_filter(const std::string &FilterString) { } catch (...) { std::string Message = std::string("Invalid device filter: ") + FilterString + - "\nPossible backend values are {host,opencl,level_zero,cuda,*}.\n" + "\nPossible backend values are " + "{host,opencl,level_zero,cuda,esimd_cpu*}.\n" "Possible device types are {host,cpu,gpu,acc,*}.\n" "Device number should be an non-negative integer.\n"; throw cl::sycl::invalid_parameter_error(Message, PI_INVALID_VALUE); diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 5745e4c2c58ae..1f4257e63d667 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -237,12 +237,14 @@ bool findPlugins(vector_class> &PluginNames) { PluginNames.emplace_back(__SYCL_LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::cuda); + PluginNames.emplace_back(__SYCL_ESIMD_CPU_PLUGIN_NAME, backend::esimd_cpu); PluginNames.emplace_back(__SYCL_ROCM_PLUGIN_NAME, backend::rocm); } else { std::vector Filters = FilterList->get(); bool OpenCLFound = false; bool LevelZeroFound = false; bool CudaFound = false; + bool EsimdCpuFound = false; bool RocmFound = false; for (const device_filter &Filter : Filters) { backend Backend = Filter.Backend; @@ -261,6 +263,12 @@ bool findPlugins(vector_class> &PluginNames) { PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::cuda); CudaFound = true; } + if (!EsimdCpuFound && + (Backend == backend::esimd_cpu || Backend == backend::all)) { + PluginNames.emplace_back(__SYCL_ESIMD_CPU_PLUGIN_NAME, + backend::esimd_cpu); + EsimdCpuFound = true; + } if (!RocmFound && (Backend == backend::rocm || Backend == backend::all)) { PluginNames.emplace_back(__SYCL_ROCM_PLUGIN_NAME, backend::rocm); RocmFound = true; @@ -379,6 +387,11 @@ static void initializePlugins(vector_class *Plugins) { // Use the LEVEL_ZERO plugin as the GlobalPlugin GlobalPlugin = std::make_shared(PluginInformation, backend::level_zero, Library); + } else if (InteropBE == backend::esimd_cpu && + PluginNames[I].first.find("esimd_cpu") != std::string::npos) { + // Use the ESIMD_CPU plugin as the GlobalPlugin + GlobalPlugin = std::make_shared(PluginInformation, + backend::esimd_cpu, Library); } Plugins->emplace_back( plugin(PluginInformation, PluginNames[I].second, Library)); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 2734a64776761..202488aaf9c31 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1937,7 +1937,8 @@ cl_int ExecCGCommand::enqueueImp() { NDRDescT &NDRDesc = ExecKernel->MNDRDesc; - if (MQueue->is_host()) { + if (MQueue->is_host() || + (MQueue->getPlugin().getBackend() == backend::esimd_cpu)) { for (ArgDesc &Arg : ExecKernel->MArgs) if (kernel_param_kind_t::kind_accessor == Arg.MType) { Requirement *Req = (Requirement *)(Arg.MPtr); @@ -1949,8 +1950,18 @@ cl_int ExecCGCommand::enqueueImp() { const detail::plugin &Plugin = EventImpls[0]->getPlugin(); Plugin.call(RawEvents.size(), &RawEvents[0]); } - ExecKernel->MHostKernel->call(NDRDesc, - getEvent()->getHostProfilingInfo()); + + if (MQueue->is_host()) { + ExecKernel->MHostKernel->call(NDRDesc, + getEvent()->getHostProfilingInfo()); + } else { + assert(MQueue->getPlugin().getBackend() == backend::esimd_cpu); + MQueue->getPlugin().call( + nullptr, + reinterpret_cast(ExecKernel->MHostKernel->getPtr()), + NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], + &NDRDesc.LocalSize[0], 0, nullptr, nullptr); + } return CL_SUCCESS; } From 6b15320a95913c350a9dc2db38246a5c7a31c072 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Tue, 29 Jun 2021 13:20:27 -0700 Subject: [PATCH 02/40] Atomic fix - Replacing deprecated ATOMIC_* with atomic_op - Relocating atomic_add implementation for flat_atomic1 from slm_atomic1 --- .../esimd/detail/memory_intrin.hpp | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 9225944df8581..7a081283a2a7e 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -1254,7 +1254,7 @@ __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, Ty *p = reinterpret_cast(addrs[i] + WriteBase); switch (Op) { - case __SEIEE::EsimdAtomicOpType::ATOMIC_INC: + case __SEIEE::atomic_op::inc: retv[i] = atomic_add_fetch(p, 1); break; default: @@ -1271,21 +1271,6 @@ __esimd_slm_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t pred) { __SEIEED::vector_type_t retv; - - for (int i = 0; i < N; i++) { - if (pred[i]) { - Ty *p = reinterpret_cast(addrs[i]); - - switch (Op) { - case __SEIEE::EsimdAtomicOpType::ATOMIC_ADD: - retv[i] = atomic_add_fetch(p, src0[i]); - break; - default: - throw cl::sycl::feature_not_supported(); - } - } - } - return retv; } @@ -1315,6 +1300,21 @@ __esimd_flat_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::vector_type_t pred) { __SEIEED::vector_type_t retv; + + for (int i = 0; i < N; i++) { + if (pred[i]) { + Ty *p = reinterpret_cast(addrs[i]); + + switch (Op) { + case __SEIEE::atomic_op::add: + retv[i] = atomic_add_fetch(p, src0[i]); + break; + default: + throw cl::sycl::feature_not_supported(); + } + } + } + return retv; } From 3249056884f5b0696b79fa60d88317bedca87250 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 8 Jul 2021 13:19:32 -0700 Subject: [PATCH 03/40] Removing dependency on _pi_image/buffer in kernel compilation - _pi_buffer and _pi_image are defined in 'cmrt_if_defs.hpp' which is used for both PI and kernel compilation - Dependency is resolved by adding another device interface call (sycl_get_cm_buffer_params, sycl_get_cm_image_params) --- .../esimd/detail/emu/esimd_emu_functions_v1.h | 5 ++ .../detail/emu/esimdcpu_device_interface.hpp | 1 + .../esimd/detail/memory_intrin.hpp | 72 ++++++++++--------- sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp | 1 - 4 files changed, 44 insertions(+), 35 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h index 9fcde11e6e9d4..9b9af9f6718fa 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h @@ -44,3 +44,8 @@ void (*cm_fence_ptr)(void); char *(*sycl_get_surface_base_addr_ptr)(int); char *(*__cm_emu_get_slm_ptr)(void); void (*cm_slm_init_ptr)(size_t); + +void (*sycl_get_cm_buffer_params_ptr)(void *, char **, uint32_t *, + std::mutex *); +void (*sycl_get_cm_image_params_ptr)(void *, char **, uint32_t *, uint32_t *, + uint32_t *, std::mutex *); diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp index eb249f7c61781..ca24b20b38019 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp @@ -24,6 +24,7 @@ // pointer table file ('esimd_emu_functions_v1.h') included in 'struct // ESIMDDeviceInterface' definition. #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 7a081283a2a7e..00af504573d7b 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -528,7 +528,6 @@ __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, namespace cm_support { #include } // namespace cm_support -#include "cmrt_if_defs.hpp" #include #include @@ -847,15 +846,18 @@ __esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, sycl::detail::ESIMDDeviceInterface *I = sycl::detail::getESIMDDeviceInterface(); - _pi_image *img = - static_cast<_pi_image *>(static_cast(handle.get_pointer())); - std::unique_lock lock(img->mutexLock); + char *readBase; + uint32_t bpp; + uint32_t imgWidth; + uint32_t imgHeight; + std::mutex mutexLock; - char *readBase = I->sycl_get_surface_base_addr_ptr(img->SurfaceIndex); + I->sycl_get_cm_image_params_ptr(static_cast(handle.get_pointer()), + &readBase, &imgWidth, &imgHeight, &bpp, + &mutexLock); + + std::unique_lock lock(mutexLock); - uint32_t bpp = static_cast(img->BytesPerPixel); - uint32_t imgWidth = static_cast(img->Width) * bpp; - uint32_t imgHeight = static_cast(img->Height); int x_pos_a, y_pos_a, offset, index; // TODO : Remove intermediate 'in' matrix @@ -954,14 +956,16 @@ inline void __esimd_media_block_store(unsigned modififer, TACC handle, sycl::detail::ESIMDDeviceInterface *I = sycl::detail::getESIMDDeviceInterface(); - _pi_image *img = - static_cast<_pi_image *>(static_cast(handle.get_pointer())); + char *writeBase; + uint32_t bpp; + uint32_t imgWidth; + uint32_t imgHeight; + std::mutex mutexLock; - char *writeBase = I->sycl_get_surface_base_addr_ptr(img->SurfaceIndex); + I->sycl_get_cm_image_params_ptr(static_cast(handle.get_pointer()), + &writeBase, &imgWidth, &imgHeight, &bpp, + &mutexLock); - uint32_t bpp = static_cast(img->BytesPerPixel); - uint32_t imgWidth = static_cast(img->Width) * bpp; - uint32_t imgHeight = static_cast(img->Height); int x_pos_a, y_pos_a, offset; assert((x % 4) == 0); @@ -970,7 +974,7 @@ inline void __esimd_media_block_store(unsigned modififer, TACC handle, // TODO : Remove intermediate 'out' matrix std::vector> out(M, std::vector(N)); - std::unique_lock lock(img->mutexLock); + std::unique_lock lock(mutexLock); for (int i = 0, k = 0; i < M; i++) { for (int j = 0; j < N; j++) { @@ -1335,23 +1339,23 @@ __esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset) { __SEIEED::vector_type_t retv; sycl::detail::ESIMDDeviceInterface *I = sycl::detail::getESIMDDeviceInterface(); - _pi_buffer *buf = - static_cast<_pi_buffer *>(static_cast(surf_ind.get_pointer())); - char *readBase = I->sycl_get_surface_base_addr_ptr(buf->SurfaceIndex); + char *readBase; + uint32_t width; + std::mutex mutexLock; - uint32_t width = static_cast(buf->Size); - uint32_t pos = offset; + I->sycl_get_cm_buffer_params_ptr(static_cast(surf_ind.get_pointer()), + &readBase, &width, &mutexLock); - std::unique_lock lock(buf->mutexLock); + std::unique_lock lock(mutexLock); for (int idx = 0; idx < N; idx++) { - if (pos >= width) { + if (offset >= width) { retv[idx] = 0; } else { - retv[idx] = *((Ty *)(readBase + pos)); + retv[idx] = *((Ty *)(readBase + offset)); } - pos += (uint32_t)sizeof(Ty); + offset += (uint32_t)sizeof(Ty); } return retv; @@ -1362,25 +1366,25 @@ inline void __esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, __SEIEED::vector_type_t vals) { sycl::detail::ESIMDDeviceInterface *I = sycl::detail::getESIMDDeviceInterface(); - _pi_buffer *buf = - static_cast<_pi_buffer *>(static_cast(surf_ind.get_pointer())); - char *writeBase = I->sycl_get_surface_base_addr_ptr(buf->SurfaceIndex); + char *writeBase; + uint32_t width; + std::mutex mutexLock; - uint32_t width = static_cast(buf->Size); - assert(buf->Size == width); + I->sycl_get_cm_buffer_params_ptr(static_cast(surf_ind.get_pointer()), + &writeBase, &width, &mutexLock); - uint32_t pos = offset << 4; + std::unique_lock lock(mutexLock); - std::unique_lock lock(buf->mutexLock); + offset <<= 4; for (int idx = 0; idx < N; idx++) { - if (pos < width) { - *((Ty *)(writeBase + pos)) = vals[idx]; + if (offset < width) { + *((Ty *)(writeBase + offset)) = vals[idx]; } else { break; } - pos += (uint32_t)sizeof(Ty); + offset += (uint32_t)sizeof(Ty); } /// TODO : Optimize diff --git a/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp b/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp index b110c88c374dd..65e0d275bcb33 100755 --- a/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp +++ b/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp @@ -43,5 +43,4 @@ template <> uint32_t pi_cast(uint64_t Value) { std::terminate(); } -#include #include From 7b163d61856ab889029eca7832486f3c9606eea7 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 8 Jul 2021 22:39:34 -0700 Subject: [PATCH 04/40] Revert changes in ESIMD_CPU device interface - These changes are already applied in PR#4011 --- .../experimental/esimd/detail/emu/esimd_emu_functions_v1.h | 5 ----- .../esimd/detail/emu/esimdcpu_device_interface.hpp | 1 - 2 files changed, 6 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h index 9b9af9f6718fa..9fcde11e6e9d4 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h @@ -44,8 +44,3 @@ void (*cm_fence_ptr)(void); char *(*sycl_get_surface_base_addr_ptr)(int); char *(*__cm_emu_get_slm_ptr)(void); void (*cm_slm_init_ptr)(size_t); - -void (*sycl_get_cm_buffer_params_ptr)(void *, char **, uint32_t *, - std::mutex *); -void (*sycl_get_cm_image_params_ptr)(void *, char **, uint32_t *, uint32_t *, - uint32_t *, std::mutex *); diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp index ca24b20b38019..eb249f7c61781 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp @@ -24,7 +24,6 @@ // pointer table file ('esimd_emu_functions_v1.h') included in 'struct // ESIMDDeviceInterface' definition. #include -#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { From 0151dc7b50b09f8865ffbd14fb216c4a46030ad2 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 8 Jul 2021 22:42:42 -0700 Subject: [PATCH 05/40] Revert file relocation for future merging - esimd_emu_functions_v1.h / esimdcpu_device_interface.hpp - Reverted file changes are to be relocated in PR#4011 --- .../sycl/INTEL}/esimd/detail/emu/esimd_emu_functions_v1.h | 0 .../sycl/INTEL}/esimd/detail/emu/esimdcpu_device_interface.hpp | 0 .../sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp | 2 +- sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) rename sycl/include/{sycl/ext/intel/experimental => CL/sycl/INTEL}/esimd/detail/emu/esimd_emu_functions_v1.h (100%) rename sycl/include/{sycl/ext/intel/experimental => CL/sycl/INTEL}/esimd/detail/emu/esimdcpu_device_interface.hpp (100%) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h b/sycl/include/CL/sycl/INTEL/esimd/detail/emu/esimd_emu_functions_v1.h similarity index 100% rename from sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimd_emu_functions_v1.h rename to sycl/include/CL/sycl/INTEL/esimd/detail/emu/esimd_emu_functions_v1.h diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp b/sycl/include/CL/sycl/INTEL/esimd/detail/emu/esimdcpu_device_interface.hpp similarity index 100% rename from sycl/include/sycl/ext/intel/experimental/esimd/detail/emu/esimdcpu_device_interface.hpp rename to sycl/include/CL/sycl/INTEL/esimd/detail/emu/esimdcpu_device_interface.hpp diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 00af504573d7b..80d630aca2054 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -532,7 +532,7 @@ namespace cm_support { #include #include #include -#include +#include namespace raw_send { diff --git a/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp b/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp index 65e0d275bcb33..7ae3eb120b02d 100755 --- a/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp +++ b/sycl/plugins/esimd_cpu/pi_esimd_cpu.hpp @@ -43,4 +43,4 @@ template <> uint32_t pi_cast(uint64_t Value) { std::terminate(); } -#include +#include From 7533a832d67046ae4aa72f6a2286e75a95c557a4 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Fri, 9 Jul 2021 11:54:26 -0700 Subject: [PATCH 06/40] Changes in ESIMD_CPU device interface definition - Address of mutex is used for fetching mutex - Interface change is from PR#4011 --- .../experimental/esimd/detail/memory_intrin.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 80d630aca2054..3c0993bbb5aa5 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -850,13 +850,13 @@ __esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, uint32_t bpp; uint32_t imgWidth; uint32_t imgHeight; - std::mutex mutexLock; + std::mutex *mutexLock; I->sycl_get_cm_image_params_ptr(static_cast(handle.get_pointer()), &readBase, &imgWidth, &imgHeight, &bpp, &mutexLock); - std::unique_lock lock(mutexLock); + std::unique_lock lock(*mutexLock); int x_pos_a, y_pos_a, offset, index; @@ -960,7 +960,7 @@ inline void __esimd_media_block_store(unsigned modififer, TACC handle, uint32_t bpp; uint32_t imgWidth; uint32_t imgHeight; - std::mutex mutexLock; + std::mutex *mutexLock; I->sycl_get_cm_image_params_ptr(static_cast(handle.get_pointer()), &writeBase, &imgWidth, &imgHeight, &bpp, @@ -974,7 +974,7 @@ inline void __esimd_media_block_store(unsigned modififer, TACC handle, // TODO : Remove intermediate 'out' matrix std::vector> out(M, std::vector(N)); - std::unique_lock lock(mutexLock); + std::unique_lock lock(*mutexLock); for (int i = 0, k = 0; i < M; i++) { for (int j = 0; j < N; j++) { @@ -1342,12 +1342,12 @@ __esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset) { char *readBase; uint32_t width; - std::mutex mutexLock; + std::mutex *mutexLock; I->sycl_get_cm_buffer_params_ptr(static_cast(surf_ind.get_pointer()), &readBase, &width, &mutexLock); - std::unique_lock lock(mutexLock); + std::unique_lock lock(*mutexLock); for (int idx = 0; idx < N; idx++) { if (offset >= width) { @@ -1369,12 +1369,12 @@ inline void __esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, char *writeBase; uint32_t width; - std::mutex mutexLock; + std::mutex *mutexLock; I->sycl_get_cm_buffer_params_ptr(static_cast(surf_ind.get_pointer()), &writeBase, &width, &mutexLock); - std::unique_lock lock(mutexLock); + std::unique_lock lock(*mutexLock); offset <<= 4; From c96efe1aa22de9bd8d6a079c8988db4affaf3aa0 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 15 Jul 2021 12:08:47 -0700 Subject: [PATCH 07/40] Removing interleaved '__SYCL_DEVICE_ONLY__' in memory_intrin.hpp - __esimd_surf_read/write --- .../esimd/detail/memory_intrin.hpp | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 3c0993bbb5aa5..d22bfdd51703c 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -34,7 +34,7 @@ class AccessorPrivateProxy { static auto getNativeImageObj(const AccessorTy &Acc) { return Acc.getNativeImageObj(); } -#else +#else // __SYCL_DEVICE_ONLY__ template static auto getImageRange(const AccessorTy &Acc) { return Acc.getAccessRange(); @@ -42,7 +42,7 @@ class AccessorPrivateProxy { static auto getElemSize(const sycl::detail::AccessorBaseHost &Acc) { return Acc.getElemSize(); } -#endif +#endif // __SYCL_DEVICE_ONLY__ }; template __esimd_surf_read(int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, - __SEIEED::vector_type_t elem_offsets) -#ifdef __SYCL_DEVICE_ONLY__ - ; -#else -{ - static_assert(N == 1 || N == 8 || N == 16); - static_assert(TySizeLog2 <= 2); - static_assert(std::is_integral::value || TySizeLog2 == 2); - throw cl::sycl::feature_not_supported(); -} -#endif // __SYCL_DEVICE_ONLY__ + __SEIEED::vector_type_t elem_offsets); // Low-level surface-based scatter. Writes elements of a \ref simd object into a // surface at given offsets. Element can be a 1, 2 or 4-byte value, but it is @@ -208,17 +198,7 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_surf_write(__SEIEED::vector_type_t pred, int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, __SEIEED::vector_type_t elem_offsets, - __SEIEED::vector_type_t vals) -#ifdef __SYCL_DEVICE_ONLY__ - ; -#else -{ - static_assert(N == 1 || N == 8 || N == 16); - static_assert(TySizeLog2 <= 2); - static_assert(std::is_integral::value || TySizeLog2 == 2); - throw cl::sycl::feature_not_supported(); -} -#endif // __SYCL_DEVICE_ONLY__ + __SEIEED::vector_type_t vals); // TODO bring the parameter order of __esimd* intrinsics in accordance with the // correponsing BE intrinsicics parameter order. @@ -519,8 +499,11 @@ __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0); + #ifndef __SYCL_DEVICE_ONLY__ +/// ESIMD_CPU Emulation support using esimd_cpu plugin + #define __SYCL_EXPLICIT_SIMD_PLUGIN__ // Header files required for accessing CM-managed memory - Surface, @@ -816,6 +799,33 @@ inline void __esimd_flat_write4( } } +template +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t +__esimd_surf_read(int16_t scale, SurfIndAliasTy surf_ind, + uint32_t global_offset, + __SEIEED::vector_type_t elem_offsets) { + static_assert(N == 1 || N == 8 || N == 16); + static_assert(TySizeLog2 <= 2); + static_assert(std::is_integral::value || TySizeLog2 == 2); + throw cl::sycl::feature_not_supported(); +} + +template +SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void +__esimd_surf_write(__SEIEED::vector_type_t pred, int16_t scale, + SurfIndAliasTy surf_ind, uint32_t global_offset, + __SEIEED::vector_type_t elem_offsets, + __SEIEED::vector_type_t vals) { + static_assert(N == 1 || N == 8 || N == 16); + static_assert(TySizeLog2 <= 2); + static_assert(std::is_integral::value || TySizeLog2 == 2); + throw cl::sycl::feature_not_supported(); +} + template inline __SEIEED::vector_type_t __esimd_flat_block_read_unaligned(uint64_t addr) { From 1266ceb33b9d1164353959b4ea9bb875a8fc0017 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Tue, 20 Jul 2021 16:21:53 -0700 Subject: [PATCH 08/40] Removing 'get_pointer()' and changing space for 'raw_send' - Kernel build failure fix from fully-working branch - Changing Header file inclusion for ESIMD_CPU --- sycl/include/CL/sycl/accessor.hpp | 5 - .../esimd/detail/memory_intrin.hpp | 123 +++++++++++------- 2 files changed, 77 insertions(+), 51 deletions(-) diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 872c56813efb3..abae9c107b130 100755 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -671,11 +671,6 @@ class image_accessor AccessorBaseHost::getPtr() /*Ptr to Image*/); #endif } - -#ifndef __SYCL_DEVICE_ONLY__ -public: - void *get_pointer() const { return detail::AccessorBaseHost::getPtr(); } -#endif // __SYCL_DEVICE_ONLY__ }; template +#ifndef __SYCL_DEVICE_ONLY__ +/// ESIMD_CPU Emulation support using esimd_cpu plugin + +/// Definition macro to be referenced in CM header files for +/// preventing build failure caused by symbol conflicts between llvm +/// and CM - e.g. vector. +#define __SYCL_EXPLICIT_SIMD_PLUGIN__ + +// Header files required for accessing CM-managed resources - image, +// buffer, runtime API etc. +namespace cm_support { +#include +} // namespace cm_support + +#include +#include +#include +#include + +#endif // ifndef __SYCL_DEVICE_ONLY__ + __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace ext { @@ -42,6 +63,9 @@ class AccessorPrivateProxy { static auto getElemSize(const sycl::detail::AccessorBaseHost &Acc) { return Acc.getElemSize(); } + static void *getPtr(const sycl::detail::AccessorBaseHost &Acc) { + return Acc.getPtr(); + } #endif // __SYCL_DEVICE_ONLY__ }; @@ -504,23 +528,17 @@ __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, /// ESIMD_CPU Emulation support using esimd_cpu plugin -#define __SYCL_EXPLICIT_SIMD_PLUGIN__ - -// Header files required for accessing CM-managed memory - Surface, -// buffer, etc -namespace cm_support { -#include -} // namespace cm_support - -#include -#include -#include -#include - +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace ext { +namespace intel { +namespace experimental { +namespace esimd { +namespace detail { namespace raw_send { enum class msgField : short { - OP, + OP = 0, VNNI, ADDRSIZE, DATASIZE, @@ -651,6 +669,13 @@ auto inline getArrayLen(__SEIEED::vector_type_t addrMsg) { } } // namespace raw_send +} // namespace detail +} // namespace esimd +} // namespace experimental +} // namespace intel +} // namespace ext +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) template @@ -800,8 +825,7 @@ inline void __esimd_flat_write4( } template + __SEIEE::CacheHint L1H, __SEIEE::CacheHint L3H> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t __esimd_surf_read(int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, @@ -813,8 +837,7 @@ __esimd_surf_read(int16_t scale, SurfIndAliasTy surf_ind, } template + __SEIEE::CacheHint L1H, __SEIEE::CacheHint L3H> SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_surf_write(__SEIEED::vector_type_t pred, int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, @@ -862,9 +885,10 @@ __esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, uint32_t imgHeight; std::mutex *mutexLock; - I->sycl_get_cm_image_params_ptr(static_cast(handle.get_pointer()), - &readBase, &imgWidth, &imgHeight, &bpp, - &mutexLock); + auto ImageHandle = __SEIEED::AccessorPrivateProxy::getPtr(handle); + + I->sycl_get_cm_image_params_ptr(ImageHandle, &readBase, &imgWidth, &imgHeight, + &bpp, &mutexLock); std::unique_lock lock(*mutexLock); @@ -972,9 +996,10 @@ inline void __esimd_media_block_store(unsigned modififer, TACC handle, uint32_t imgHeight; std::mutex *mutexLock; - I->sycl_get_cm_image_params_ptr(static_cast(handle.get_pointer()), - &writeBase, &imgWidth, &imgHeight, &bpp, - &mutexLock); + auto ImageHandle = __SEIEED::AccessorPrivateProxy::getPtr(handle); + + I->sycl_get_cm_image_params_ptr(ImageHandle, &writeBase, &imgWidth, + &imgHeight, &bpp, &mutexLock); int x_pos_a, y_pos_a, offset; @@ -1354,8 +1379,9 @@ __esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset) { uint32_t width; std::mutex *mutexLock; - I->sycl_get_cm_buffer_params_ptr(static_cast(surf_ind.get_pointer()), - &readBase, &width, &mutexLock); + auto BufferHandle = __SEIEED::AccessorPrivateProxy::getPtr(surf_ind); + + I->sycl_get_cm_buffer_params_ptr(BufferHandle, &readBase, &width, &mutexLock); std::unique_lock lock(*mutexLock); @@ -1381,8 +1407,10 @@ inline void __esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, uint32_t width; std::mutex *mutexLock; - I->sycl_get_cm_buffer_params_ptr(static_cast(surf_ind.get_pointer()), - &writeBase, &width, &mutexLock); + auto BufferHandle = __SEIEED::AccessorPrivateProxy::getPtr(surf_ind); + + I->sycl_get_cm_buffer_params_ptr(BufferHandle, &writeBase, &width, + &mutexLock); std::unique_lock lock(*mutexLock); @@ -1496,13 +1524,14 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, __SEIEED::vector_type_t retv; - auto op = raw_send::getMsgOp(msgDesc); - assert(op == raw_send::msgOp::LOAD_2D); - uint64_t surfaceBase = raw_send::getSurfaceBaseAddr(msgSrc0); - auto surfaceDim = raw_send::getSurfaceDim(msgSrc0); - auto blockOffset = raw_send::getBlockOffsets(msgSrc0); - auto blockDim = raw_send::getBlockDim(msgSrc0); - auto arrayLen = raw_send::getArrayLen(msgSrc0); + auto op = __SEIEED::raw_send::getMsgOp(msgDesc); + assert(op == __SEIEED::raw_send::msgOp::LOAD_2D); + uint64_t surfaceBase = + __SEIEED::raw_send::getSurfaceBaseAddr(msgSrc0); + auto surfaceDim = __SEIEED::raw_send::getSurfaceDim(msgSrc0); + auto blockOffset = __SEIEED::raw_send::getBlockOffsets(msgSrc0); + auto blockDim = __SEIEED::raw_send::getBlockDim(msgSrc0); + auto arrayLen = __SEIEED::raw_send::getArrayLen(msgSrc0); unsigned SurfaceWidth = surfaceDim[0] + 1; unsigned SurfaceHeight = surfaceDim[1] + 1; @@ -1514,9 +1543,10 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, int Height = blockDim[1] + 1; int NBlks = arrayLen + 1; - bool Transposed = - raw_send::getMsgField(msgDesc, raw_send::msgField::TRANSPOSE); - bool Transformed = raw_send::getMsgField(msgDesc, raw_send::msgField::VNNI); + bool Transposed = __SEIEED::raw_send::getMsgField( + msgDesc, __SEIEED::raw_send::msgField::TRANSPOSE); + bool Transformed = __SEIEED::raw_send::getMsgField( + msgDesc, __SEIEED::raw_send::msgField::VNNI); constexpr unsigned sizeofT = sizeof(Ty1); @@ -1694,12 +1724,13 @@ inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgSrc1) { assert(sfid == 0xF); // UGM type only - auto op = raw_send::getMsgOp(msgDesc); - assert(op == raw_send::msgOp::STORE_2D); - uint64_t surfaceBase = raw_send::getSurfaceBaseAddr(msgSrc0); - auto surfaceDim = raw_send::getSurfaceDim(msgSrc0); - auto blockOffset = raw_send::getBlockOffsets(msgSrc0); - auto blockDim = raw_send::getBlockDim(msgSrc0); + auto op = __SEIEED::raw_send::getMsgOp(msgDesc); + assert(op == __SEIEED::raw_send::msgOp::STORE_2D); + uint64_t surfaceBase = + __SEIEED::raw_send::getSurfaceBaseAddr(msgSrc0); + auto surfaceDim = __SEIEED::raw_send::getSurfaceDim(msgSrc0); + auto blockOffset = __SEIEED::raw_send::getBlockOffsets(msgSrc0); + auto blockDim = __SEIEED::raw_send::getBlockDim(msgSrc0); unsigned SurfaceWidth = surfaceDim[0] + 1; unsigned SurfaceHeight = surfaceDim[1] + 1; @@ -1758,9 +1789,9 @@ inline void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0) { - auto op = raw_send::getMsgOp(msgDesc); + auto op = __SEIEED::raw_send::getMsgOp(msgDesc); - if (op == raw_send::msgOp::LOAD_2D) { + if (op == __SEIEED::raw_send::msgOp::LOAD_2D) { // Prefetch? return; } From c14a75551d4c79f692fd954953972ca722bebdbd Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 26 Jul 2021 12:20:10 -0700 Subject: [PATCH 09/40] Build failure fix after merging sycl branch --- sycl/source/detail/config.def | 1 + sycl/source/detail/config.hpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 74f5cf2a693ab..0649c800eeb1c 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -35,3 +35,4 @@ CONFIG(SYCL_OVERRIDE_PI_OPENCL, 1024, __SYCL_OVERRIDE_PI_OPENCL) CONFIG(SYCL_OVERRIDE_PI_LEVEL_ZERO, 1024, __SYCL_OVERRIDE_PI_LEVEL_ZERO) CONFIG(SYCL_OVERRIDE_PI_CUDA, 1024, __SYCL_OVERRIDE_PI_CUDA) CONFIG(SYCL_OVERRIDE_PI_ROCM, 1024, __SYCL_OVERRIDE_PI_ROCM) +CONFIG(SYCL_OVERRIDE_PI_ESIMD_CPU, 1024, __SYCL_OVERRIDE_PI_ESIMD_CPU) diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 9a59f9c1511f8..a83ad59cc9c85 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -187,7 +187,7 @@ static const std::array, 5> {"*", info::device_type::all}}}; // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST -static const std::array, 6> SyclBeMap = { +static const std::array, 7> SyclBeMap = { {{"host", backend::host}, {"opencl", backend::opencl}, {"level_zero", backend::level_zero}, From 2a9e7893d9f6c9a2dfc2cf8fdedd6bffc573eb5d Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 26 Aug 2021 14:04:56 -0700 Subject: [PATCH 10/40] Reverting a change already applied in PR#4011 --- sycl/CMakeLists.txt | 9 --------- 1 file changed, 9 deletions(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 227bd4b77ea1d..ecb8c9369dcc0 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -292,15 +292,6 @@ if(SYCL_BUILD_PI_ROCM) list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_rocm) endif() -if (SYCL_BUILD_PI_ESIMD_CPU) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-headers) - if (MSVC) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-libs libcmrt-dlls) - else() - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-sos) - endif() -endif() - # Use it as fake dependency in order to force another command(s) to execute. add_custom_command(OUTPUT __force_it COMMAND "${CMAKE_COMMAND}" -E echo From 49bc656a0650554234e3bcc3e184729b61ea2142 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 26 Aug 2021 14:23:06 -0700 Subject: [PATCH 11/40] Rebase fix / File path order fix --- .../ext/intel/experimental/esimd/detail/memory_intrin.hpp | 2 +- sycl/source/detail/config.cpp | 5 +++-- sycl/source/detail/pi.cpp | 3 +-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 9913bec0efba9..9b7561e5169c7 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -36,7 +36,7 @@ namespace cm_support { #include #include #include -#include +#include #endif // ifndef __SYCL_DEVICE_ONLY__ diff --git a/sycl/source/detail/config.cpp b/sycl/source/detail/config.cpp index baf686c269fc9..03f66b672eb61 100644 --- a/sycl/source/detail/config.cpp +++ b/sycl/source/detail/config.cpp @@ -124,13 +124,14 @@ getSyclDeviceTypeMap() { } // Array is used by SYCL_DEVICE_FILTER and SYCL_DEVICE_ALLOWLIST -const std::array, 6> &getSyclBeMap() { - static const std::array, 6> SyclBeMap = { +const std::array, 7> &getSyclBeMap() { + static const std::array, 7> SyclBeMap = { {{"host", backend::host}, {"opencl", backend::opencl}, {"level_zero", backend::level_zero}, {"cuda", backend::cuda}, {"rocm", backend::rocm}, + {"esimd_cpu", backend::esimd_cpu}, {"*", backend::all}}}; return SyclBeMap; } diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index f1ffc57a4de6f..fd8a7dd2de7f4 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -328,8 +328,7 @@ std::vector> findPlugins() { } if (!EsimdCpuFound && (Backend == backend::esimd_cpu || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_ESIMD_CPU_PLUGIN_NAME, - backend::esimd_cpu); + PluginNames.emplace_back(ESIMDCPUPluginName, backend::esimd_cpu); EsimdCpuFound = true; } if (!RocmFound && (Backend == backend::rocm || Backend == backend::all)) { From 69d1cfb73f51b9b3c49f1c37053fa7fc540f9f4a Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 30 Aug 2021 13:35:39 -0700 Subject: [PATCH 12/40] Handling 'void' kernel argument type - For fixing compilation failure from 'spec_const_aot' test case --- sycl/include/CL/sycl/handler.hpp | 33 ++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 02a96580505d5..d0df58fb2f497 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -528,6 +528,22 @@ class __SYCL_EXPORT handler { * type is unknown to the plugin. */ + // For 'void' kernel argument + template + KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { + NormalizedKernelType NormalizedKernel(KernelFunc); + auto NormalizedKernelFunc = + std::function(NormalizedKernel); + auto HostKernelPtr = + new detail::HostKernel( + NormalizedKernelFunc); + MHostKernel.reset(HostKernelPtr); + return &HostKernelPtr->MKernel.template target() + ->MKernelFunc; + } + + // For non-'void' kernel argument - id, item w/wo offset, nd_item template KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { NormalizedKernelType NormalizedKernel(KernelFunc); @@ -542,6 +558,23 @@ class __SYCL_EXPORT handler { ->MKernelFunc; } + template + typename std::enable_if::value, + KernelType *>::type + ResetHostKernel(const KernelType &KernelFunc) { + static_assert(Dims == 0, "Dimension of 'void' argument must be zero"); + struct NormalizedKernelType { + KernelType MKernelFunc; + NormalizedKernelType(const KernelType &KernelFunc) + : MKernelFunc(KernelFunc) {} + void operator()(void) { + MKernelFunc(); + } + }; + return ResetHostKernelHelper( + KernelFunc); + } + template typename std::enable_if>::value, KernelType *>::type From 94cb1617ef06e7d4d9046e0e5f7edf7518e34c02 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 30 Aug 2021 13:58:07 -0700 Subject: [PATCH 13/40] Clang-format error fix --- sycl/include/CL/sycl/handler.hpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index d0df58fb2f497..48024a8232248 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -532,12 +532,10 @@ class __SYCL_EXPORT handler { template KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { NormalizedKernelType NormalizedKernel(KernelFunc); - auto NormalizedKernelFunc = - std::function(NormalizedKernel); + auto NormalizedKernelFunc = std::function(NormalizedKernel); auto HostKernelPtr = - new detail::HostKernel( - NormalizedKernelFunc); + new detail::HostKernel(NormalizedKernelFunc); MHostKernel.reset(HostKernelPtr); return &HostKernelPtr->MKernel.template target() ->MKernelFunc; @@ -559,17 +557,14 @@ class __SYCL_EXPORT handler { } template - typename std::enable_if::value, - KernelType *>::type + typename std::enable_if::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { static_assert(Dims == 0, "Dimension of 'void' argument must be zero"); struct NormalizedKernelType { KernelType MKernelFunc; NormalizedKernelType(const KernelType &KernelFunc) : MKernelFunc(KernelFunc) {} - void operator()(void) { - MKernelFunc(); - } + void operator()(void) { MKernelFunc(); } }; return ResetHostKernelHelper( KernelFunc); From 80d4c5e18e00a6b7508182070b12d0f2f91bb017 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 6 Sep 2021 22:07:27 -0700 Subject: [PATCH 14/40] 'Group' argument fix - Build failure fix for 'pararllel_for_work_group' --- sycl/include/CL/sycl/handler.hpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index e18a7cc38dfcd..3ff7b0ee27b64 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -556,6 +556,7 @@ class __SYCL_EXPORT handler { ->MKernelFunc; } + // For 'void' kernel argument template typename std::enable_if::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { @@ -570,6 +571,7 @@ class __SYCL_EXPORT handler { KernelFunc); } + // For 'sycl::id' kernel argument template typename std::enable_if>::value, KernelType *>::type @@ -585,6 +587,8 @@ class __SYCL_EXPORT handler { return ResetHostKernelHelper( KernelFunc); } + + // For 'sycl::nd_item' kernel argument template typename std::enable_if>::value, KernelType *>::type @@ -599,6 +603,7 @@ class __SYCL_EXPORT handler { KernelFunc); } + // For 'sycl::item' kernel argument template typename std::enable_if>::value, KernelType *>::type @@ -617,6 +622,7 @@ class __SYCL_EXPORT handler { KernelFunc); } + // For 'sycl::item' kernel argument template typename std::enable_if>::value, KernelType *>::type @@ -635,6 +641,23 @@ class __SYCL_EXPORT handler { KernelFunc); } + // For 'sycl::group' kernel argument + template + typename std::enable_if>::value, + KernelType *>::type + ResetHostKernel(const KernelType &KernelFunc) { + struct NormalizedKernelType { + KernelType MKernelFunc; + NormalizedKernelType(const KernelType &KernelFunc) + : MKernelFunc(KernelFunc) {} + void operator()(const nd_item &Arg) { + MKernelFunc(Arg.get_group()); + } + }; + return ResetHostKernelHelper( + KernelFunc); + } + /// Stores lambda to the template-free object /// /// Also initializes kernel name, list of arguments and requirements using From b9f96633c21e6497ce2410d29d623350dcb86187 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Tue, 7 Sep 2021 16:44:02 -0700 Subject: [PATCH 15/40] Empty header file generation for toolchain building w/o CM --- sycl/CMakeLists.txt | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 1af0e2c8eb4d9..410b079f3b42e 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -291,17 +291,17 @@ if(SYCL_BUILD_PI_ROCM) list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_rocm) endif() -# TODO : Remove 'if (NOT MSVC)' when CM_EMU supports Windows -# environment -if (NOT MSVC) - if (SYCL_BUILD_PI_ESIMD_CPU) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS pi_esimd_cpu libcmrt-headers) - if (MSVC) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-libs libcmrt-dlls) - else() - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-sos) - endif() +if (SYCL_BUILD_PI_ESIMD_CPU) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS pi_esimd_cpu libcmrt-headers) + if (MSVC) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-libs libcmrt-dlls) + else() + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-sos) endif() +else() + # TODO/FIXME : Removing empty header file (cm_rt.h) generation when + # the ESIMD_CPU support is enabled by default + file (TOUCH ${SYCL_INCLUDE_BUILD_DIR}/sycl/CL/cm_rt.h) endif() # Use it as fake dependency in order to force another command(s) to execute. From 686a2eb3d1189a12067a547a3d6f7cb4d4b31e9b Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Wed, 8 Sep 2021 14:55:14 -0700 Subject: [PATCH 16/40] Enabling kernel execution with kernel_handler argument --- sycl/include/CL/sycl/handler.hpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 3ff7b0ee27b64..94fed8de4268e 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -565,7 +565,7 @@ class __SYCL_EXPORT handler { KernelType MKernelFunc; NormalizedKernelType(const KernelType &KernelFunc) : MKernelFunc(KernelFunc) {} - void operator()(void) { MKernelFunc(); } + void operator()(void) { detail::runKernelWithoutArg(MKernelFunc); } }; return ResetHostKernelHelper( KernelFunc); @@ -581,7 +581,7 @@ class __SYCL_EXPORT handler { NormalizedKernelType(const KernelType &KernelFunc) : MKernelFunc(KernelFunc) {} void operator()(const nd_item &Arg) { - MKernelFunc(Arg.get_global_id()); + detail::runKernelWithArg(MKernelFunc, Arg.get_global_id()); } }; return ResetHostKernelHelper( @@ -597,7 +597,9 @@ class __SYCL_EXPORT handler { KernelType MKernelFunc; NormalizedKernelType(const KernelType &KernelFunc) : MKernelFunc(KernelFunc) {} - void operator()(const nd_item &Arg) { MKernelFunc(Arg); } + void operator()(const nd_item &Arg) { + detail::runKernelWithArg(MKernelFunc, Arg); + } }; return ResetHostKernelHelper( KernelFunc); @@ -615,7 +617,7 @@ class __SYCL_EXPORT handler { void operator()(const nd_item &Arg) { sycl::item Item = detail::Builder::createItem( Arg.get_global_range(), Arg.get_global_id()); - MKernelFunc(Item); + detail::runKernelWithArg(MKernelFunc, Item); } }; return ResetHostKernelHelper( @@ -634,7 +636,7 @@ class __SYCL_EXPORT handler { void operator()(const nd_item &Arg) { sycl::item Item = detail::Builder::createItem( Arg.get_global_range(), Arg.get_global_id(), Arg.get_offset()); - MKernelFunc(Item); + detail::runKernelWithArg(MKernelFunc, Item); } }; return ResetHostKernelHelper( @@ -651,7 +653,7 @@ class __SYCL_EXPORT handler { NormalizedKernelType(const KernelType &KernelFunc) : MKernelFunc(KernelFunc) {} void operator()(const nd_item &Arg) { - MKernelFunc(Arg.get_group()); + detail::runKernelWithArg(MKernelFunc, Arg.get_group()); } }; return ResetHostKernelHelper( From 7d1e48132be573f60f8fce9c09920d10e6db0916 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Wed, 8 Sep 2021 17:46:31 -0700 Subject: [PATCH 17/40] Adding 'esimd_cpu' in ParseAllowList unit-test --- sycl/unittests/allowlist/ParseAllowList.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 4c417618c4a62..6f9a3875cf53c 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -167,7 +167,8 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ExpectedValue{ {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "rocm"}}, {{"BackendName", "*"}}}; + {{"BackendName", "rocm"}}, {{"BackendName", "esimd_cpu"}}, + {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } From a586d1efcb28e39bcbcb1c62e893ae6eb60b1daf Mon Sep 17 00:00:00 2001 From: dongkyunahn-intel Date: Mon, 13 Sep 2021 10:24:58 -0700 Subject: [PATCH 18/40] Update sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp Co-authored-by: kbobrovs --- .../sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp index 21cb6ad5c0826..aff4756819a95 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp @@ -9,6 +9,8 @@ #include +// This function implements atomic update of pre-existing variable in the absense +// of C++ 20's atomic_ref. template Ty atomic_add_fetch(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon From 1afc4dc6c3710f13caeae0d9bd4e86e7ad4233d2 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 13 Sep 2021 16:35:56 -0700 Subject: [PATCH 19/40] clang-format fix --- .../ext/intel/experimental/esimd/detail/atomic_intrin.hpp | 4 ++-- sycl/unittests/allowlist/ParseAllowList.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp index aff4756819a95..d52074137ef20 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/atomic_intrin.hpp @@ -9,8 +9,8 @@ #include -// This function implements atomic update of pre-existing variable in the absense -// of C++ 20's atomic_ref. +// This function implements atomic update of pre-existing variable in the +// absense of C++ 20's atomic_ref. template Ty atomic_add_fetch(Ty *ptr, Ty val) { #ifdef _WIN32 // TODO: Windows will be supported soon diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index ea889609b4968..34e5658a8c21a 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -167,7 +167,7 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ExpectedValue{ {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "hip"}}, {{"BackendName", "esimd_cpu"}}, + {{"BackendName", "hip"}}, {{"BackendName", "esimd_cpu"}}, {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } From 37a0e78c983a367c5067025a03b201da3846f7a5 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Wed, 15 Sep 2021 22:25:30 -0700 Subject: [PATCH 20/40] Recovering isESIMD() argument - And fixing wrong template argument --- sycl/include/CL/sycl/handler.hpp | 51 +++++++++++++++++--------------- 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 3c5b9eb5cef69..0c8f9de2ff42c 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -565,27 +565,28 @@ class __SYCL_EXPORT handler { */ // For 'void' kernel argument - template + template KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { NormalizedKernelType NormalizedKernel(KernelFunc); auto NormalizedKernelFunc = std::function(NormalizedKernel); auto HostKernelPtr = new detail::HostKernel(NormalizedKernelFunc); + KernelName>(NormalizedKernelFunc); MHostKernel.reset(HostKernelPtr); return &HostKernelPtr->MKernel.template target() ->MKernelFunc; } // For non-'void' kernel argument - id, item w/wo offset, nd_item - template + template KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { NormalizedKernelType NormalizedKernel(KernelFunc); auto NormalizedKernelFunc = std::function &)>(NormalizedKernel); auto HostKernelPtr = new detail::HostKernel, Dims, KernelType>( + sycl::nd_item, Dims, KernelName>( NormalizedKernelFunc); MHostKernel.reset(HostKernelPtr); return &HostKernelPtr->MKernel.template target() @@ -593,7 +594,7 @@ class __SYCL_EXPORT handler { } // For 'void' kernel argument - template + template typename std::enable_if::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { static_assert(Dims == 0, "Dimension of 'void' argument must be zero"); @@ -603,12 +604,12 @@ class __SYCL_EXPORT handler { : MKernelFunc(KernelFunc) {} void operator()(void) { detail::runKernelWithoutArg(MKernelFunc); } }; - return ResetHostKernelHelper( - KernelFunc); + return ResetHostKernelHelper(KernelFunc); } // For 'sycl::id' kernel argument - template + template typename std::enable_if>::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { @@ -620,12 +621,12 @@ class __SYCL_EXPORT handler { detail::runKernelWithArg(MKernelFunc, Arg.get_global_id()); } }; - return ResetHostKernelHelper( - KernelFunc); + return ResetHostKernelHelper(KernelFunc); } // For 'sycl::nd_item' kernel argument - template + template typename std::enable_if>::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { @@ -637,12 +638,12 @@ class __SYCL_EXPORT handler { detail::runKernelWithArg(MKernelFunc, Arg); } }; - return ResetHostKernelHelper( - KernelFunc); + return ResetHostKernelHelper(KernelFunc); } // For 'sycl::item' kernel argument - template + template typename std::enable_if>::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { @@ -656,12 +657,12 @@ class __SYCL_EXPORT handler { detail::runKernelWithArg(MKernelFunc, Item); } }; - return ResetHostKernelHelper( - KernelFunc); + return ResetHostKernelHelper(KernelFunc); } // For 'sycl::item' kernel argument - template + template typename std::enable_if>::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { @@ -675,12 +676,12 @@ class __SYCL_EXPORT handler { detail::runKernelWithArg(MKernelFunc, Item); } }; - return ResetHostKernelHelper( - KernelFunc); + return ResetHostKernelHelper(KernelFunc); } // For 'sycl::group' kernel argument - template + template typename std::enable_if>::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { @@ -692,8 +693,8 @@ class __SYCL_EXPORT handler { detail::runKernelWithArg(MKernelFunc, Arg.get_group()); } }; - return ResetHostKernelHelper( - KernelFunc); + return ResetHostKernelHelper(KernelFunc); } /// Stores lambda to the template-free object @@ -714,7 +715,8 @@ class __SYCL_EXPORT handler { PI_INVALID_OPERATION); } KernelType *KernelPtr = - ResetHostKernel(KernelFunc); + ResetHostKernel( + KernelFunc); using KI = sycl::detail::KernelInfo; // Empty name indicates that the compilation happens without integration @@ -723,7 +725,8 @@ class __SYCL_EXPORT handler { // TODO support ESIMD in no-integration-header case too. MArgs.clear(); extractArgsAndReqsFromLambda(reinterpret_cast(KernelPtr), - KI::getNumParams(), &KI::getParamDesc(0)); + KI::getNumParams(), &KI::getParamDesc(0), + KI::isESIMD()); MKernelName = KI::getName(); MOSModuleHandle = detail::OSUtil::getOSModuleHandle(KI::getName()); } else { From 3710d07ffc9e97ea00bf8c41d067761483d210bb Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 16 Sep 2021 22:34:41 -0700 Subject: [PATCH 21/40] __esimd_raw_send* are removed - To be brought back later with unit tests --- .../esimd/detail/memory_intrin.hpp | 368 +----------------- 1 file changed, 3 insertions(+), 365 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 9b7561e5169c7..248b8147a8c4f 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -528,155 +528,6 @@ __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, /// ESIMD_CPU Emulation support using esimd_cpu plugin -__SYCL_INLINE_NAMESPACE(cl) { -namespace sycl { -namespace ext { -namespace intel { -namespace experimental { -namespace esimd { -namespace detail { -namespace raw_send { - -enum class msgField : short { - OP = 0, - VNNI, - ADDRSIZE, - DATASIZE, - VECTSIZE, - TRANSPOSE, - CACHE, - DSTLEN, - SRC0LEN, - ADDRTYPE -}; - -enum class msgOp : short { - DP_LOAD = 0x0, // scatter/vector load - LOAD_2D = 0x3, - DP_STORE = 0x4, // scatter/vector store - STORE_2D = 0x7, - OP_MAX = 0x3F -}; - -typedef struct _bitfields_ { - uint32_t offset; - uint32_t mask; -} bitfields; - -const bitfields BIT_FIELDS[10] = { - {0, 0x3F}, // OP / 6 bits - {7, 0x1}, // VNNI -> LOAD only - {7, 0x3}, // Address size - {9, 0x7}, // DATASIZE - {12, 0x7}, // VECTSIZE - {15, 0x1}, // TRANSPOSE -> LOAD only - {17, 0x7}, // CACHE - {20, 0x1F}, // DSTLEN - {25, 0xF}, // SRC0LEN, - {29, 0x3} // ADDRTYPE -}; -uint32_t inline getMsgField(uint32_t msg, msgField field) { - uint32_t idx = static_cast(field); - return ((msg >> BIT_FIELDS[idx].offset) & BIT_FIELDS[idx].mask); -} - -auto inline getMsgOp(uint32_t msg) { - msgOp ret; - ret = static_cast(getMsgField((uint32_t)msg, msgField::OP)); - return ret; -} - -template -uint64_t inline getSurfaceBaseAddr(__SEIEED::vector_type_t addrMsg) { - constexpr int sizeofT = sizeof(T); - uint64_t Ret = 0; - - if constexpr (sizeofT == 4) { - Ret = (uint64_t)addrMsg[1] << 32; - Ret |= (uint64_t)addrMsg[0]; - } else if constexpr (sizeofT == 8) { - Ret = addrMsg[0]; - } - - return Ret; -} - -template -uint64_t inline getLaneAddr(__SEIEED::vector_type_t addrMsg, - unsigned lane_id) { - // (matrix_ref addrMsg) - // vector_ref addr_ref = addrMsg.template select<1, 1, 2, 1>(0, 2 - // * lane_id).template format(); return addr_ref(0); - throw cl::sycl::feature_not_supported(); -} - -template -auto inline getSurfaceDim(__SEIEED::vector_type_t addrMsg) { - __SEIEED::vector_type_t Ret; - constexpr int sizeofT = sizeof(T); - - static_assert(sizeofT == 4, "Unsupported addrMsg format!!"); - - if constexpr (sizeofT == 4) { - for (int idx = 0; idx < 4; idx++) { - Ret[idx] = addrMsg[idx + 2]; - } - } - - return Ret; -} - -template -auto inline getBlockOffsets(__SEIEED::vector_type_t addrMsg) { - __SEIEED::vector_type_t Ret; - constexpr int sizeofT = sizeof(T); - - static_assert(sizeofT == 4, "Unsupported addrMsg format!!"); - - if constexpr (sizeofT == 4) { - for (int idx = 0; idx < 4; idx++) { - Ret[idx] = static_cast(addrMsg[idx + 5]); - } - } - - return Ret; -} - -template -auto inline getBlockDim(__SEIEED::vector_type_t addrMsg) { - __SEIEED::vector_type_t Ret; - constexpr int sizeofT = sizeof(T); - T RawValue = 0; - - static_assert(sizeofT == 4, "Unsupported addrMsg format!!"); - - if constexpr (sizeofT == 4) { - RawValue = addrMsg[7]; - Ret[0] = (unsigned char)(RawValue & 0xFF); // width - Ret[1] = (unsigned char)((RawValue >> 8) & 0xFF); // height - Ret[2] = (unsigned char)((RawValue >> 24) & 0xFF); // For ArrayLen - } - - assert(RawValue != 0); - - return Ret; -} - -template -auto inline getArrayLen(__SEIEED::vector_type_t addrMsg) { - auto blkDim = getBlockDim(addrMsg); - return (blkDim[2] >> 4); -} - -} // namespace raw_send -} // namespace detail -} // namespace esimd -} // namespace experimental -} // namespace intel -} // namespace ext -} // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) - template inline __SEIEED::vector_type_t @@ -1519,176 +1370,8 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t numDst, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, - __SEIEED::vector_type_t msgDst) { - assert(sfid == 0xF); // UGM type only - - __SEIEED::vector_type_t retv; - - auto op = __SEIEED::raw_send::getMsgOp(msgDesc); - assert(op == __SEIEED::raw_send::msgOp::LOAD_2D); - uint64_t surfaceBase = - __SEIEED::raw_send::getSurfaceBaseAddr(msgSrc0); - auto surfaceDim = __SEIEED::raw_send::getSurfaceDim(msgSrc0); - auto blockOffset = __SEIEED::raw_send::getBlockOffsets(msgSrc0); - auto blockDim = __SEIEED::raw_send::getBlockDim(msgSrc0); - auto arrayLen = __SEIEED::raw_send::getArrayLen(msgSrc0); - - unsigned SurfaceWidth = surfaceDim[0] + 1; - unsigned SurfaceHeight = surfaceDim[1] + 1; - unsigned SurfacePitch = surfaceDim[2] + 1; - - int X = blockOffset[0]; - int Y = blockOffset[1]; - int Width = blockDim[0] + 1; - int Height = blockDim[1] + 1; - int NBlks = arrayLen + 1; - - bool Transposed = __SEIEED::raw_send::getMsgField( - msgDesc, __SEIEED::raw_send::msgField::TRANSPOSE); - bool Transformed = __SEIEED::raw_send::getMsgField( - msgDesc, __SEIEED::raw_send::msgField::VNNI); - - constexpr unsigned sizeofT = sizeof(Ty1); - - char *buffBase = (char *)surfaceBase; - - // TODO : Acquire mutex for the surface pointed to by 'surfaceBase' - int vecIdx = 0; - int blkCount = 0; - - for (int xBase = X * sizeofT; blkCount < NBlks; xBase += sizeofT * Width) { - if (Transformed == true) { - constexpr int elems_per_DW = (sizeofT == 1) ? 4 : 2; /// VNNI_pack - if (Transposed == false) { /// Transform only load - int yRead = Y * SurfacePitch; - for (int u = 0; u < Height; - u += elems_per_DW, yRead += SurfacePitch * elems_per_DW) { - if ((yRead < 0) || (yRead >= SurfacePitch * SurfaceHeight)) { - /// Vertically out-of-bound, padding zero on out of boundary - for (int v = 0; v < Width; v += 1) { - for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { - retv[vecIdx] = (Ty1)(0); - } // k loop - } - // vecIdx += Width * elems_per_DW;; - continue; - } - - int xRead = xBase; - for (int v = 0; v < Width; v += 1, xRead += sizeofT) { - if ((xRead < 0) || (xRead >= SurfaceWidth)) { - /// Horizontally out-of-bound - for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { - retv[vecIdx] = (Ty1)(0); - } // k loop - // vecIdx += elems_per_DW; - continue; - } - - char *base = buffBase + yRead + xRead; - int offset = 0; - for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { - retv[vecIdx] = *((Ty1 *)(base + offset)); - // Increasing in Y-direction - offset += SurfacePitch; - } // k loop - } // v loop - } /// u loop - } // Transposed = false - else // Transposed == true - { /// Transform & Transpose load - int xRead = xBase; - for (int v = 0; v < Width; - v += elems_per_DW, xRead += sizeofT * elems_per_DW) { - if ((xRead < 0) || (xRead >= SurfaceWidth)) { - // Horizontally out-of-bound - for (int u = 0; u < Height; u += 1) { - for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { - retv[vecIdx] = (Ty1)(0); - } // k loop - } - // vecIdx += Height * elems_per_DW; - continue; - } - - int yRead = Y * SurfacePitch; - for (int u = 0; u < Height; u += 1, yRead += SurfacePitch) { - if ((yRead < 0) || (yRead >= SurfacePitch * SurfaceHeight)) { - /// Vertically out-of-bound - for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { - retv[vecIdx] = (Ty1)(0); - } // k loop - // vecIdx += elems_per_DW; - continue; - } - - char *base = buffBase + yRead + xRead; - int offset = 0; - for (int k = 0; k < elems_per_DW; k++, vecIdx += 1) { - retv[vecIdx] = *((Ty1 *)(base + offset)); - // Increasing in X-direction - offset += sizeofT; - } // k loop - } // u loop - } // v loop - } // Transposed == true - } // Transformed == true - else // (Transformed == false) - { - if (Transposed == false) { /// Linear load - no transform, no transpose - int yRead = Y * SurfacePitch; - for (int u = 0; u < Height; u += 1, yRead += SurfacePitch) { - if ((yRead < 0) || (yRead >= SurfacePitch * SurfaceHeight)) { - // Vertically Out-of-bound - for (int v = 0; v < Width; v += 1, vecIdx += 1) { - retv[vecIdx] = (Ty1)(0); - } - // vecIdx += Width; - continue; - } - - int xRead = xBase; - for (int v = 0; v < Width; v += 1, xRead += sizeofT, vecIdx += 1) { - if ((xRead >= 0) && (xRead < SurfaceWidth)) { - retv[vecIdx] = *((Ty1 *)(buffBase + yRead + xRead)); - } else { - // Horizontally out of bound - retv[vecIdx] = (Ty1)(0); - } - } // v loop - } // u loop - } /// Transposed == false - else // Transposed = true - { /// Transpose load - no transform - int xRead = xBase; - for (int v = 0; v < Width; v += 1, xRead += sizeofT) { - if ((xRead < 0) || (xRead > SurfaceWidth)) { - // Horizontally out-of-bound - for (int u = 0; u < Height; u += 1, vecIdx += 1) { - retv[vecIdx] = (Ty1)(0); - } - // vecIdx += Height; - continue; - } - - int yRead = Y * SurfacePitch; - for (int u = 0; u < Height; - u += 1, yRead += SurfacePitch, vecIdx += 1) { - if ((yRead >= 0) && (yRead < SurfacePitch * SurfaceHeight)) { - retv[vecIdx] = *((Ty1 *)(buffBase + yRead + xRead)); - } else { - // Vertically out of bound - retv[vecIdx] = (Ty1)(0); - } - } // u loop - } // v loop - } // Transposed == true - } // Transformed == false - blkCount += 1; - vecIdx = blkCount * Width * Height; - } // xBase loop - - return retv; + throw cl::sycl::feature_not_supported(); + return 0; } /// \brief Raw sends store. @@ -1723,45 +1406,7 @@ inline void __esimd_raw_sends_store(uint8_t modifier, uint8_t execSize, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, __SEIEED::vector_type_t msgSrc1) { - assert(sfid == 0xF); // UGM type only - auto op = __SEIEED::raw_send::getMsgOp(msgDesc); - assert(op == __SEIEED::raw_send::msgOp::STORE_2D); - uint64_t surfaceBase = - __SEIEED::raw_send::getSurfaceBaseAddr(msgSrc0); - auto surfaceDim = __SEIEED::raw_send::getSurfaceDim(msgSrc0); - auto blockOffset = __SEIEED::raw_send::getBlockOffsets(msgSrc0); - auto blockDim = __SEIEED::raw_send::getBlockDim(msgSrc0); - - unsigned SurfaceWidth = surfaceDim[0] + 1; - unsigned SurfaceHeight = surfaceDim[1] + 1; - unsigned SurfacePitch = surfaceDim[2] + 1; - - int X = blockOffset[0]; - int Y = blockOffset[1]; - int Width = blockDim[0] + 1; - int Height = blockDim[1] + 1; - - constexpr unsigned sizeofT = sizeof(Ty2); - - char *buffBase = (char *)surfaceBase; - - int vecIdx = 0; - int rowCount = 0; - for (int yWrite = Y * SurfacePitch; rowCount < Height; - yWrite += SurfacePitch) { - if (yWrite == SurfacePitch * SurfaceHeight) { - // Vertically Out-of-bound - break; - } - int writeCount = 0; - for (int xWrite = X * sizeofT; writeCount < Width; - xWrite += sizeofT, vecIdx += 1, writeCount += 1) { - if (xWrite >= 0 && xWrite < SurfaceWidth) { - *((Ty2 *)(buffBase + yWrite + xWrite)) = msgSrc1[vecIdx]; - } - } // xWrite loop - rowCount += 1; - } // yWrite loop + throw cl::sycl::feature_not_supported(); } /// \brief Raw send store. @@ -1789,13 +1434,6 @@ inline void __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0) { - auto op = __SEIEED::raw_send::getMsgOp(msgDesc); - - if (op == __SEIEED::raw_send::msgOp::LOAD_2D) { - // Prefetch? - return; - } - throw cl::sycl::feature_not_supported(); } From 6a41df5f5df2532b0d6a6f013a305fb7b1319b9f Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Fri, 17 Sep 2021 09:14:09 -0700 Subject: [PATCH 22/40] Typo fix - recovering a line removed by mistake --- .../sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 248b8147a8c4f..7d6d58807a6be 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -1370,6 +1370,7 @@ __esimd_raw_send_load(uint8_t modifier, uint8_t execSize, uint8_t numSrc0, uint8_t numDst, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0, + __SEIEED::vector_type_t msgDst) { throw cl::sycl::feature_not_supported(); return 0; } From f183ec2b49ab45186b0e9cbb547656f9e27c43dd Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Wed, 22 Sep 2021 12:34:28 -0700 Subject: [PATCH 23/40] New cpp file to contain getESIMDDeviceInterface - To prevent failure from odr.cpp - New ABI entry is added : getESIMDDeviceInterface --- .../emu/detail/esimdcpu_device_interface.hpp | 49 +------------ sycl/source/CMakeLists.txt | 4 ++ sycl/source/esimdcpu_device_interface.cpp | 69 +++++++++++++++++++ sycl/test/abi/sycl_symbols_linux.dump | 1 + 4 files changed, 75 insertions(+), 48 deletions(-) create mode 100644 sycl/source/esimdcpu_device_interface.cpp diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/emu/detail/esimdcpu_device_interface.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/emu/detail/esimdcpu_device_interface.hpp index ca24b20b38019..3e8716a505ae1 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/emu/detail/esimdcpu_device_interface.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/emu/detail/esimdcpu_device_interface.hpp @@ -65,55 +65,8 @@ struct ESIMDEmuPluginOpaqueData { uintptr_t version; void *data; }; -// The table below shows the correspondence between the \c version -// and the contents of the \c data field: -// version == 0, data is ESIMDDeviceInterface* -ESIMDDeviceInterface *getESIMDDeviceInterface() { - // TODO (performance) cache the interface pointer, can make a difference - // when calling fine-grained libCM APIs through it (like memory access in a - // tight loop) - void *PIOpaqueData = nullptr; - - PIOpaqueData = getPluginOpaqueData(nullptr); - - ESIMDEmuPluginOpaqueData *OpaqueData = - reinterpret_cast(PIOpaqueData); - - // First check if opaque data version is compatible. - if (OpaqueData->version != ESIMD_EMU_PLUGIN_OPAQUE_DATA_VERSION) { - // NOTE: the version check should always be '!=' as layouts of different - // versions of PluginOpaqueData is not backward compatible, unlike - // layout of the ESIMDDeviceInterface. - - std::cerr << __FUNCTION__ << std::endl - << "Opaque data returned by ESIMD Emu plugin is incompatible with" - << "the one used in current implementation." << std::endl - << "Returned version : " << OpaqueData->version << std::endl - << "Required version : " << ESIMD_EMU_PLUGIN_OPAQUE_DATA_VERSION - << std::endl; - throw cl::sycl::feature_not_supported(); - } - // Opaque data version is OK, can cast the 'data' field. - ESIMDDeviceInterface *Interface = - reinterpret_cast(OpaqueData->data); - - // Now check that device interface version is compatible. - if (Interface->version < ESIMD_DEVICE_INTERFACE_VERSION) { - std::cerr << __FUNCTION__ << std::endl - << "The device interface version provided from plug-in " - << "library is behind required device interface version" - << std::endl - << "Found version : " << Interface->version << std::endl - << "Required version :" << ESIMD_DEVICE_INTERFACE_VERSION - << std::endl; - throw cl::sycl::feature_not_supported(); - } - return Interface; -} - -#undef ESIMD_DEVICE_INTERFACE_VERSION -#undef ESIMD_EMU_PLUGIN_OPAQUE_DATA_VERSION +__SYCL_EXPORT ESIMDDeviceInterface *getESIMDDeviceInterface(); } // namespace detail } // namespace sycl diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index f8744de9e17a3..3f319b103aab1 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -180,6 +180,10 @@ set(SYCL_SOURCES "$<$:abi_replacements_windows.cpp>" ) +if (SYCL_BUILD_PI_ESIMD_CPU) + list(APPEND SYCL_SOURCES "esimdcpu_device_interface.cpp") +endif() + if (MSVC) # MSVC provides two incompatible build variants for its CRT: release and debug # To avoid potential issues in user code we also need to provide two kinds diff --git a/sycl/source/esimdcpu_device_interface.cpp b/sycl/source/esimdcpu_device_interface.cpp new file mode 100644 index 0000000000000..be28ede6c2f3f --- /dev/null +++ b/sycl/source/esimdcpu_device_interface.cpp @@ -0,0 +1,69 @@ +//==------------------- esimdcpu_device_interface.cpp ----------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +/// \file esimdcpu_device_interface.cpp +/// Definitions for ESIMD_CPU-device specific definitions. +/// +/// This interface is for ESIMD intrinsic emulation implementations +/// such as slm_access to access ESIMD_CPU specific-support therefore +/// it has to be defined and shared as include directory +/// +/// \ingroup sycl_pi_esimd_cpu + +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +__SYCL_EXPORT ESIMDDeviceInterface *getESIMDDeviceInterface() { + // TODO (performance) cache the interface pointer, can make a difference + // when calling fine-grained libCM APIs through it (like memory access in a + // tight loop) + void *PIOpaqueData = nullptr; + + PIOpaqueData = getPluginOpaqueData(nullptr); + + ESIMDEmuPluginOpaqueData *OpaqueData = + reinterpret_cast(PIOpaqueData); + + // First check if opaque data version is compatible. + if (OpaqueData->version != ESIMD_EMU_PLUGIN_OPAQUE_DATA_VERSION) { + // NOTE: the version check should always be '!=' as layouts of different + // versions of PluginOpaqueData is not backward compatible, unlike + // layout of the ESIMDDeviceInterface. + + std::cerr << __FUNCTION__ << std::endl + << "Opaque data returned by ESIMD Emu plugin is incompatible with" + << "the one used in current implementation." << std::endl + << "Returned version : " << OpaqueData->version << std::endl + << "Required version : " << ESIMD_EMU_PLUGIN_OPAQUE_DATA_VERSION + << std::endl; + throw feature_not_supported(); + } + // Opaque data version is OK, can cast the 'data' field. + ESIMDDeviceInterface *Interface = + reinterpret_cast(OpaqueData->data); + + // Now check that device interface version is compatible. + if (Interface->version < ESIMD_DEVICE_INTERFACE_VERSION) { + std::cerr << __FUNCTION__ << std::endl + << "The device interface version provided from plug-in " + << "library is behind required device interface version" + << std::endl + << "Found version : " << Interface->version << std::endl + << "Required version :" << ESIMD_DEVICE_INTERFACE_VERSION + << std::endl; + throw feature_not_supported(); + } + return Interface; +} +} // namespace detail +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 7bbd5f580fa52..666ce0324c814 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3850,6 +3850,7 @@ _ZN2cl4sycl6detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6devic _ZN2cl4sycl6detail22get_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE _ZN2cl4sycl6detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EENS0_12bundle_stateE _ZN2cl4sycl6detail22has_kernel_bundle_implERKNS0_7contextERKSt6vectorINS0_6deviceESaIS6_EERKS5_INS0_9kernel_idESaISB_EENS0_12bundle_stateE +_ZN2cl4sycl6detail23getESIMDDeviceInterfaceEv _ZN2cl4sycl6detail24find_device_intersectionERKSt6vectorINS0_13kernel_bundleILNS0_12bundle_stateE1EEESaIS5_EE _ZN2cl4sycl6detail27getPixelCoordLinearFiltModeENS0_3vecIfLi4EEENS0_15addressing_modeENS0_5rangeILi3EEERS3_ _ZN2cl4sycl6detail28getDeviceFunctionPointerImplERNS0_6deviceERNS0_7programEPKc From 4543b8f6407114bee8f2c3afdb1dc63be25e27c9 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Wed, 22 Sep 2021 14:29:37 -0700 Subject: [PATCH 24/40] Reordering backend initializations --- sycl/source/detail/config.cpp | 2 +- sycl/source/detail/pi.cpp | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/config.cpp b/sycl/source/detail/config.cpp index 3f2949dbea917..0d80911c064ea 100644 --- a/sycl/source/detail/config.cpp +++ b/sycl/source/detail/config.cpp @@ -175,8 +175,8 @@ const std::array, 7> &getSyclBeMap() { {"opencl", backend::opencl}, {"level_zero", backend::level_zero}, {"cuda", backend::cuda}, - {"hip", backend::hip}, {"esimd_cpu", backend::esimd_cpu}, + {"hip", backend::hip}, {"*", backend::all}}}; return SyclBeMap; } diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index d026be3a6eb59..9cb0b03860686 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -326,15 +326,15 @@ std::vector> findPlugins() { PluginNames.emplace_back(CUDAPluginName, backend::cuda); CudaFound = true; } - if (!HIPFound && (Backend == backend::hip || Backend == backend::all)) { - PluginNames.emplace_back(HIPPluginName, backend::hip); - HIPFound = true; - } if (!EsimdCpuFound && (Backend == backend::esimd_cpu || Backend == backend::all)) { PluginNames.emplace_back(ESIMDCPUPluginName, backend::esimd_cpu); EsimdCpuFound = true; } + if (!HIPFound && (Backend == backend::hip || Backend == backend::all)) { + PluginNames.emplace_back(HIPPluginName, backend::hip); + HIPFound = true; + } } } return PluginNames; @@ -438,6 +438,11 @@ static void initializePlugins(std::vector *Plugins) { // Use the CUDA plugin as the GlobalPlugin GlobalPlugin = std::make_shared(PluginInformation, backend::cuda, Library); + } else if (InteropBE == backend::esimd_cpu && + PluginNames[I].first.find("esimd_cpu") != std::string::npos) { + // Use the ESIMD_CPU plugin as the GlobalPlugin + GlobalPlugin = std::make_shared(PluginInformation, + backend::esimd_cpu, Library); } else if (InteropBE == backend::hip && PluginNames[I].first.find("hip") != std::string::npos) { // Use the HIP plugin as the GlobalPlugin @@ -448,11 +453,6 @@ static void initializePlugins(std::vector *Plugins) { // Use the LEVEL_ZERO plugin as the GlobalPlugin GlobalPlugin = std::make_shared(PluginInformation, backend::level_zero, Library); - } else if (InteropBE == backend::esimd_cpu && - PluginNames[I].first.find("esimd_cpu") != std::string::npos) { - // Use the ESIMD_CPU plugin as the GlobalPlugin - GlobalPlugin = std::make_shared(PluginInformation, - backend::esimd_cpu, Library); } Plugins->emplace_back( plugin(PluginInformation, PluginNames[I].second, Library)); From 21f11fec480b3567efcf6e19e86ffe1643da76d7 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Wed, 22 Sep 2021 15:37:06 -0700 Subject: [PATCH 25/40] Failure fixes - ParseAllowLists - Removing conditional statement for including 'esimdcpu_device_interface.cpp' --- sycl/source/CMakeLists.txt | 5 +---- sycl/unittests/allowlist/ParseAllowList.cpp | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 3f319b103aab1..4950fa7a77d1c 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -175,15 +175,12 @@ set(SYCL_SOURCES "sampler.cpp" "stream.cpp" "spirv_ops.cpp" + "esimdcpu_device_interface.cpp" "$<$:detail/windows_pi.cpp>" "$<$,$>:detail/posix_pi.cpp>" "$<$:abi_replacements_windows.cpp>" ) -if (SYCL_BUILD_PI_ESIMD_CPU) - list(APPEND SYCL_SOURCES "esimdcpu_device_interface.cpp") -endif() - if (MSVC) # MSVC provides two incompatible build variants for its CRT: release and debug # To avoid potential issues in user code we also need to provide two kinds diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 34e5658a8c21a..cc70ecd54f718 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -167,7 +167,7 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ExpectedValue{ {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "hip"}}, {{"BackendName", "esimd_cpu"}}, + {{"BackendName", "esimd_cpu"}}, {{"BackendName", "hip"}}, {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } From 30d738fd376fc52679f59db7b70e0105503654d9 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Fri, 24 Sep 2021 11:26:18 -0700 Subject: [PATCH 26/40] Windows ABI test failure fix --- sycl/test/abi/sycl_symbols_windows.dump | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 9b347ceb7b47a..ddde72ca4664d 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -1833,6 +1833,7 @@ ?fill@MemoryManager@detail@sycl@cl@@SAXPEAVSYCLMemObjI@234@PEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KPEBDIV?$range@$02@34@5V?$id@$02@34@IV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@7@AEAPEAU_pi_event@@@Z ?fill_usm@MemoryManager@detail@sycl@cl@@SAXPEAXV?$shared_ptr@Vqueue_impl@detail@sycl@cl@@@std@@_KHV?$vector@PEAU_pi_event@@V?$allocator@PEAU_pi_event@@@std@@@6@AEAPEAU_pi_event@@@Z ?finalize@handler@sycl@cl@@AEAA?AVevent@23@XZ +?getESIMDDeviceInterface@detail@sycl@cl@@YAPEAUESIMDDeviceInterface@123@XZ ?find_device_intersection@detail@sycl@cl@@YA?AV?$vector@Vdevice@sycl@cl@@V?$allocator@Vdevice@sycl@cl@@@std@@@std@@AEBV?$vector@V?$kernel_bundle@$00@sycl@cl@@V?$allocator@V?$kernel_bundle@$00@sycl@cl@@@std@@@5@@Z ?floor@__host_std@cl@@YA?AV?$vec@M$00@sycl@2@V342@@Z ?floor@__host_std@cl@@YA?AV?$vec@M$01@sycl@2@V342@@Z From ceb309dd7d8fabd5971649f8de2780952e5c5b61 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Wed, 29 Sep 2021 11:25:31 -0700 Subject: [PATCH 27/40] single_task debugging --- sycl/include/CL/sycl/handler.hpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 2476f24d85414..cd7483b5ef8ba 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -561,8 +561,10 @@ class __SYCL_EXPORT handler { */ // For 'void' kernel argument - template - KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { + template + typename std::enable_if::type + ResetHostKernelHelper(const KernelType &KernelFunc) { NormalizedKernelType NormalizedKernel(KernelFunc); auto NormalizedKernelFunc = std::function(NormalizedKernel); auto HostKernelPtr = @@ -576,7 +578,8 @@ class __SYCL_EXPORT handler { // For non-'void' kernel argument - id, item w/wo offset, nd_item template - KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { + typename std::enable_if::type + ResetHostKernelHelper(const KernelType &KernelFunc) { NormalizedKernelType NormalizedKernel(KernelFunc); auto NormalizedKernelFunc = std::function &)>(NormalizedKernel); @@ -593,14 +596,13 @@ class __SYCL_EXPORT handler { template typename std::enable_if::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { - static_assert(Dims == 0, "Dimension of 'void' argument must be zero"); struct NormalizedKernelType { KernelType MKernelFunc; NormalizedKernelType(const KernelType &KernelFunc) : MKernelFunc(KernelFunc) {} void operator()(void) { detail::runKernelWithoutArg(MKernelFunc); } }; - return ResetHostKernelHelper(KernelFunc); } @@ -1425,7 +1427,7 @@ class __SYCL_EXPORT handler { // known constant. MNDRDesc.set(range<1>{1}); - StoreLambda(KernelFunc); + StoreLambda(std::move(KernelFunc)); setType(detail::CG::Kernel); #endif } From b073a4e1e474880b89cd08e09dfe46928511b369 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 30 Sep 2021 23:09:16 -0700 Subject: [PATCH 28/40] Fixing void(void) type kernel failure - mad_sat.cpp, etc - NormalizedKernelType-struct is removed for void(void) type kernel - Previous reset-based initialization is applied --- sycl/include/CL/sycl/handler.hpp | 29 ++++------------------------- 1 file changed, 4 insertions(+), 25 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index cd7483b5ef8ba..574618bbb9302 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -560,26 +560,10 @@ class __SYCL_EXPORT handler { * type is unknown to the plugin. */ - // For 'void' kernel argument - template - typename std::enable_if::type - ResetHostKernelHelper(const KernelType &KernelFunc) { - NormalizedKernelType NormalizedKernel(KernelFunc); - auto NormalizedKernelFunc = std::function(NormalizedKernel); - auto HostKernelPtr = - new detail::HostKernel(NormalizedKernelFunc); - MHostKernel.reset(HostKernelPtr); - return &HostKernelPtr->MKernel.template target() - ->MKernelFunc; - } - // For non-'void' kernel argument - id, item w/wo offset, nd_item template - typename std::enable_if::type - ResetHostKernelHelper(const KernelType &KernelFunc) { + KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { NormalizedKernelType NormalizedKernel(KernelFunc); auto NormalizedKernelFunc = std::function &)>(NormalizedKernel); @@ -596,14 +580,9 @@ class __SYCL_EXPORT handler { template typename std::enable_if::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { - struct NormalizedKernelType { - KernelType MKernelFunc; - NormalizedKernelType(const KernelType &KernelFunc) - : MKernelFunc(KernelFunc) {} - void operator()(void) { detail::runKernelWithoutArg(MKernelFunc); } - }; - return ResetHostKernelHelper(KernelFunc); + MHostKernel.reset( + new detail::HostKernel(KernelFunc)); + return (KernelType *)(MHostKernel->getPtr()); } // For 'sycl::id' kernel argument From d4846bfaed65779b5704f1b16277c895efaafa80 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 30 Sep 2021 23:36:42 -0700 Subject: [PATCH 29/40] Merging fixes --- sycl/source/detail/config.def | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/config.def b/sycl/source/detail/config.def index 9344f1a083c1f..b23b6f57bd1d1 100644 --- a/sycl/source/detail/config.def +++ b/sycl/source/detail/config.def @@ -35,4 +35,3 @@ CONFIG(SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE, 16, __SYCL_CACHE_MIN_DEVICE_IMAGE_SIZE) CONFIG(SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE, 16, __SYCL_CACHE_MAX_DEVICE_IMAGE_SIZE) CONFIG(INTEL_ENABLE_OFFLOAD_ANNOTATIONS, 1, __SYCL_INTEL_ENABLE_OFFLOAD_ANNOTATIONS) CONFIG(SYCL_ENABLE_DEFAULT_CONTEXTS, 1, __SYCL_ENABLE_DEFAULT_CONTEXTS) -CONFIG(SYCL_OVERRIDE_PI_ESIMD_CPU, 1024, __SYCL_OVERRIDE_PI_ESIMD_CPU) From 7e65a1bb81b87deff3a9d571f4b9ab17f22da13a Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Fri, 1 Oct 2021 09:56:46 -0700 Subject: [PATCH 30/40] clang-format fix / void(sycl::group) type fix for host device --- sycl/include/CL/sycl/handler.hpp | 35 ++++++++++++-------------------- sycl/source/detail/pi.cpp | 3 ++- 2 files changed, 15 insertions(+), 23 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 6c162078682ef..50048623b1f78 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -561,7 +561,7 @@ class __SYCL_EXPORT handler { * type is unknown to the plugin. */ - // For non-'void' kernel argument - id, item w/wo offset, nd_item + // For 'id, item w/wo offset, nd_item' kernel arguments template KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) { @@ -577,15 +577,6 @@ class __SYCL_EXPORT handler { ->MKernelFunc; } - // For 'void' kernel argument - template - typename std::enable_if::value, KernelType *>::type - ResetHostKernel(const KernelType &KernelFunc) { - MHostKernel.reset( - new detail::HostKernel(KernelFunc)); - return (KernelType *)(MHostKernel->getPtr()); - } - // For 'sycl::id' kernel argument template typename std::enable_if>::value, @@ -658,21 +649,21 @@ class __SYCL_EXPORT handler { KernelName>(KernelFunc); } - // For 'sycl::group' kernel argument + /* 'wrapper'-based approach using 'NormalizedKernelType' struct is + * not applied for 'void(void)' type kernel and + * 'void(sycl::group)'. This is because 'void(void)' type does + * not have argument to normalize and 'void(sycl::group)' is + * not supported in ESIMD. + */ + // For 'void' and 'sycl::group' kernel argument template - typename std::enable_if>::value, + typename std::enable_if::value || + std::is_same>::value, KernelType *>::type ResetHostKernel(const KernelType &KernelFunc) { - struct NormalizedKernelType { - KernelType MKernelFunc; - NormalizedKernelType(const KernelType &KernelFunc) - : MKernelFunc(KernelFunc) {} - void operator()(const nd_item &Arg) { - detail::runKernelWithArg(MKernelFunc, Arg.get_group()); - } - }; - return ResetHostKernelHelper(KernelFunc); + MHostKernel.reset( + new detail::HostKernel(KernelFunc)); + return (KernelType *)(MHostKernel->getPtr()); } /// Stores lambda to the template-free object diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 416590861d237..ba025c5a08e97 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -312,7 +312,8 @@ std::vector> findPlugins() { } if (!EsimdCpuFound && (Backend == backend::esimd_cpu || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_ESIMD_CPU_PLUGIN_NAME, backend::esimd_cpu); + PluginNames.emplace_back(__SYCL_ESIMD_CPU_PLUGIN_NAME, + backend::esimd_cpu); EsimdCpuFound = true; } if (!HIPFound && (Backend == backend::hip || Backend == backend::all)) { From 41490ea492e421d2ad37a5865f4ed605aa5129b0 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Tue, 5 Oct 2021 09:51:06 -0700 Subject: [PATCH 31/40] clang-format error fix --- sycl/include/CL/sycl/handler.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index f3e93e1e1db59..a9bec296171b6 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -675,7 +675,6 @@ class __SYCL_EXPORT handler { /// kernel bundle contains. void verifyUsedKernelBundle(const std::string &KernelName); - /// Stores lambda to the template-free object /// /// Also initializes kernel name, list of arguments and requirements using From 9b57175ba817ad90f4b15f4004f5b4692d4e80a9 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 11 Oct 2021 12:02:51 -0700 Subject: [PATCH 32/40] Reverting changes in memory intrinsic implementations - For seemless integration with intel/llvm-test-suite - Reverted changes are to be back along with ESIMD_EMU macro in llvm-test-suite --- .../esimd/detail/memory_intrin.hpp | 543 ++++-------------- .../ext/intel/experimental/esimd/memory.hpp | 2 +- 2 files changed, 103 insertions(+), 442 deletions(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp index 7a9a6839df937..1751c32d37084 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/detail/memory_intrin.hpp @@ -19,27 +19,6 @@ #include -#ifndef __SYCL_DEVICE_ONLY__ -/// ESIMD_CPU Emulation support using esimd_cpu plugin - -/// Definition macro to be referenced in CM header files for -/// preventing build failure caused by symbol conflicts between llvm -/// and CM - e.g. vector. -#define __SYCL_EXPLICIT_SIMD_PLUGIN__ - -// Header files required for accessing CM-managed resources - image, -// buffer, runtime API etc. -namespace cm_support { -#include -} // namespace cm_support - -#include -#include -#include -#include - -#endif // ifndef __SYCL_DEVICE_ONLY__ - __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace ext { @@ -55,7 +34,7 @@ class AccessorPrivateProxy { static auto getNativeImageObj(const AccessorTy &Acc) { return Acc.getNativeImageObj(); } -#else // __SYCL_DEVICE_ONLY__ +#else template static auto getImageRange(const AccessorTy &Acc) { return Acc.getAccessRange(); @@ -63,10 +42,7 @@ class AccessorPrivateProxy { static auto getElemSize(const sycl::detail::AccessorBaseHost &Acc) { return Acc.getElemSize(); } - static void *getPtr(const sycl::detail::AccessorBaseHost &Acc) { - return Acc.getPtr(); - } -#endif // __SYCL_DEVICE_ONLY__ +#endif }; template __esimd_surf_read(int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, - __SEIEED::vector_type_t elem_offsets); + __SEIEED::vector_type_t elem_offsets) +#ifdef __SYCL_DEVICE_ONLY__ + ; +#else +{ + static_assert(N == 1 || N == 8 || N == 16); + static_assert(TySizeLog2 <= 2); + static_assert(std::is_integral::value || TySizeLog2 == 2); + throw cl::sycl::feature_not_supported(); +} +#endif // __SYCL_DEVICE_ONLY__ // Low-level surface-based scatter. Writes elements of a \ref simd object into a // surface at given offsets. Element can be a 1, 2 or 4-byte value, but it is @@ -219,7 +205,17 @@ SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void __esimd_surf_write(__SEIEED::simd_mask_storage_t pred, int16_t scale, SurfIndAliasTy surf_ind, uint32_t global_offset, __SEIEED::vector_type_t elem_offsets, - __SEIEED::vector_type_t vals); + __SEIEED::vector_type_t vals) +#ifdef __SYCL_DEVICE_ONLY__ + ; +#else +{ + static_assert(N == 1 || N == 8 || N == 16); + static_assert(TySizeLog2 <= 2); + static_assert(std::is_integral::value || TySizeLog2 == 2); + throw cl::sycl::feature_not_supported(); +} +#endif // __SYCL_DEVICE_ONLY__ // TODO bring the parameter order of __esimd* intrinsics in accordance with the // correponsing BE intrinsicics parameter order. @@ -517,11 +513,8 @@ __esimd_raw_send_store(uint8_t modifier, uint8_t execSize, __SEIEED::simd_mask_storage_t pred, uint8_t numSrc0, uint8_t sfid, uint32_t exDesc, uint32_t msgDesc, __SEIEED::vector_type_t msgSrc0); - #ifndef __SYCL_DEVICE_ONLY__ -/// ESIMD_CPU Emulation support using esimd_cpu plugin - template inline __SEIEED::vector_type_t @@ -669,31 +662,6 @@ inline void __esimd_flat_write4( } } -template -SYCL_EXTERNAL SYCL_ESIMD_FUNCTION __SEIEED::vector_type_t -__esimd_surf_read(int16_t scale, SurfIndAliasTy surf_ind, - uint32_t global_offset, - __SEIEED::vector_type_t elem_offsets) { - static_assert(N == 1 || N == 8 || N == 16); - static_assert(TySizeLog2 <= 2); - static_assert(std::is_integral::value || TySizeLog2 == 2); - throw cl::sycl::feature_not_supported(); -} - -template -SYCL_EXTERNAL SYCL_ESIMD_FUNCTION void -__esimd_surf_write(__SEIEED::vector_type_t pred, int16_t scale, - SurfIndAliasTy surf_ind, uint32_t global_offset, - __SEIEED::vector_type_t elem_offsets, - __SEIEED::vector_type_t vals) { - static_assert(N == 1 || N == 8 || N == 16); - static_assert(TySizeLog2 <= 2); - static_assert(std::is_integral::value || TySizeLog2 == 2); - throw cl::sycl::feature_not_supported(); -} - template inline __SEIEED::vector_type_t __esimd_flat_block_read_unaligned(uint64_t addr) { @@ -719,109 +687,48 @@ template inline __SEIEED::vector_type_t __esimd_media_block_load(unsigned modififer, TACC handle, unsigned plane, unsigned width, unsigned x, unsigned y) { - __SEIEED::vector_type_t vals; + // On host the input surface is modeled as sycl image 2d object, + // and the read/write access is done through accessor, + // which is passed in as the handle argument. + auto range = __SEIEED::AccessorPrivateProxy::getImageRange(handle); + unsigned bpp = __SEIEED::AccessorPrivateProxy::getElemSize(handle); + unsigned vpp = bpp / sizeof(Ty); + unsigned int i = x / bpp; + unsigned int j = y; + + assert(x % bpp == 0); + unsigned int xbound = range[0] - 1; + unsigned int ybound = range[1] - 1; - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - - char *readBase; - uint32_t bpp; - uint32_t imgWidth; - uint32_t imgHeight; - std::mutex *mutexLock; - - auto ImageHandle = __SEIEED::AccessorPrivateProxy::getPtr(handle); - - I->sycl_get_cm_image_params_ptr(ImageHandle, &readBase, &imgWidth, &imgHeight, - &bpp, &mutexLock); - - std::unique_lock lock(*mutexLock); - - int x_pos_a, y_pos_a, offset, index; - - // TODO : Remove intermediate 'in' matrix - std::vector> in(M, std::vector(N)); - int R = M; - int C = N; - for (int i = 0; i < R; i++) { - for (int j = 0; j < C; j++) { - x_pos_a = x + j * sizeof(Ty); - { y_pos_a = y + i; } - // We should check the boundary condition based on sizeof(Ty), x_pos_a is - // 0-based Note: Use a signed variable; otherwise sizeof(Ty) is unsigned - if ((x_pos_a + sizeof(Ty)) > imgWidth) { - // If we're trying to read outside the boundary, limit the value of - // x_pos_a Assumption -- We don't this situation: - // x_pos_a width's boundary - // | | - // <---type(Ty)---> - // At most x_pos_a+sizeof(Ty) is exactly at the boundary. - x_pos_a = imgWidth; - } - if (y_pos_a > imgHeight - 1) { - y_pos_a = imgHeight - 1; - } - if (y_pos_a < 0) { - y_pos_a = 0; - } - { - if (x_pos_a < 0) { - // Need to align x position to bbp - int offset = x % bpp; - x_pos_a -= offset; - } - while (x_pos_a < 0) { - // If we're trying to read outside the left boundary, increase x_pos_a - x_pos_a += bpp; - } + __SEIEED::vector_type_t vals; + for (int row = 0; row < M; row++) { + for (int col = 0; col < N; col += vpp) { + unsigned int xoff = (i > xbound) ? xbound : i; + unsigned int yoff = (j > ybound) ? ybound : j; + auto coords = cl::sycl::cl_int2(xoff, yoff); + cl::sycl::cl_uint4 data = handle.read(coords); + + __SEIEED::vector_type_t res; + for (int idx = 0; idx < 4; idx++) { + res[idx] = data[idx]; } - if (x_pos_a >= imgWidth) { - { - x_pos_a = x_pos_a - bpp; - for (uint byte_count = 0; byte_count < sizeof(Ty); byte_count++) { - if (x_pos_a >= imgWidth) { - x_pos_a = x_pos_a - bpp; - } - offset = y_pos_a * imgWidth + x_pos_a; - - /* - If destination size per element is less then or equal pixel size - of the surface move the pixel value accross the destination - elements. If destination size per element is greater then pixel - size of the surface replicate pixel value in the destination - element. - */ - if (sizeof(Ty) <= bpp) { - for (uint bpp_count = 0; j < C && bpp_count < bpp; - j++, bpp_count += sizeof(Ty)) { - in[i][j] = *((Ty *)(readBase + offset + bpp_count)); - } - j--; - break; - } else { - // ((unsigned char*)in.get_addr(i*C+j))[byte_count] = *((unsigned - // char*)((char*)buff_iter->p + offset)); - unsigned char *pTempBase = - ((unsigned char *)in[i].data()) + j * sizeof(Ty); - pTempBase[byte_count] = *((unsigned char *)(readBase + offset)); - } - - x_pos_a = x_pos_a + 1; - } - x_pos_a = imgWidth; - } - } else { - offset = y_pos_a * imgWidth + x_pos_a; - { in[i][j] = *((Ty *)(readBase + offset)); } + constexpr int refN = sizeof(cl::sycl::cl_uint4) / sizeof(Ty); + unsigned int stride = sizeof(cl::sycl::cl_uint4) / bpp; + using refTy = __SEIEED::vector_type_t; + auto ref = reinterpret_cast(res); + + unsigned int offset1 = col + row * N; + unsigned int offset2 = 0; + for (int idx = 0; idx < vpp; idx++) { + vals[offset1] = ref[offset2]; + offset1++; + offset2 += stride; } + i++; } - } - - for (auto i = 0, k = 0; i < M; i++) { - for (auto j = 0; j < N; j++) { - vals[k++] = in[i][j]; - } + i = x / bpp; + j++; } return vals; @@ -832,60 +739,45 @@ inline void __esimd_media_block_store(unsigned modififer, TACC handle, unsigned plane, unsigned width, unsigned x, unsigned y, __SEIEED::vector_type_t vals) { - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - - char *writeBase; - uint32_t bpp; - uint32_t imgWidth; - uint32_t imgHeight; - std::mutex *mutexLock; - - auto ImageHandle = __SEIEED::AccessorPrivateProxy::getPtr(handle); - - I->sycl_get_cm_image_params_ptr(ImageHandle, &writeBase, &imgWidth, - &imgHeight, &bpp, &mutexLock); - - int x_pos_a, y_pos_a, offset; - - assert((x % 4) == 0); - assert((N * sizeof(Ty)) % 4 == 0); - - // TODO : Remove intermediate 'out' matrix - std::vector> out(M, std::vector(N)); - - std::unique_lock lock(*mutexLock); + unsigned bpp = __SEIEED::AccessorPrivateProxy::getElemSize(handle); + unsigned vpp = bpp / sizeof(Ty); + auto range = __SEIEED::AccessorPrivateProxy::getImageRange(handle); + unsigned int i = x / bpp; + unsigned int j = y; + + assert(x % bpp == 0); + + for (int row = 0; row < M; row++) { + for (int col = 0; col < N; col += vpp) { + constexpr int Sz = sizeof(cl::sycl::cl_uint4) / sizeof(Ty); + __SEIEED::vector_type_t res = 0; + + unsigned int offset1 = col + row * N; + unsigned int offset2 = 0; + unsigned int stride = sizeof(cl::sycl::cl_uint4) / bpp; + for (int idx = 0; idx < vpp; idx++) { + res[offset2] = vals[offset1]; + offset1++; + offset2 += stride; + } - for (int i = 0, k = 0; i < M; i++) { - for (int j = 0; j < N; j++) { - out[i][j] = vals[k++]; - } - } + using refTy = __SEIEED::vector_type_t; + auto ref = reinterpret_cast(res); - for (int i = 0; i < M; i++) { - for (int j = 0; j < N; j++) { - x_pos_a = x + j * sizeof(Ty); - { y_pos_a = y + i; } - if ((int)x_pos_a < 0) { - continue; - } - if ((int)y_pos_a < 0) { - continue; - } - if ((int)(x_pos_a + sizeof(Ty)) > imgWidth) { - continue; + cl::sycl::cl_uint4 data; + for (int idx = 0; idx < 4; idx++) { + data[idx] = ref[idx]; } - if ((int)y_pos_a > imgHeight - 1) { - continue; + if (i < range[0] && j < range[1]) { + auto coords = cl::sycl::cl_int2(i, j); + handle.write(coords, data); } - offset = y_pos_a * imgWidth + x_pos_a; - *((Ty *)(writeBase + offset)) = out[i][j]; + i++; } + i = x / bpp; + j++; } - - /// TODO : Optimize - I->cm_fence_ptr(); } template @@ -922,27 +814,10 @@ __esimd_dp4(__SEIEED::vector_type_t v1, return retv; } -inline void __esimd_slm_init(size_t size) { - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); +/// TODO +inline void __esimd_barrier() {} - I->cm_slm_init_ptr(size); -} - -inline void __esimd_barrier() { - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - - I->cm_barrier_ptr(); -} - -inline void __esimd_sbarrier( - sycl::ext::intel::experimental::esimd::EsimdSbarrierType flag) { - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - - I->cm_sbarrier_ptr((uint32_t)flag); -} +inline void __esimd_sbarrier(__SEIEE::split_barrier_action flag) {} inline void __esimd_slm_fence(uint8_t cntl) {} @@ -951,17 +826,6 @@ inline __SEIEED::vector_type_t __esimd_slm_read(__SEIEED::vector_type_t addrs, __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - - char *SlmBase = I->__cm_emu_get_slm_ptr(); - for (int i = 0; i < N; ++i) { - if (pred[i]) { - Ty *addr = reinterpret_cast(addrs[i] + SlmBase); - retv[i] = *addr; - } - } - return retv; } @@ -969,49 +833,19 @@ __esimd_slm_read(__SEIEED::vector_type_t addrs, template inline void __esimd_slm_write(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) { - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - - char *SlmBase = I->__cm_emu_get_slm_ptr(); - for (int i = 0; i < N; ++i) { - if (pred[i]) { - Ty *addr = reinterpret_cast(addrs[i] + SlmBase); - *addr = vals[i]; - } - } -} + __SEIEED::simd_mask_storage_t pred) {} // slm_block_read reads a block of data from SLM template inline __SEIEED::vector_type_t __esimd_slm_block_read(uint32_t addr) { __SEIEED::vector_type_t retv; - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - char *SlmBase = I->__cm_emu_get_slm_ptr(); - addr <<= 4; - for (int i = 0; i < N; ++i) { - Ty *SlmAddr = reinterpret_cast(addr + SlmBase); - retv[i] = *SlmAddr; - addr += sizeof(Ty); - } return retv; } // slm_block_write writes a block of data to SLM template inline void __esimd_slm_block_write(uint32_t addr, - __SEIEED::vector_type_t vals) { - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - char *SlmBase = I->__cm_emu_get_slm_ptr(); - addr <<= 4; - for (int i = 0; i < N; ++i) { - Ty *SlmAddr = reinterpret_cast(addr + SlmBase); - *SlmAddr = vals[i]; - addr += sizeof(Ty); - } -} + __SEIEED::vector_type_t vals) {} // slm_read4 does SLM gather4 template @@ -1019,52 +853,6 @@ inline __SEIEED::vector_type_t __esimd_slm_read4(__SEIEED::vector_type_t addrs, __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - char *ReadBase = I->__cm_emu_get_slm_ptr(); - - unsigned int Next = 0; - if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::R)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *addr = reinterpret_cast(addrs[I] + ReadBase); - retv[Next] = *addr; - } - } - } - - ReadBase += sizeof(Ty); - - if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::G)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *addr = reinterpret_cast(addrs[I] + ReadBase); - retv[Next] = *addr; - } - } - } - - ReadBase += sizeof(Ty); - - if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::B)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *addr = reinterpret_cast(addrs[I] + ReadBase); - retv[Next] = *addr; - } - } - } - - ReadBase += sizeof(Ty); - - if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::A)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *addr = reinterpret_cast(addrs[I] + ReadBase); - retv[Next] = *addr; - } - } - } return retv; } @@ -1073,55 +861,7 @@ template inline void __esimd_slm_write4( __SEIEED::vector_type_t addrs, __SEIEED::vector_type_t vals, - __SEIEED::vector_type_t pred) { - - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - char *WriteBase = I->__cm_emu_get_slm_ptr(); - - unsigned int Next = 0; - if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::R)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *addr = reinterpret_cast(addrs[I] + WriteBase); - *addr = vals[Next]; - } - } - } - - WriteBase += sizeof(Ty); - - if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::G)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *addr = reinterpret_cast(addrs[I] + WriteBase); - *addr = vals[Next]; - } - } - } - - WriteBase += sizeof(Ty); - - if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::B)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *addr = reinterpret_cast(addrs[I] + WriteBase); - *addr = vals[Next]; - } - } - } - - WriteBase += sizeof(Ty); - - if (__SEIEE::is_channel_enabled(Mask, __SEIEE::rgba_channel::A)) { - for (int I = 0; I < N; I++, Next++) { - if (pred[I]) { - Ty *addr = reinterpret_cast(addrs[I] + WriteBase); - *addr = vals[Next]; - } - } - } -} + __SEIEED::simd_mask_storage_t pred) {} // slm_atomic: SLM atomic template <__SEIEE::atomic_op Op, typename Ty, int N> @@ -1129,23 +869,6 @@ inline __SEIEED::vector_type_t __esimd_slm_atomic0(__SEIEED::vector_type_t addrs, __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - char *WriteBase = I->__cm_emu_get_slm_ptr(); - - for (int i = 0; i < N; i++) { - if (pred[i]) { - Ty *p = reinterpret_cast(addrs[i] + WriteBase); - - switch (Op) { - case __SEIEE::atomic_op::inc: - retv[i] = atomic_add_fetch(p, 1); - break; - default: - throw cl::sycl::feature_not_supported(); - } - } - } return retv; } @@ -1184,21 +907,6 @@ __esimd_flat_atomic1(__SEIEED::vector_type_t addrs, __SEIEED::vector_type_t src0, __SEIEED::simd_mask_storage_t pred) { __SEIEED::vector_type_t retv; - - for (int i = 0; i < N; i++) { - if (pred[i]) { - Ty *p = reinterpret_cast(addrs[i]); - - switch (Op) { - case __SEIEE::atomic_op::add: - retv[i] = atomic_add_fetch(p, src0[i]); - break; - default: - throw cl::sycl::feature_not_supported(); - } - } - } - return retv; } @@ -1216,62 +924,15 @@ __esimd_flat_atomic2(__SEIEED::vector_type_t addrs, template inline __SEIEED::vector_type_t __esimd_block_read(SurfIndAliasTy surf_ind, uint32_t offset) { - __SEIEED::vector_type_t retv; - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - - char *readBase; - uint32_t width; - std::mutex *mutexLock; - - auto BufferHandle = __SEIEED::AccessorPrivateProxy::getPtr(surf_ind); - - I->sycl_get_cm_buffer_params_ptr(BufferHandle, &readBase, &width, &mutexLock); - - std::unique_lock lock(*mutexLock); - - for (int idx = 0; idx < N; idx++) { - if (offset >= width) { - retv[idx] = 0; - } else { - retv[idx] = *((Ty *)(readBase + offset)); - } - offset += (uint32_t)sizeof(Ty); - } - - return retv; + throw cl::sycl::feature_not_supported(); + return __SEIEED::vector_type_t(); } template inline void __esimd_block_write(SurfIndAliasTy surf_ind, uint32_t offset, __SEIEED::vector_type_t vals) { - sycl::detail::ESIMDDeviceInterface *I = - sycl::detail::getESIMDDeviceInterface(); - - char *writeBase; - uint32_t width; - std::mutex *mutexLock; - - auto BufferHandle = __SEIEED::AccessorPrivateProxy::getPtr(surf_ind); - - I->sycl_get_cm_buffer_params_ptr(BufferHandle, &writeBase, &width, - &mutexLock); - - std::unique_lock lock(*mutexLock); - offset <<= 4; - - for (int idx = 0; idx < N; idx++) { - if (offset < width) { - *((Ty *)(writeBase + offset)) = vals[idx]; - } else { - break; - } - offset += (uint32_t)sizeof(Ty); - } - - /// TODO : Optimize - I->cm_fence_ptr(); + throw cl::sycl::feature_not_supported(); } /// \brief esimd_get_value diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index 386c93e2684a0..c068a54023025 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -898,7 +898,7 @@ media_block_store(AccessorTy acc, unsigned x, unsigned y, simd vals) { #ifndef __SYCL_DEVICE_ONLY__ -inline void slm_init(uint32_t size) { __esimd_slm_init(size); } +inline void slm_init(uint32_t size) { } #endif From c7fad03900aa5a039f3bfcad6668f1050d5423ff Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 11 Oct 2021 12:05:28 -0700 Subject: [PATCH 33/40] Missing revert from previous reverting --- sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp index c068a54023025..8f3cb71d1429e 100644 --- a/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp +++ b/sycl/include/sycl/ext/intel/experimental/esimd/memory.hpp @@ -898,7 +898,7 @@ media_block_store(AccessorTy acc, unsigned x, unsigned y, simd vals) { #ifndef __SYCL_DEVICE_ONLY__ -inline void slm_init(uint32_t size) { } +inline void slm_init(uint32_t size) {} #endif From 06f132c050f5bd396bf20ed91691dddc0f010788 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Fri, 15 Oct 2021 11:37:34 -0700 Subject: [PATCH 34/40] Build error fix from esimd_cpu/emulator renaming --- sycl/CMakeLists.txt | 6 ++--- sycl/source/CMakeLists.txt | 2 +- sycl/source/detail/pi.cpp | 22 ++++++++++--------- ...pp => esimd_emulator_device_interface.cpp} | 14 ++++++------ 4 files changed, 23 insertions(+), 21 deletions(-) rename sycl/source/{esimdcpu_device_interface.cpp => esimd_emulator_device_interface.cpp} (83%) diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 065ed5c37920e..69ad4c3b1021d 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -309,8 +309,8 @@ if(SYCL_BUILD_PI_HIP) list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_hip) endif() -if (SYCL_BUILD_PI_ESIMD_CPU) - list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS pi_esimd_cpu libcmrt-headers) +if (SYCL_BUILD_PI_ESIMD_EMULATOR) + list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS pi_esimd_emulator libcmrt-headers) if (MSVC) list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-libs libcmrt-dlls) else() @@ -318,7 +318,7 @@ if (SYCL_BUILD_PI_ESIMD_CPU) endif() else() # TODO/FIXME : Removing empty header file (cm_rt.h) generation when - # the ESIMD_CPU support is enabled by default + # the ESIMD_EMULATOR support is enabled by default file (TOUCH ${SYCL_INCLUDE_BUILD_DIR}/sycl/CL/cm_rt.h) endif() diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index 688061feb96c9..bb53aaacb6267 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -174,7 +174,7 @@ set(SYCL_SOURCES "sampler.cpp" "stream.cpp" "spirv_ops.cpp" - "esimdcpu_device_interface.cpp" + "esimd_emulator_device_interface.cpp" "$<$:detail/windows_pi.cpp>" "$<$,$>:detail/posix_pi.cpp>" "$<$:abi_replacements_windows.cpp>" diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index fe1c908706e7a..620a5c9b44e50 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -284,7 +284,8 @@ std::vector> findPlugins() { PluginNames.emplace_back(__SYCL_LEVEL_ZERO_PLUGIN_NAME, backend::level_zero); PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::cuda); - PluginNames.emplace_back(__SYCL_ESIMD_CPU_PLUGIN_NAME, backend::esimd_cpu); + PluginNames.emplace_back(__SYCL_ESIMD_EMULATOR_PLUGIN_NAME, + backend::ext_intel_esimd_emulator); PluginNames.emplace_back(__SYCL_HIP_PLUGIN_NAME, backend::hip); } else { std::vector Filters = FilterList->get(); @@ -310,10 +311,10 @@ std::vector> findPlugins() { PluginNames.emplace_back(__SYCL_CUDA_PLUGIN_NAME, backend::cuda); CudaFound = true; } - if (!EsimdCpuFound && - (Backend == backend::esimd_cpu || Backend == backend::all)) { - PluginNames.emplace_back(__SYCL_ESIMD_CPU_PLUGIN_NAME, - backend::esimd_cpu); + if (!EsimdCpuFound && (Backend == backend::ext_intel_esimd_emulator || + Backend == backend::all)) { + PluginNames.emplace_back(__SYCL_ESIMD_EMULATOR_PLUGIN_NAME, + backend::ext_intel_esimd_emulator); EsimdCpuFound = true; } if (!HIPFound && (Backend == backend::hip || Backend == backend::all)) { @@ -423,11 +424,12 @@ static void initializePlugins(std::vector &Plugins) { // Use the CUDA plugin as the GlobalPlugin GlobalPlugin = std::make_shared(PluginInformation, backend::cuda, Library); - } else if (InteropBE == backend::esimd_cpu && - PluginNames[I].first.find("esimd_cpu") != std::string::npos) { - // Use the ESIMD_CPU plugin as the GlobalPlugin - GlobalPlugin = std::make_shared(PluginInformation, - backend::esimd_cpu, Library); + } else if (InteropBE == backend::ext_intel_esimd_emulator && + PluginNames[I].first.find("esimd_emulator") != + std::string::npos) { + // Use the ESIMD_EMULATOR plugin as the GlobalPlugin + GlobalPlugin = std::make_shared( + PluginInformation, backend::ext_intel_esimd_emulator, Library); } else if (InteropBE == backend::hip && PluginNames[I].first.find("hip") != std::string::npos) { // Use the HIP plugin as the GlobalPlugin diff --git a/sycl/source/esimdcpu_device_interface.cpp b/sycl/source/esimd_emulator_device_interface.cpp similarity index 83% rename from sycl/source/esimdcpu_device_interface.cpp rename to sycl/source/esimd_emulator_device_interface.cpp index be28ede6c2f3f..a5e343d9c00bb 100644 --- a/sycl/source/esimdcpu_device_interface.cpp +++ b/sycl/source/esimd_emulator_device_interface.cpp @@ -1,4 +1,4 @@ -//==------------------- esimdcpu_device_interface.cpp ----------------------==// +//==--------------- esimd_emulator_device_interface.cpp --------------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -7,16 +7,16 @@ //===----------------------------------------------------------------------===// /// \file esimdcpu_device_interface.cpp -/// Definitions for ESIMD_CPU-device specific definitions. +/// Definitions for ESIMD_EMULATOR-device specific definitions. /// /// This interface is for ESIMD intrinsic emulation implementations -/// such as slm_access to access ESIMD_CPU specific-support therefore +/// such as slm_access to access ESIMD_EMULATOR specific-support therefore /// it has to be defined and shared as include directory /// /// \ingroup sycl_pi_esimd_cpu #include -#include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -34,7 +34,7 @@ __SYCL_EXPORT ESIMDDeviceInterface *getESIMDDeviceInterface() { reinterpret_cast(PIOpaqueData); // First check if opaque data version is compatible. - if (OpaqueData->version != ESIMD_EMU_PLUGIN_OPAQUE_DATA_VERSION) { + if (OpaqueData->version != ESIMD_EMULATOR_PLUGIN_OPAQUE_DATA_VERSION) { // NOTE: the version check should always be '!=' as layouts of different // versions of PluginOpaqueData is not backward compatible, unlike // layout of the ESIMDDeviceInterface. @@ -43,8 +43,8 @@ __SYCL_EXPORT ESIMDDeviceInterface *getESIMDDeviceInterface() { << "Opaque data returned by ESIMD Emu plugin is incompatible with" << "the one used in current implementation." << std::endl << "Returned version : " << OpaqueData->version << std::endl - << "Required version : " << ESIMD_EMU_PLUGIN_OPAQUE_DATA_VERSION - << std::endl; + << "Required version : " + << ESIMD_EMULATOR_PLUGIN_OPAQUE_DATA_VERSION << std::endl; throw feature_not_supported(); } // Opaque data version is OK, can cast the 'data' field. From 4d96d4b67876de0f15cee3032d51500670b97420 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 18 Oct 2021 14:36:01 -0700 Subject: [PATCH 35/40] Missing changes for esimd_cpu/emulator renaming --- sycl/source/detail/config.cpp | 2 +- sycl/source/detail/config.hpp | 4 ++-- sycl/source/detail/device_filter.cpp | 2 +- sycl/source/detail/scheduler/commands.cpp | 7 ++++--- sycl/source/esimd_emulator_device_interface.cpp | 5 +++-- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/sycl/source/detail/config.cpp b/sycl/source/detail/config.cpp index 0d80911c064ea..12c75f8291879 100644 --- a/sycl/source/detail/config.cpp +++ b/sycl/source/detail/config.cpp @@ -175,7 +175,7 @@ const std::array, 7> &getSyclBeMap() { {"opencl", backend::opencl}, {"level_zero", backend::level_zero}, {"cuda", backend::cuda}, - {"esimd_cpu", backend::esimd_cpu}, + {"esimd_emulator", backend::ext_intel_esimd_emulator}, {"hip", backend::hip}, {"*", backend::all}}}; return SyclBeMap; diff --git a/sycl/source/detail/config.hpp b/sycl/source/detail/config.hpp index 355b41366dbce..4e149b930c912 100644 --- a/sycl/source/detail/config.hpp +++ b/sycl/source/detail/config.hpp @@ -139,7 +139,7 @@ template <> class SYCLConfig { {"PI_LEVEL_ZERO", backend::level_zero}, {"PI_LEVEL0", backend::level_zero}, // for backward compatibility {"PI_CUDA", backend::cuda}, - {"PI_ESIMD_CPU", backend::esimd_cpu}, + {"PI_ESIMD_EMULATOR", backend::ext_intel_esimd_emulator}, {"PI_HIP", backend::hip}}}; if (ValStr) { auto It = std::find_if( @@ -150,7 +150,7 @@ template <> class SYCLConfig { if (It == SyclBeMap.end()) pi::die("Invalid backend. " "Valid values are " - "PI_OPENCL/PI_LEVEL_ZERO/PI_CUDA/PI_ESIMD_CPU/PI_HIP"); + "PI_OPENCL/PI_LEVEL_ZERO/PI_CUDA/PI_ESIMD_EMULATOR/PI_HIP"); static backend Backend = It->second; BackendPtr = &Backend; } diff --git a/sycl/source/detail/device_filter.cpp b/sycl/source/detail/device_filter.cpp index de3ace69c9f5f..6b3746e7c57ac 100644 --- a/sycl/source/detail/device_filter.cpp +++ b/sycl/source/detail/device_filter.cpp @@ -91,7 +91,7 @@ device_filter::device_filter(const std::string &FilterString) { std::string Message = std::string("Invalid device filter: ") + FilterString + "\nPossible backend values are " - "{host,opencl,level_zero,cuda,hip,esimd_cpu*}.\n" + "{host,opencl,level_zero,cuda,hip,esimd_emulator*}.\n" "Possible device types are {host,cpu,gpu,acc,*}.\n" "Device number should be an non-negative integer.\n"; throw cl::sycl::invalid_parameter_error(Message, PI_INVALID_VALUE); diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 192686335b4a1..4f6f9f721de10 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2038,8 +2038,8 @@ cl_int ExecCGCommand::enqueueImp() { NDRDescT &NDRDesc = ExecKernel->MNDRDesc; - if (MQueue->is_host() || - (MQueue->getPlugin().getBackend() == backend::esimd_cpu)) { + if (MQueue->is_host() || (MQueue->getPlugin().getBackend() == + backend::ext_intel_esimd_emulator)) { for (ArgDesc &Arg : ExecKernel->MArgs) if (kernel_param_kind_t::kind_accessor == Arg.MType) { Requirement *Req = (Requirement *)(Arg.MPtr); @@ -2056,7 +2056,8 @@ cl_int ExecCGCommand::enqueueImp() { ExecKernel->MHostKernel->call(NDRDesc, getEvent()->getHostProfilingInfo()); } else { - assert(MQueue->getPlugin().getBackend() == backend::esimd_cpu); + assert(MQueue->getPlugin().getBackend() == + backend::ext_intel_esimd_emulator); MQueue->getPlugin().call( nullptr, reinterpret_cast(ExecKernel->MHostKernel->getPtr()), diff --git a/sycl/source/esimd_emulator_device_interface.cpp b/sycl/source/esimd_emulator_device_interface.cpp index a5e343d9c00bb..ad9f54b8b5a75 100644 --- a/sycl/source/esimd_emulator_device_interface.cpp +++ b/sycl/source/esimd_emulator_device_interface.cpp @@ -13,7 +13,7 @@ /// such as slm_access to access ESIMD_EMULATOR specific-support therefore /// it has to be defined and shared as include directory /// -/// \ingroup sycl_pi_esimd_cpu +/// \ingroup sycl_pi_esimd_emulator #include #include @@ -28,7 +28,8 @@ __SYCL_EXPORT ESIMDDeviceInterface *getESIMDDeviceInterface() { // tight loop) void *PIOpaqueData = nullptr; - PIOpaqueData = getPluginOpaqueData(nullptr); + PIOpaqueData = + getPluginOpaqueData(nullptr); ESIMDEmuPluginOpaqueData *OpaqueData = reinterpret_cast(PIOpaqueData); From e45087b05cf3099d1a864cf6190b1006fc6c69a0 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 18 Oct 2021 16:07:57 -0700 Subject: [PATCH 36/40] Another renaming change --- sycl/unittests/allowlist/ParseAllowList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index cc70ecd54f718..1c6a5c64f8741 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -167,7 +167,7 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ExpectedValue{ {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "esimd_cpu"}}, {{"BackendName", "hip"}}, + {{"BackendName", "esimd_emulator"}}, {{"BackendName", "hip"}}, {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } From c1e6f9c9dae72ccda7323badab8fe47afa17f3c2 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Mon, 18 Oct 2021 16:09:44 -0700 Subject: [PATCH 37/40] clang-format fix --- sycl/unittests/allowlist/ParseAllowList.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 1c6a5c64f8741..76d21ba2678f0 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -165,9 +165,12 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ActualValue = sycl::detail::parseAllowList(AllowList); sycl::detail::AllowListParsedT ExpectedValue{ - {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, - {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "esimd_emulator"}}, {{"BackendName", "hip"}}, + {{"BackendName", "host"}}, + {{"BackendName", "opencl"}}, + {{"BackendName", "level_zero"}}, + {{"BackendName", "cuda"}}, + {{"BackendName", "esimd_emulator"}}, + {{"BackendName", "hip"}}, {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } From 8ee0bd822d355686feeb0f2be8629409aff47459 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Tue, 16 Nov 2021 11:11:33 -0800 Subject: [PATCH 38/40] Re-privatizing 'MKernel' for backward compatibility - Allowing access 'MKernel' from sycl::handler class - +Reverting unnecessary std:move for Kernel function --- sycl/include/CL/sycl/detail/cg_types.hpp | 5 ++++- sycl/include/CL/sycl/handler.hpp | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg_types.hpp b/sycl/include/CL/sycl/detail/cg_types.hpp index 3a782ff0ca0e2..fe5e52f0d2ab9 100644 --- a/sycl/include/CL/sycl/detail/cg_types.hpp +++ b/sycl/include/CL/sycl/detail/cg_types.hpp @@ -245,9 +245,12 @@ class HostTask { template class HostKernel : public HostKernelBase { using IDBuilder = sycl::detail::Builder; + KernelType MKernel; + // Allowing accessing MKernel from 'ResetHostKernelHelper' method of + // 'sycl::handler' + friend class sycl::handler; public: - KernelType MKernel; HostKernel(KernelType Kernel) : MKernel(Kernel) {} void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override { // adjust ND range for serial host: diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index e7dfbda4ad40c..c3ddbc1930258 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -1434,7 +1434,7 @@ class __SYCL_EXPORT handler { // known constant. MNDRDesc.set(range<1>{1}); - StoreLambda(std::move(KernelFunc)); + StoreLambda(KernelFunc); setType(detail::CG::Kernel); #endif } From b85ba9f2d10fbfcf0da45fdf507dc6b06a1adf44 Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 18 Nov 2021 12:24:36 -0800 Subject: [PATCH 39/40] ParseAllowList failure fix --- sycl/unittests/allowlist/ParseAllowList.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 76d21ba2678f0..8688ce0a4289b 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -169,8 +169,8 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { {{"BackendName", "opencl"}}, {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, - {{"BackendName", "esimd_emulator"}}, {{"BackendName", "hip"}}, + {{"BackendName", "esimd_emulator"}}, {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); } From 7a3e96819555c9c36c0130202a2c90ce4e82736a Mon Sep 17 00:00:00 2001 From: Dongkyun Ahn Date: Thu, 18 Nov 2021 12:29:17 -0800 Subject: [PATCH 40/40] clang-format fix --- sycl/unittests/allowlist/ParseAllowList.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sycl/unittests/allowlist/ParseAllowList.cpp b/sycl/unittests/allowlist/ParseAllowList.cpp index 8688ce0a4289b..0d2a8edb91ba7 100644 --- a/sycl/unittests/allowlist/ParseAllowList.cpp +++ b/sycl/unittests/allowlist/ParseAllowList.cpp @@ -165,12 +165,9 @@ TEST(ParseAllowListTests, CheckAllValidBackendNameValuesAreProcessed) { sycl::detail::AllowListParsedT ActualValue = sycl::detail::parseAllowList(AllowList); sycl::detail::AllowListParsedT ExpectedValue{ - {{"BackendName", "host"}}, - {{"BackendName", "opencl"}}, - {{"BackendName", "level_zero"}}, - {{"BackendName", "cuda"}}, - {{"BackendName", "hip"}}, - {{"BackendName", "esimd_emulator"}}, + {{"BackendName", "host"}}, {{"BackendName", "opencl"}}, + {{"BackendName", "level_zero"}}, {{"BackendName", "cuda"}}, + {{"BackendName", "hip"}}, {{"BackendName", "esimd_emulator"}}, {{"BackendName", "*"}}}; EXPECT_EQ(ExpectedValue, ActualValue); }