intel · dongkyunahn-intel · Jun 9, 2021 · Jun 28, 2021 · Jun 29, 2021 · Jun 28, 2021
@@ -285,6 +285,15 @@ if(SYCL_BUILD_PI_ROCM)
   list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libspirv-builtins pi_rocm)
 endif()
 
+if (SYCL_BUILD_PI_ESIMD_CPU)
+  list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-headers)
+  if (MSVC)
+    list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-libs libcmrt-dlls)
+  else()
+    list(APPEND SYCL_TOOLCHAIN_DEPLOY_COMPONENTS libcmrt-sos)
+  endif()
+endif()
+
 # Use it as fake dependency in order to force another command(s) to execute.
 add_custom_command(OUTPUT __force_it
   COMMAND "${CMAKE_COMMAND}" -E echo

@@ -671,6 +671,11 @@ class image_accessor
                        AccessorBaseHost::getPtr() /*Ptr to Image*/);
 #endif
   }
+
+#ifndef __SYCL_DEVICE_ONLY__
+public:
+  void *get_pointer() const { return detail::AccessorBaseHost::getPtr(); }
+#endif // __SYCL_DEVICE_ONLY__
 };
 
 template <typename DataT, int Dimensions, access::mode AccessMode,

@@ -243,9 +243,9 @@ class HostTask {
 template <class KernelType, class KernelArgType, int Dims, typename KernelName>
 class HostKernel : public HostKernelBase {
   using IDBuilder = sycl::detail::Builder;
-  KernelType MKernel;
 
 public:
+  KernelType MKernel;
   HostKernel(KernelType Kernel) : MKernel(Kernel) {}
   void call(const NDRDescT &NDRDesc, HostProfilingInfo *HPI) override {
     // adjust ND range for serial host:

@@ -514,6 +514,98 @@ class __SYCL_EXPORT handler {
     return {z, y, x};
   }
 
+  /* The kernel passed to StoreLambda can take an id, an item or an nd_item as
+   * its argument. Since esimd plugin directly invokes the kernel (doesn’t use
+   * piKernelSetArg), the kernel argument type must be known to the plugin.
+   * However, passing kernel argument type to the plugin requires changing ABI
+   * in HostKernel class. To overcome this problem, helpers below wrap the
+   * “original” kernel with a functor that always takes an nd_item as argument.
+   * A functor is used instead of a lambda because extractArgsAndReqsFromLambda
+   * needs access to the “original” kernel and keeps references to its internal
+   * data, i.e. the kernel passed as argument cannot be local in scope. The
+   * functor itself is again encapsulated in a std::function since functor’s
+   * type is unknown to the plugin.
+   */
+
+  template <class KernelType, class NormalizedKernelType, int Dims>
+  KernelType *ResetHostKernelHelper(const KernelType &KernelFunc) {
+    NormalizedKernelType NormalizedKernel(KernelFunc);
+    auto NormalizedKernelFunc =
+        std::function<void(const sycl::nd_item<Dims> &)>(NormalizedKernel);
+    auto HostKernelPtr =
+        new detail::HostKernel<decltype(NormalizedKernelFunc),
+                               sycl::nd_item<Dims>, Dims, KernelType>(
+            NormalizedKernelFunc);
+    MHostKernel.reset(HostKernelPtr);
+    return &HostKernelPtr->MKernel.template target<NormalizedKernelType>()
+                ->MKernelFunc;
+  }
+
+  template <class KernelType, typename ArgT, int Dims>
+  typename std::enable_if<std::is_same<ArgT, sycl::id<Dims>>::value,
+                          KernelType *>::type
+  ResetHostKernel(const KernelType &KernelFunc) {
+    struct NormalizedKernelType {
+      KernelType MKernelFunc;
+      NormalizedKernelType(const KernelType &KernelFunc)
+          : MKernelFunc(KernelFunc) {}
+      void operator()(const nd_item<Dims> &Arg) {
+        MKernelFunc(Arg.get_global_id());
+      }
+    };
+    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
+        KernelFunc);
+  }
+  template <class KernelType, typename ArgT, int Dims>
+  typename std::enable_if<std::is_same<ArgT, sycl::nd_item<Dims>>::value,
+                          KernelType *>::type
+  ResetHostKernel(const KernelType &KernelFunc) {
+    struct NormalizedKernelType {
+      KernelType MKernelFunc;
+      NormalizedKernelType(const KernelType &KernelFunc)
+          : MKernelFunc(KernelFunc) {}
+      void operator()(const nd_item<Dims> &Arg) { MKernelFunc(Arg); }
+    };
+    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
+        KernelFunc);
+  }
+
+  template <class KernelType, typename ArgT, int Dims>
+  typename std::enable_if<std::is_same<ArgT, sycl::item<Dims, false>>::value,
+                          KernelType *>::type
+  ResetHostKernel(const KernelType &KernelFunc) {
+    struct NormalizedKernelType {
+      KernelType MKernelFunc;
+      NormalizedKernelType(const KernelType &KernelFunc)
+          : MKernelFunc(KernelFunc) {}
+      void operator()(const nd_item<Dims> &Arg) {
+        sycl::item<Dims, false> Item = detail::Builder::createItem<Dims, false>(
+            Arg.get_global_range(), Arg.get_global_id());
+        MKernelFunc(Item);
+      }
+    };
+    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
+        KernelFunc);
+  }
+
+  template <class KernelType, typename ArgT, int Dims>
+  typename std::enable_if<std::is_same<ArgT, sycl::item<Dims, true>>::value,
+                          KernelType *>::type
+  ResetHostKernel(const KernelType &KernelFunc) {
+    struct NormalizedKernelType {
+      KernelType MKernelFunc;
+      NormalizedKernelType(const KernelType &KernelFunc)
+          : MKernelFunc(KernelFunc) {}
+      void operator()(const nd_item<Dims> &Arg) {
+        sycl::item<Dims, true> Item = detail::Builder::createItem<Dims, true>(
+            Arg.get_global_range(), Arg.get_global_id(), Arg.get_offset());
+        MKernelFunc(Item);
+      }
+    };
+    return ResetHostKernelHelper<KernelType, struct NormalizedKernelType, Dims>(
+        KernelFunc);
+  }
+
   /// Stores lambda to the template-free object
   ///
   /// Also initializes kernel name, list of arguments and requirements using
@@ -530,18 +622,17 @@ class __SYCL_EXPORT handler {
           "kernel_handler is not yet supported by host device.",
           PI_INVALID_OPERATION);
     }
-    MHostKernel.reset(
-        new detail::HostKernel<KernelType, LambdaArgType, Dims, KernelName>(
-            KernelFunc));
+    KernelType *KernelPtr =
+        ResetHostKernel<KernelType, LambdaArgType, Dims>(KernelFunc);
 
     using KI = sycl::detail::KernelInfo<KernelName>;
     // Empty name indicates that the compilation happens without integration
     // header, so don't perform things that require it.
     if (KI::getName() != nullptr && KI::getName()[0] != '\0') {
       // TODO support ESIMD in no-integration-header case too.
       MArgs.clear();
-      extractArgsAndReqsFromLambda(MHostKernel->getPtr(), KI::getNumParams(),
-                                   &KI::getParamDesc(0), KI::isESIMD());
+      extractArgsAndReqsFromLambda(reinterpret_cast<char *>(KernelPtr),
+                                   KI::getNumParams(), &KI::getParamDesc(0));
       MKernelName = KI::getName();
       MOSModuleHandle = detail::OSUtil::getOSModuleHandle(KI::getName());
     } else {

@@ -0,0 +1,19 @@
+//==-------- atomic_intrin.hpp - Atomic intrinsic definition file ----------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#pragma once
+
+#include <CL/sycl/exception.hpp>
+
+template <typename Ty> Ty atomic_add_fetch(Ty *ptr, Ty val) {
+#ifdef _WIN32
+  // TODO: Windows will be supported soon
+  throw cl::sycl::feature_not_supported();
+#else
+  return __atomic_add_fetch(ptr, val, __ATOMIC_RELAXED);
+#endif
+}
@@ -0,0 +1,163 @@
+//==---------- cmrt_if_defs.hpp - CM-Runtime interface header file ---------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+/// \file cmrt_if_defs.hpp
+///
+/// Interface definitions for esimd_cpu pi module to communitcate with
+/// underlying CM emulation runtime library
+///
+/// \ingroup sycl_pi_esimd_cpu
+
+#pragma once
+
+/// CMRT Inteface Defines
+
+#include <unordered_map>
+
+// Base class to store common data
+struct _pi_object {
+  _pi_object() : RefCount{1} {}
+
+  std::atomic<pi_uint32> RefCount;
+};
+struct _pi_platform {
+  _pi_platform() {}
+
+  // Keep Version information.
+  std::string CmEmuVersion;
+};
+
+struct _pi_device : _pi_object {
+  _pi_device(pi_platform plt) : Platform{plt} {}
+
+  pi_platform Platform;
+};
+
+struct _pi_context : _pi_object {
+  _pi_context(pi_device DeviceArg, cm_support::CmDevice *CmDeviceArg)
+      : Device{DeviceArg}, CmDevicePtr{CmDeviceArg} {}
+
+  /// One-to-one mapping between Context and Device
+  pi_device Device;
+
+  cm_support::CmDevice *CmDevicePtr = nullptr;
+
+  /// Map SVM memory starting address to corresponding
+  /// CmBufferSVM object. CmBufferSVM object is needed to release memory.
+  std::unordered_map<void *, cm_support::CmBufferSVM *> Addr2CmBufferSVM;
+};
+
+struct _pi_queue : _pi_object {
+  _pi_queue(pi_context ContextArg, cm_support::CmQueue *CmQueueArg)
+      : Context{ContextArg}, CmQueuePtr{CmQueueArg} {}
+
+  // Keeps the PI context to which this queue belongs.
+  pi_context Context = nullptr;
+  cm_support::CmQueue *CmQueuePtr = nullptr;
+};
+
+struct _pi_mem : _pi_object {
+  _pi_mem() {}
+
+  pi_context Context;
+
+  char *MapHostPtr = nullptr;
+
+  // Mutex for load/store accessing
+  std::mutex mutexLock;
+
+  // Surface index used by CM
+  int SurfaceIndex;
+
+  // Supplementary data to keep track of the mappings of this memory
+  // created with piEnqueueMemBufferMap and piEnqueueMemImageMap.
+  struct Mapping {
+    // The offset in the buffer giving the start of the mapped region.
+    size_t Offset;
+    // The size of the mapped region.
+    size_t Size;
+  };
+
+  /*
+  // Method to get type of the derived object (image or buffer)
+  virtual bool isImage() const = 0;
+  */
+
+  virtual ~_pi_mem() = default;
+
+  _pi_mem_type getMemType() const { return MemType; };
+
+  /*
+  // Thread-safe methods to work with memory mappings
+  pi_result addMapping(void *MappedTo, size_t Size, size_t Offset);
+  pi_result removeMapping(void *MappedTo, Mapping &MapInfo);
+  */
+
+protected:
+  _pi_mem(pi_context ctxt, char *HostPtr, _pi_mem_type MemTypeArg,
+          int SurfaceIdxArg)
+      : Context{ctxt}, MapHostPtr{HostPtr},
+        SurfaceIndex{SurfaceIdxArg}, Mappings{}, MemType{MemTypeArg} {}
+
+private:
+  // The key is the host pointer representing an active mapping.
+  // The value is the information needed to maintain/undo the mapping.
+  std::unordered_map<void *, Mapping> Mappings;
+
+  // TODO: we'd like to create a thread safe map class instead of mutex + map,
+  // that must be carefully used together.
+  // The mutex that is used for thread-safe work with Mappings.
+  std::mutex MappingsMutex;
+
+  _pi_mem_type MemType;
+};
+
+struct _pi_buffer final : _pi_mem {
+  // Buffer/Sub-buffer constructor
+  _pi_buffer(pi_context ctxt, char *HostPtr, cm_support::CmBuffer *CmBufArg,
+             int SurfaceIdxArg, size_t SizeArg)
+      : _pi_mem(ctxt, HostPtr, PI_MEM_TYPE_BUFFER, SurfaceIdxArg),
+        CmBufferPtr{CmBufArg}, Size{SizeArg} {}
+
+  cm_support::CmBuffer *CmBufferPtr;
+  size_t Size;
+};
+
+struct _pi_image final : _pi_mem {
+  // Image constructor
+  _pi_image(pi_context ctxt, char *HostPtr, cm_support::CmSurface2D *CmSurfArg,
+            int SurfaceIdxArg, size_t WidthArg, size_t HeightArg, size_t BPPArg)
+      : _pi_mem(ctxt, HostPtr, PI_MEM_TYPE_IMAGE2D, SurfaceIdxArg),
+        CmSurfacePtr{CmSurfArg}, Width{WidthArg}, Height{HeightArg},
+        BytesPerPixel{BPPArg} {}
+
+  cm_support::CmSurface2D *CmSurfacePtr;
+  size_t Width;
+  size_t Height;
+  size_t BytesPerPixel;
+};
+
+struct _pi_event : _pi_object {
+  _pi_event() {}
+
+  cm_support::CmEvent *CmEventPtr = nullptr;
+  cm_support::CmQueue *OwnerQueue = nullptr;
+  pi_context Context = nullptr;
+  bool IsDummyEvent = false;
+};
+
+struct _pi_program : _pi_object {
+  _pi_program() {}
+
+  // Keep the context of the program.
+  pi_context Context;
+};
+
+struct _pi_kernel : _pi_object {
+  _pi_kernel() {}
+};