diff --git a/sycl/include/CL/sycl/detail/aligned_allocator.hpp b/sycl/include/CL/sycl/detail/aligned_allocator.hpp index 624d435d5c6d1..4536a716d847a 100644 --- a/sycl/include/CL/sycl/detail/aligned_allocator.hpp +++ b/sycl/include/CL/sycl/detail/aligned_allocator.hpp @@ -12,7 +12,6 @@ #include #include -#include #include #include #include diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index b4504489662c0..40fae085e083e 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -28,7 +28,6 @@ #include #include -#include #include #include #include @@ -250,7 +249,7 @@ class __SYCL_EXPORT handler { typename std::remove_reference::type>::type> F *storePlainArg(T &&Arg) { MArgsStorage.emplace_back(sizeof(T)); - F *Storage = (F *)MArgsStorage.back().data(); + auto Storage = reinterpret_cast(MArgsStorage.back().data()); *Storage = Arg; return Storage; } @@ -308,8 +307,8 @@ class __SYCL_EXPORT handler { /// Streams are then forwarded to command group and flushed in the scheduler. /// /// \param Stream is a pointer to SYCL stream. - void addStream(shared_ptr_class Stream) { - MStreamStorage.push_back(std::move(Stream)); + void addStream(const shared_ptr_class &Stream) { + MStreamStorage.push_back(Stream); } /// Saves buffers created by handling reduction feature in handler. @@ -318,8 +317,8 @@ class __SYCL_EXPORT handler { /// The 'MSharedPtrStorage' suits that need. /// /// @param ReduObj is a pointer to object that must be stored. - void addReduction(shared_ptr_class ReduObj) { - MSharedPtrStorage.push_back(std::move(ReduObj)); + void addReduction(const shared_ptr_class &ReduObj) { + MSharedPtrStorage.push_back(ReduObj); } ~handler() = default; @@ -327,19 +326,7 @@ class __SYCL_EXPORT handler { bool is_host() { return MIsHost; } void associateWithHandler(detail::AccessorBaseHost *AccBase, - access::target AccTarget) { - detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - detail::Requirement *Req = AccImpl.get(); - // Add accessor to the list of requirements. - MRequirements.push_back(Req); - // Store copy of the accessor. - MAccStorage.push_back(std::move(AccImpl)); - // Add an accessor to the handler list of associated accessors. - // For associated accessors index does not means nothing. - MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor, - Req, static_cast(AccTarget), - /*index*/ 0); - } + access::target AccTarget); // Recursively calls itself until arguments pack is fully processed. // The version for regular(standard layout) argument. @@ -387,7 +374,7 @@ class __SYCL_EXPORT handler { } template void setArgHelper(int ArgIndex, T &&Arg) { - void *StoredArg = (void *)storePlainArg(Arg); + auto StoredArg = static_cast(storePlainArg(Arg)); if (!std::is_same::value && std::is_pointer::value) { MArgs.emplace_back(detail::kernel_param_kind_t::kind_pointer, StoredArg, @@ -399,7 +386,7 @@ class __SYCL_EXPORT handler { } void setArgHelper(int ArgIndex, sampler &&Arg) { - void *StoredArg = (void *)storePlainArg(Arg); + auto StoredArg = static_cast(storePlainArg(Arg)); MArgs.emplace_back(detail::kernel_param_kind_t::kind_sampler, StoredArg, sizeof(sampler), ArgIndex); } @@ -791,8 +778,8 @@ class __SYCL_EXPORT handler { /// Registers event dependencies on this command group. /// /// \param Events is a vector of valid SYCL events to wait on. - void depends_on(vector_class Events) { - for (event &Event : Events) { + void depends_on(const vector_class &Events) { + for (const event &Event : Events) { MEvents.push_back(detail::getSyclObjImpl(Event)); } } @@ -1572,8 +1559,8 @@ class __SYCL_EXPORT handler { detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); MRequirements.push_back(AccImpl.get()); - MSrcPtr = (void *)AccImpl.get(); - MDstPtr = (void *)Dst; + MSrcPtr = static_cast(AccImpl.get()); + MDstPtr = static_cast(Dst); // Store copy of accessor to the local storage to make sure it is alive // until we finish MAccStorage.push_back(std::move(AccImpl)); @@ -1679,7 +1666,7 @@ class __SYCL_EXPORT handler { detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Acc; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - MDstPtr = (void *)AccImpl.get(); + MDstPtr = static_cast(AccImpl.get()); MRequirements.push_back(AccImpl.get()); MAccStorage.push_back(std::move(AccImpl)); } @@ -1708,12 +1695,12 @@ class __SYCL_EXPORT handler { detail::AccessorBaseHost *AccBase = (detail::AccessorBaseHost *)&Dst; detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); - MDstPtr = (void *)AccImpl.get(); + MDstPtr = static_cast(AccImpl.get()); MRequirements.push_back(AccImpl.get()); MAccStorage.push_back(std::move(AccImpl)); MPattern.resize(sizeof(T)); - T *PatternPtr = (T *)MPattern.data(); + auto PatternPtr = reinterpret_cast(MPattern.data()); *PatternPtr = Pattern; } else { @@ -1741,14 +1728,7 @@ class __SYCL_EXPORT handler { /// /// \param WaitList is a vector of valid SYCL events that need to complete /// before barrier command can be executed. - void barrier(const vector_class &WaitList) { - throwIfActionIsCreated(); - MCGType = detail::CG::BARRIER_WAITLIST; - MEventsWaitWithBarrier.resize(WaitList.size()); - std::transform( - WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(), - [](const event &Event) { return detail::getSyclObjImpl(Event); }); - } + void barrier(const vector_class &WaitList); /// Copies data from one memory region to another, both pointed by /// USM pointers. @@ -1756,26 +1736,14 @@ class __SYCL_EXPORT handler { /// \param Dest is a USM pointer to the destination memory. /// \param Src is a USM pointer to the source memory. /// \param Count is a number of bytes to copy. - void memcpy(void *Dest, const void *Src, size_t Count) { - throwIfActionIsCreated(); - MSrcPtr = const_cast(Src); - MDstPtr = Dest; - MLength = Count; - MCGType = detail::CG::COPY_USM; - } + void memcpy(void *Dest, const void *Src, size_t Count); /// Fills the memory pointed by a USM pointer with the value specified. /// /// \param Dest is a USM pointer to the memory to fill. /// \param Value is a value to be set. Value is cast as an unsigned char. /// \param Count is a number of bytes to fill. - void memset(void *Dest, int Value, size_t Count) { - throwIfActionIsCreated(); - MDstPtr = Dest; - MPattern.push_back((char)Value); - MLength = Count; - MCGType = detail::CG::FILL_USM; - } + void memset(void *Dest, int Value, size_t Count); /// Provides hints to the runtime library that data should be made available /// on a device earlier than Unified Shared Memory would normally require it @@ -1783,12 +1751,7 @@ class __SYCL_EXPORT handler { /// /// \param Ptr is a USM pointer to the memory to be prefetched to the device. /// \param Count is a number of bytes to be prefetched. - void prefetch(const void *Ptr, size_t Count) { - throwIfActionIsCreated(); - MDstPtr = const_cast(Ptr); - MLength = Count; - MCGType = detail::CG::PREFETCH_USM; - } + void prefetch(const void *Ptr, size_t Count); private: shared_ptr_class MQueue; @@ -1830,7 +1793,7 @@ class __SYCL_EXPORT handler { unique_ptr_class MHostTask; detail::OSModuleHandle MOSModuleHandle = detail::OSUtil::ExeModuleHandle; // Storage for a lambda or function when using InteropTasks - std::unique_ptr MInteropTask; + unique_ptr_class MInteropTask; /// The list of events that order this operation. vector_class MEvents; /// The list of valid SYCL events that need to complete diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 1532ab3da8666..bab4b0c10c7ba 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -289,7 +289,7 @@ Command *Scheduler::GraphBuilder::insertMemoryMove(MemObjRecord *Record, // Since no alloca command for the sub buffer requirement was found in the // current context, need to find a parent alloca command for it (it must be // there) - auto IsSuitableAlloca = [Record, Req](AllocaCommandBase *AllocaCmd) { + auto IsSuitableAlloca = [Record](AllocaCommandBase *AllocaCmd) { bool Res = sameCtx(AllocaCmd->getQueue()->getContextImplPtr(), Record->MCurContext) && // Looking for a parent buffer alloca command @@ -455,7 +455,7 @@ Command *Scheduler::GraphBuilder::addHostAccessor(Requirement *Req) { Command *Scheduler::GraphBuilder::addCGUpdateHost( std::unique_ptr CommandGroup, QueueImplPtr HostQueue) { - CGUpdateHost *UpdateHost = (CGUpdateHost *)CommandGroup.get(); + auto UpdateHost = static_cast(CommandGroup.get()); Requirement *Req = UpdateHost->getReqToUpdate(); MemObjRecord *Record = getOrInsertMemObjRecord(HostQueue, Req); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 6fc47bcf6b5de..5a54760e813e7 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include #include #include @@ -113,6 +115,21 @@ event handler::finalize() { return MLastEvent; } +void handler::associateWithHandler(detail::AccessorBaseHost *AccBase, + access::target AccTarget) { + detail::AccessorImplPtr AccImpl = detail::getSyclObjImpl(*AccBase); + detail::Requirement *Req = AccImpl.get(); + // Add accessor to the list of requirements. + MRequirements.push_back(Req); + // Store copy of the accessor. + MAccStorage.push_back(std::move(AccImpl)); + // Add an accessor to the handler list of associated accessors. + // For associated accessors index does not means nothing. + MAssociatedAccesors.emplace_back(detail::kernel_param_kind_t::kind_accessor, + Req, static_cast(AccTarget), + /*index*/ 0); +} + void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource) { @@ -277,5 +294,37 @@ void handler::extractArgsAndReqsFromLambda( string_class handler::getKernelName() { return MKernel->get_info(); } + +void handler::barrier(const vector_class &WaitList) { + throwIfActionIsCreated(); + MCGType = detail::CG::BARRIER_WAITLIST; + MEventsWaitWithBarrier.resize(WaitList.size()); + std::transform( + WaitList.begin(), WaitList.end(), MEventsWaitWithBarrier.begin(), + [](const event &Event) { return detail::getSyclObjImpl(Event); }); +} + +void handler::memcpy(void *Dest, const void *Src, size_t Count) { + throwIfActionIsCreated(); + MSrcPtr = const_cast(Src); + MDstPtr = Dest; + MLength = Count; + MCGType = detail::CG::COPY_USM; +} + +void handler::memset(void *Dest, int Value, size_t Count) { + throwIfActionIsCreated(); + MDstPtr = Dest; + MPattern.push_back(static_cast(Value)); + MLength = Count; + MCGType = detail::CG::FILL_USM; +} + +void handler::prefetch(const void *Ptr, size_t Count) { + throwIfActionIsCreated(); + MDstPtr = const_cast(Ptr); + MLength = Count; + MCGType = detail::CG::PREFETCH_USM; +} } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 9ff2e1195cac9..80686a1e3029a 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3621,8 +3621,8 @@ _ZN2cl4sycl5eventC1Ev _ZN2cl4sycl5eventC2EP9_cl_eventRKNS0_7contextE _ZN2cl4sycl5eventC2ESt10shared_ptrINS0_6detail10event_implEE _ZN2cl4sycl5eventC2Ev -_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl5intel6detail16reduGetMaxWGSizeESt10shared_ptrINS0_6detail10queue_implEEm +_ZN2cl4sycl5intel6detail17reduComputeWGSizeEmmRm _ZN2cl4sycl5queue10mem_adviseEPKvm14_pi_mem_advice _ZN2cl4sycl5queue10wait_proxyERKNS0_6detail13code_locationE _ZN2cl4sycl5queue11submit_implESt8functionIFvRNS0_7handlerEEERKNS0_6detail13code_locationE @@ -3728,15 +3728,15 @@ _ZN2cl4sycl6detail12sampler_implD2Ev _ZN2cl4sycl6detail12split_stringERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEEc _ZN2cl4sycl6detail13MemoryManager12prefetch_usmEPvSt10shared_ptrINS1_10queue_implEEmSt6vectorIP9_pi_eventSaIS9_EERS9_ _ZN2cl4sycl6detail13MemoryManager13releaseMemObjESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvS8_ -_ZN2cl4sycl6detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE _ZN2cl4sycl6detail13MemoryManager16allocateMemImageESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRK14_pi_image_descRK16_pi_image_formatRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN2cl4sycl6detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event -_ZN2cl4sycl6detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE +_ZN2cl4sycl6detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event _ZN2cl4sycl6detail13MemoryManager18allocateHostMemoryEPNS1_11SYCLMemObjIEPvbmRKNS0_13property_listE -_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE _ZN2cl4sycl6detail13MemoryManager18releaseImageBufferESt10shared_ptrINS1_12context_implEEPv -_ZN2cl4sycl6detail13MemoryManager17allocateMemBufferESt10shared_ptrINS1_12context_implEEPNS1_11SYCLMemObjIEPvbmRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event +_ZN2cl4sycl6detail13MemoryManager19allocateImageObjectESt10shared_ptrINS1_12context_implEEPvbRK14_pi_image_descRK16_pi_image_formatRKNS0_13property_listE +_ZN2cl4sycl6detail13MemoryManager19wrapIntoImageBufferESt10shared_ptrINS1_12context_implEEPvPNS1_11SYCLMemObjIE +_ZN2cl4sycl6detail13MemoryManager20allocateBufferObjectESt10shared_ptrINS1_12context_implEEPvbmRKNS0_13property_listE _ZN2cl4sycl6detail13MemoryManager20allocateMemSubBufferESt10shared_ptrINS1_12context_implEEPvmmNS0_5rangeILi3EEESt6vectorIS3_INS1_10event_implEESaISB_EERP9_pi_event +_ZN2cl4sycl6detail13MemoryManager24allocateInteropMemObjectESt10shared_ptrINS1_12context_implEEPvRKS3_INS1_10event_implEERKS5_RKNS0_13property_listERP9_pi_event _ZN2cl4sycl6detail13MemoryManager3mapEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEENS0_6access4modeEjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ _ZN2cl4sycl6detail13MemoryManager4copyEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEjNS0_5rangeILi3EEESA_NS0_2idILi3EEEjS5_S8_jSA_SA_SC_jSt6vectorIP9_pi_eventSaISF_EERSF_ _ZN2cl4sycl6detail13MemoryManager4fillEPNS1_11SYCLMemObjIEPvSt10shared_ptrINS1_10queue_implEEmPKcjNS0_5rangeILi3EEESC_NS0_2idILi3EEEjSt6vectorIP9_pi_eventSaISH_EERSH_ @@ -3819,8 +3819,13 @@ _ZN2cl4sycl7contextC2ESt10shared_ptrINS0_6detail12context_implEE _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb _ZN2cl4sycl7handler13getKernelNameB5cxx11Ev _ZN2cl4sycl7handler18extractArgsAndReqsEv +_ZN2cl4sycl7handler20associateWithHandlerEPNS0_6detail16AccessorBaseHostENS0_6access6targetE _ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tE +_ZN2cl4sycl7handler6memcpyEPvPKvm +_ZN2cl4sycl7handler6memsetEPvim +_ZN2cl4sycl7handler7barrierERKSt6vectorINS0_5eventESaIS3_EE _ZN2cl4sycl7handler8finalizeEv +_ZN2cl4sycl7handler8prefetchEPKvm _ZN2cl4sycl7program17build_with_sourceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES7_ _ZN2cl4sycl7program19compile_with_sourceENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES7_ _ZN2cl4sycl7program22build_with_kernel_nameENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEES7_l