diff --git a/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp index 79190bcc20b99..ab3870a7a7b0b 100644 --- a/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp +++ b/sycl/include/CL/sycl/ONEAPI/experimental/spec_constant.hpp @@ -17,6 +17,7 @@ #pragma once +#include #include #include @@ -41,11 +42,15 @@ template class spec_constant { spec_constant(T Cst) : Val(Cst) {} T Val; -#endif +#else + char padding[sizeof(T)]; +#endif // __SYCL_DEVICE_ONLY__ friend class cl::sycl::program; public: - T get() const { // explicit access. + template + typename sycl::detail::enable_if_t::value, V> + get() const { // explicit access. #ifdef __SYCL_DEVICE_ONLY__ const char *TName = __builtin_unique_stable_name(ID); return __sycl_getSpecConstantValue(TName); @@ -54,6 +59,19 @@ template class spec_constant { #endif // __SYCL_DEVICE_ONLY__ } + template + typename sycl::detail::enable_if_t::value && + std::is_pod::value, + V> + get() const { // explicit access. +#ifdef __SYCL_DEVICE_ONLY__ + const char *TName = __builtin_unique_stable_name(ID); + return __sycl_getCompositeSpecConstantValue(TName); +#else + return Val; +#endif // __SYCL_DEVICE_ONLY__ + } + operator T() const { // implicit conversion. return get(); } diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 27ea52ca5833d..ab0feafd51019 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -640,7 +640,12 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; /// Name must be consistent with /// PropertySetRegistry::SYCL_SPECIALIZATION_CONSTANTS defined in /// PropertySetIO.h -#define __SYCL_PI_PROPERTY_SET_SPEC_CONST_MAP "SYCL/specialization constants" +#define __SYCL_PI_PROPERTY_SET_SCALAR_SPEC_CONST_MAP \ + "SYCL/specialization constants" +/// PropertySetRegistry::SYCL_COMPOSITE_SPECIALIZATION_CONSTANTS defined in +/// PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_COMPOSITE_SPEC_CONST_MAP \ + "SYCL/composite specialization constants" /// PropertySetRegistry::SYCL_DEVICELIB_REQ_MASK defined in PropertySetIO.h #define __SYCL_PI_PROPERTY_SET_DEVICELIB_REQ_MASK "SYCL/devicelib req mask" /// PropertySetRegistry::SYCL_KERNEL_PARAM_OPT_INFO defined in PropertySetIO.h diff --git a/sycl/include/CL/sycl/detail/pi.hpp b/sycl/include/CL/sycl/detail/pi.hpp index 3509f9a73cbb4..0c98a69528369 100644 --- a/sycl/include/CL/sycl/detail/pi.hpp +++ b/sycl/include/CL/sycl/detail/pi.hpp @@ -359,11 +359,32 @@ class DeviceBinaryImage { return Format; } - /// Gets the iterator range over specialization constants in this this binary - /// image. For each property pointed to by an iterator within the range, the - /// name of the property is the specializaion constant symbolic ID and the - /// value is 32-bit unsigned integer ID. - const PropertyRange &getSpecConstants() const { return SpecConstIDMap; } + /// Gets the iterator range over scalar specialization constants in this + /// binary image. For each property pointed to by an iterator within the + /// range, the name of the property is the specialization constant symbolic ID + /// and the value is 32-bit unsigned integer ID. + const PropertyRange &getScalarSpecConstants() const { + return ScalarSpecConstIDMap; + } + /// Gets the iterator range over composite specialization constants in this + /// binary image. For each property pointed to by an iterator within the + /// range, the name of the property is the specialization constant symbolic ID + /// and the value is a list of tuples of 32-bit unsigned integer values, which + /// encode scalar specialization constants, that form the composite one. + /// Each tuple consists of ID of scalar specialization constant, its location + /// within a composite (offset in bytes from the beginning) and its size. + /// For example, for the following structure: + /// struct A { int a; float b; }; + /// struct POD { A a[2]; int b; }; + /// List of tuples will look like: + /// { ID0, 0, 4 }, // .a[0].a + /// { ID1, 4, 4 }, // .a[0].b + /// { ID2, 8, 4 }, // .a[1].a + /// { ID3, 12, 4 }, // .a[1].b + /// { ID4, 16, 4 }, // .b + const PropertyRange &getCompositeSpecConstants() const { + return CompositeSpecConstIDMap; + } const PropertyRange &getDeviceLibReqMask() const { return DeviceLibReqMask; } const PropertyRange &getKernelParamOptInfo() const { return KernelParamOptInfo; @@ -376,7 +397,8 @@ class DeviceBinaryImage { pi_device_binary Bin; pi::PiDeviceBinaryType Format = PI_DEVICE_BINARY_TYPE_NONE; - DeviceBinaryImage::PropertyRange SpecConstIDMap; + DeviceBinaryImage::PropertyRange ScalarSpecConstIDMap; + DeviceBinaryImage::PropertyRange CompositeSpecConstIDMap; DeviceBinaryImage::PropertyRange DeviceLibReqMask; DeviceBinaryImage::PropertyRange KernelParamOptInfo; }; diff --git a/sycl/include/CL/sycl/detail/sycl_fe_intrins.hpp b/sycl/include/CL/sycl/detail/sycl_fe_intrins.hpp index e9aad7554471e..0d58bb792cae3 100644 --- a/sycl/include/CL/sycl/detail/sycl_fe_intrins.hpp +++ b/sycl/include/CL/sycl/detail/sycl_fe_intrins.hpp @@ -18,4 +18,7 @@ template SYCL_EXTERNAL T __sycl_getSpecConstantValue(const char *ID); +template +SYCL_EXTERNAL T __sycl_getCompositeSpecConstantValue(const char *ID); + #endif diff --git a/sycl/include/CL/sycl/program.hpp b/sycl/include/CL/sycl/program.hpp index fdf5edb65d01f..80d53121ef60c 100644 --- a/sycl/include/CL/sycl/program.hpp +++ b/sycl/include/CL/sycl/program.hpp @@ -343,8 +343,8 @@ class __SYCL_EXPORT program { template ONEAPI::experimental::spec_constant set_spec_constant(T Cst) { constexpr const char *Name = detail::SpecConstantInfo::getName(); - static_assert(std::is_integral::value || - std::is_floating_point::value, + static_assert(std::is_arithmetic::value || + (std::is_class::value && std::is_pod::value), "unsupported specialization constant type"); #ifdef __SYCL_DEVICE_ONLY__ (void)Cst; diff --git a/sycl/source/detail/pi.cpp b/sycl/source/detail/pi.cpp index 0ff4081e96ade..38ac1097f5fee 100644 --- a/sycl/source/detail/pi.cpp +++ b/sycl/source/detail/pi.cpp @@ -591,7 +591,9 @@ void DeviceBinaryImage::init(pi_device_binary Bin) { // try to determine the format; may remain "NONE" Format = getBinaryImageFormat(Bin->BinaryStart, getSize()); - SpecConstIDMap.init(Bin, __SYCL_PI_PROPERTY_SET_SPEC_CONST_MAP); + ScalarSpecConstIDMap.init(Bin, __SYCL_PI_PROPERTY_SET_SCALAR_SPEC_CONST_MAP); + CompositeSpecConstIDMap.init(Bin, + __SYCL_PI_PROPERTY_SET_COMPOSITE_SPEC_CONST_MAP); DeviceLibReqMask.init(Bin, __SYCL_PI_PROPERTY_SET_DEVICELIB_REQ_MASK); KernelParamOptInfo.init(Bin, __SYCL_PI_PROPERTY_SET_KERNEL_PARAM_OPT_INFO); } diff --git a/sycl/source/detail/program_impl.cpp b/sycl/source/detail/program_impl.cpp index aea275b25e494..aac073888840c 100644 --- a/sycl/source/detail/program_impl.cpp +++ b/sycl/source/detail/program_impl.cpp @@ -523,26 +523,55 @@ void program_impl::flush_spec_constants(const RTDeviceBinaryImage &Img, RT::PiProgram NativePrg) const { // iterate via all specialization constants the program's image depends on, // and set each to current runtime value (if any) - const pi::DeviceBinaryImage::PropertyRange &SCRange = Img.getSpecConstants(); + const pi::DeviceBinaryImage::PropertyRange &ScalarSCRange = + Img.getScalarSpecConstants(); + const pi::DeviceBinaryImage::PropertyRange &CompositeSCRange = + Img.getCompositeSpecConstants(); ContextImplPtr Ctx = getSyclObjImpl(get_context()); using SCItTy = pi::DeviceBinaryImage::PropertyRange::ConstIterator; auto LockGuard = Ctx->getKernelProgramCache().acquireCachedPrograms(); + NativePrg = NativePrg ? NativePrg : getHandleRef(); - for (SCItTy SCIt : SCRange) { - const char *SCName = (*SCIt)->Name; - auto SCEntry = SpecConstRegistry.find(SCName); + for (SCItTy SCIt : ScalarSCRange) { + auto SCEntry = SpecConstRegistry.find((*SCIt)->Name); if (SCEntry == SpecConstRegistry.end()) // spec constant has not been set in user code - SPIR-V will use default continue; const spec_constant_impl &SC = SCEntry->second; assert(SC.isSet() && "uninitialized spec constant"); - pi_device_binary_property SCProp = *SCIt; - pi_uint32 ID = pi::DeviceBinaryProperty(SCProp).asUint32(); - NativePrg = NativePrg ? NativePrg : getHandleRef(); + pi_uint32 ID = pi::DeviceBinaryProperty(*SCIt).asUint32(); Ctx->getPlugin().call( NativePrg, ID, SC.getSize(), SC.getValuePtr()); } + + for (SCItTy SCIt : CompositeSCRange) { + auto SCEntry = SpecConstRegistry.find((*SCIt)->Name); + if (SCEntry == SpecConstRegistry.end()) + // spec constant has not been set in user code - SPIR-V will use default + continue; + const spec_constant_impl &SC = SCEntry->second; + assert(SC.isSet() && "uninitialized spec constant"); + pi::ByteArray Descriptors = pi::DeviceBinaryProperty(*SCIt).asByteArray(); + // First 8 bytes are consumed by size of the property + assert(Descriptors.size() > 8 && "Unexpected property size"); + // Expected layout is vector of 3-component tuples (flattened into a vector + // of scalars), where each tuple consists of: ID of a scalar spec constant, + // which is a member of the composite; offset, which is used to calculate + // location of scalar member within the composite; size of a scalar member + // of the composite. + assert(((Descriptors.size() - 8) / sizeof(std::uint32_t)) % 3 == 0 && + "unexpected layout of composite spec const descriptors"); + auto *It = reinterpret_cast(&Descriptors[8]); + auto *End = reinterpret_cast(&Descriptors[0] + + Descriptors.size()); + while (It != End) { + Ctx->getPlugin().call( + NativePrg, /* ID */ It[0], /* Size */ It[2], + SC.getValuePtr() + /* Offset */ It[1]); + It += 3; + } + } } pi_native_handle program_impl::getNative() const { diff --git a/sycl/source/detail/spec_constant_impl.cpp b/sycl/source/detail/spec_constant_impl.cpp index f61db6fe4a0bc..898fb584f07b7 100644 --- a/sycl/source/detail/spec_constant_impl.cpp +++ b/sycl/source/detail/spec_constant_impl.cpp @@ -21,10 +21,10 @@ namespace sycl { namespace detail { void spec_constant_impl::set(size_t Size, const void *Val) { - if ((Size > sizeof(Bytes)) || (Size == 0)) + if (0 == Size) throw sycl::runtime_error("invalid spec constant size", PI_INVALID_VALUE); - this->Size = Size; - std::memcpy(Bytes, Val, Size); + auto *BytePtr = reinterpret_cast(Val); + this->Bytes.assign(BytePtr, BytePtr + Size); } void stableSerializeSpecConstRegistry(const SpecConstRegistryT &Reg, diff --git a/sycl/source/detail/spec_constant_impl.hpp b/sycl/source/detail/spec_constant_impl.hpp index 5bbf8ad224901..55db21a0cd611 100644 --- a/sycl/source/detail/spec_constant_impl.hpp +++ b/sycl/source/detail/spec_constant_impl.hpp @@ -14,6 +14,7 @@ #include #include +#include __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { @@ -22,20 +23,18 @@ namespace detail { // Represents a specialization constant value in SYCL runtime. class spec_constant_impl { public: - spec_constant_impl() : Size(0), Bytes{0} {}; + spec_constant_impl() = default; spec_constant_impl(size_t Size, const void *Val) { set(Size, Val); } void set(size_t Size, const void *Val); - size_t getSize() const { return Size; } - const unsigned char *getValuePtr() const { return Bytes; } - bool isSet() const { return Size != 0; } + size_t getSize() const { return Bytes.size(); } + const char *getValuePtr() const { return Bytes.data(); } + bool isSet() const { return !Bytes.empty(); } private: - size_t Size; // the size of the spec constant value - // TODO invent more flexible approach to support values of arbitrary type: - unsigned char Bytes[8]; // memory to hold the value bytes + std::vector Bytes; }; std::ostream &operator<<(std::ostream &Out, const spec_constant_impl &V); diff --git a/sycl/test/on-device/spec_const/composite-in-functor.cpp b/sycl/test/on-device/spec_const/composite-in-functor.cpp new file mode 100644 index 0000000000000..cdcb423317ca6 --- /dev/null +++ b/sycl/test/on-device/spec_const/composite-in-functor.cpp @@ -0,0 +1,86 @@ +// UNSUPPORTED: cuda +// +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %RUN_ON_HOST %t.out | FileCheck %s +// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER +// +// The test checks that the specialization constant feature works correctly with +// composite types: toolchain processes them correctly and runtime can correctly +// execute the program. +// +// CHECK: 1 : 2 +// CHECK-NEXT: 3 +// CHECK-NEXT: 4 : 5 + +#include + +using namespace cl::sycl; + +struct A { + float x; + float y[2]; +}; + +struct pod_t { + int f1[2]; + A f2; +}; + +class my_kernel_t { +public: + using sc_t = + sycl::ONEAPI::experimental::spec_constant; + + my_kernel_t(const sc_t &sc, const cl::sycl::stream &strm) + : sc_(sc), strm_(strm) {} + + void operator()(cl::sycl::id<1> i) const { + auto p = sc_.get(); + strm_ << p.f1[0] << " : " << p.f1[1] << "\n"; + strm_ << p.f2.x << "\n"; + strm_ << p.f2.y[0] << " : " << p.f2.y[1] << "\n"; + strm_ << sycl::endl; + } + + sc_t sc_; + cl::sycl::stream strm_; +}; + +int main() { + cl::sycl::queue q(default_selector{}, [](exception_list l) { + for (auto ep : l) { + try { + std::rethrow_exception(ep); + } catch (cl::sycl::exception &e0) { + std::cout << e0.what(); + } catch (std::exception &e1) { + std::cout << e1.what(); + } catch (...) { + std::cout << "*** catch (...)\n"; + } + } + }); + + pod_t pod; + pod.f1[0] = 1; + pod.f1[1] = 2; + pod.f2.x = 3; + pod.f2.y[0] = 4; + pod.f2.y[1] = 5; + + cl::sycl::program p(q.get_context()); + auto sc = p.set_spec_constant(pod); + p.build_with_kernel_type(); + + q.submit([&](cl::sycl::handler &cgh) { + cl::sycl::stream strm(1024, 256, cgh); + my_kernel_t func(sc, strm); + + auto sycl_kernel = p.get_kernel(); + cgh.parallel_for(sycl_kernel, cl::sycl::range<1>(1), func); + }); + q.wait(); + + return 0; +} diff --git a/sycl/test/on-device/spec_const/composite-type.cpp b/sycl/test/on-device/spec_const/composite-type.cpp new file mode 100644 index 0000000000000..6da819f3bbf20 --- /dev/null +++ b/sycl/test/on-device/spec_const/composite-type.cpp @@ -0,0 +1,97 @@ +// UNSUPPORTED: cuda +// +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %RUN_ON_HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// +// The test checks that the specialization constant feature works correctly with +// composite types: toolchain processes them correctly and runtime can correctly +// execute the program. + +#include + +#include +#include + +using namespace sycl; +class Test; + +struct A { + int a; + float b; +}; + +struct POD { + A a[2]; + int b; +}; + +using MyPODConst = POD; + +int global_val = 10; + +// Fetch a value at runtime. +int get_value() { return global_val; } + +int main(int argc, char **argv) { + cl::sycl::queue q(default_selector{}, [](exception_list l) { + for (auto ep : l) { + try { + std::rethrow_exception(ep); + } catch (cl::sycl::exception &e0) { + std::cout << e0.what(); + } catch (std::exception &e1) { + std::cout << e1.what(); + } catch (...) { + std::cout << "*** catch (...)\n"; + } + } + }); + + std::cout << "Running on " << q.get_device().get_info() + << "\n"; + std::cout << "global_val = " << global_val << "\n"; + cl::sycl::program program(q.get_context()); + + int goldi = (int)get_value(); + float goldf = (float)get_value(); + + POD gold = {{{goldi, goldf}, {goldi, goldf}}, goldi}; + + cl::sycl::ONEAPI::experimental::spec_constant pod = + program.set_spec_constant(gold); + + program.build_with_kernel_type(); + + POD result; + try { + cl::sycl::buffer bufi(&result, 1); + + q.submit([&](cl::sycl::handler &cgh) { + auto acci = bufi.get_access(cgh); + cgh.single_task(program.get_kernel(), + [=]() { acci[0] = pod.get(); }); + }); + } catch (cl::sycl::exception &e) { + std::cout << "*** Exception caught: " << e.what() << "\n"; + return 1; + } + + bool passed = false; + + std::cout << result.a[0].a << " " << result.a[0].b << "\n"; + std::cout << result.a[1].a << " " << result.a[1].b << "\n"; + std::cout << result.b << "\n\n"; + + std::cout << gold.a[0].a << " " << gold.a[0].b << "\n"; + std::cout << gold.a[1].a << " " << gold.a[1].b << "\n"; + std::cout << gold.b << "\n\n"; + + if (0 == std::memcmp(&result, &gold, sizeof(POD))) { + passed = true; + } + + std::cout << (passed ? "passed\n" : "FAILED\n"); + return passed ? 0 : 1; +}