Skip to content
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions clang/include/clang/Basic/DiagnosticSemaKinds.td
Original file line number Diff line number Diff line change
Expand Up @@ -3192,6 +3192,9 @@ def warn_dllimport_dropped_from_inline_function : Warning<
InGroup<IgnoredAttributes>;
def warn_attribute_ignored : Warning<"%0 attribute ignored">,
InGroup<IgnoredAttributes>;
def warn_attribute_on_direct_kernel_callee_only : Warning<"%0 attribute allowed"
" only on function directly called from kernel; attribute ignored">,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

only on function -> only on a function

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

"called from kernel" is also a bit awkward. called from a kernel? called from a kernel function?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

InGroup<IgnoredAttributes>;
def warn_nothrow_attribute_ignored : Warning<"'nothrow' attribute conflicts with"
" exception specification; attribute ignored">,
InGroup<IgnoredAttributes>;
Expand Down
1 change: 1 addition & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -12994,6 +12994,7 @@ class Sema final {

bool isKnownGoodSYCLDecl(const Decl *D);
void checkSYCLDeviceVarDecl(VarDecl *Var);
void copySYCLKernelAttrs(const CXXRecordDecl *KernelObj);
void ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, MangleContext &MC);
void MarkDevice();

Expand Down
148 changes: 104 additions & 44 deletions clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,36 @@ static int64_t getIntExprValue(const Expr *E, ASTContext &Ctx) {
return E->getIntegerConstantExpr(Ctx)->getSExtValue();
}

// Collect function attributes related to SYCL
static void collectSYCLAttributes(Sema &S, FunctionDecl *FD,
llvm::SmallVector<Attr *, 4> &Attrs,
bool DirectlyCalled = true) {
if (!FD->hasAttrs())
return;

llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) {
return isa<IntelReqdSubGroupSizeAttr, ReqdWorkGroupSizeAttr,
SYCLIntelKernelArgsRestrictAttr, SYCLIntelNumSimdWorkItemsAttr,
SYCLIntelSchedulerTargetFmaxMhzAttr,
SYCLIntelMaxWorkGroupSizeAttr, SYCLIntelMaxGlobalWorkDimAttr,
SYCLIntelNoGlobalWorkOffsetAttr, SYCLSimdAttr>(A);
});

// Allow the kernel attribute "use_stall_enable_clusters" only on lambda
// functions and function objects called directly from a kernel.
// For all other cases, emit a warning and ignore.
if (auto *A = FD->getAttr<SYCLIntelUseStallEnableClustersAttr>()) {
if (DirectlyCalled) {
Attrs.push_back(A);
} else {
S.Diag(A->getLocation(),
diag::warn_attribute_on_direct_kernel_callee_only)
<< A;
FD->dropAttr<SYCLIntelUseStallEnableClustersAttr>();
}
}
}

class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
// Used to keep track of the constexpr depth, so we know whether to skip
// diagnostics.
Expand Down Expand Up @@ -477,7 +507,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
// Returns the kernel body function found during traversal.
FunctionDecl *
CollectPossibleKernelAttributes(FunctionDecl *SYCLKernel,
llvm::SmallPtrSet<Attr *, 4> &Attrs) {
llvm::SmallVector<Attr *, 4> &Attrs) {
typedef std::pair<FunctionDecl *, FunctionDecl *> ChildParentPair;
llvm::SmallPtrSet<FunctionDecl *, 16> Visited;
llvm::SmallVector<ChildParentPair, 16> WorkList;
Expand Down Expand Up @@ -508,55 +538,23 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
"function can be called");
KernelBody = FD;
}

WorkList.pop_back();
if (!Visited.insert(FD).second)
continue; // We've already seen this Decl

if (auto *A = FD->getAttr<IntelReqdSubGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<ReqdWorkGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelKernelArgsRestrictAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelNumSimdWorkItemsAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelSchedulerTargetFmaxMhzAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelMaxWorkGroupSizeAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelMaxGlobalWorkDimAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLIntelNoGlobalWorkOffsetAttr>())
Attrs.insert(A);

if (auto *A = FD->getAttr<SYCLSimdAttr>())
Attrs.insert(A);

// Allow the kernel attribute "use_stall_enable_clusters" only on lambda
// functions and function objects that are called directly from a kernel
// (i.e. the one passed to the single_task or parallel_for functions).
// For all other cases, emit a warning and ignore.
if (auto *A = FD->getAttr<SYCLIntelUseStallEnableClustersAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
} else {
SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
FD->dropAttr<SYCLIntelUseStallEnableClustersAttr>();
}
}
// Gather all attributes of FD that are SYCL related.
// Some attributes are allowed only on lambda functions and function
// objects called directly from a kernel (i.e. the one passed to the
// single_task or parallel_for functions).
bool DirectlyCalled = (ParentFD == SYCLKernel);
collectSYCLAttributes(SemaRef, FD, Attrs, DirectlyCalled);

// Attribute "loop_fuse" can be applied explicitly on kernel function.
// Attribute should not be propagated from device functions to kernel.
if (auto *A = FD->getAttr<SYCLIntelLoopFuseAttr>()) {
if (ParentFD == SYCLKernel) {
Attrs.insert(A);
Attrs.push_back(A);
}
}

Expand Down Expand Up @@ -3149,6 +3147,62 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc,
KernelFunc->setInvalidDecl();
}

// For a wrapped parallel_for, copy attributes from original
// kernel to wrapped kernel.
void Sema::copySYCLKernelAttrs(const CXXRecordDecl *KernelObj) {
// Get the operator() function of the wrapper
CXXMethodDecl *OpParens = nullptr;
for (auto *MD : KernelObj->methods()) {
if (MD->getOverloadedOperator() == OO_Call) {
OpParens = MD;
break;
}
}
assert(OpParens && "invalid kernel object");

typedef std::pair<FunctionDecl *, FunctionDecl *> ChildParentPair;
llvm::SmallPtrSet<FunctionDecl *, 16> Visited;
llvm::SmallVector<ChildParentPair, 16> WorkList;
WorkList.push_back({OpParens, nullptr});
FunctionDecl *KernelBody = nullptr;

CallGraph SYCLCG;
SYCLCG.addToCallGraph(getASTContext().getTranslationUnitDecl());
while (!WorkList.empty()) {
FunctionDecl *FD = WorkList.back().first;
FunctionDecl *ParentFD = WorkList.back().second;

if ((ParentFD == OpParens) && isSYCLKernelBodyFunction(FD)) {
KernelBody = FD;
break;
}

WorkList.pop_back();
if (!Visited.insert(FD).second)
continue; // We've already seen this Decl

CallGraphNode *N = SYCLCG.getNode(FD);
if (!N)
continue;

for (const CallGraphNode *CI : *N) {
if (auto *Callee = dyn_cast<FunctionDecl>(CI->getDecl())) {
Callee = Callee->getMostRecentDecl();
if (!Visited.count(Callee))
WorkList.push_back({Callee, FD});
}
}
}

assert(KernelBody && "improper parallel_for wrap");
if (KernelBody) {
llvm::SmallVector<Attr *, 4> Attrs;
collectSYCLAttributes(*this, KernelBody, Attrs);
if (!Attrs.empty())
llvm::for_each(Attrs, [OpParens](Attr *A) { OpParens->addAttr(A); });
}
}

// Generates the OpenCL kernel using KernelCallerFunc (kernel caller
// function) defined is SYCL headers.
// Generated OpenCL kernel contains the body of the kernel caller function,
Expand Down Expand Up @@ -3181,14 +3235,20 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc,
if (KernelObj->isInvalidDecl())
return;

bool IsSIMDKernel = isESIMDKernelType(KernelObj);

// Calculate both names, since Integration headers need both.
std::string CalculatedName, StableName;
std::tie(CalculatedName, StableName) =
constructKernelName(*this, KernelCallerFunc, MC);
StringRef KernelName(getLangOpts().SYCLUnnamedLambda ? StableName
: CalculatedName);

// Attributes of a user-written SYCL kernel must be copied to the internally
// generated alternative kernel, identified by a known string in its name.
if (StableName.find("__pf_kernel_wrapper") != std::string::npos)
copySYCLKernelAttrs(KernelObj);

bool IsSIMDKernel = isESIMDKernelType(KernelObj);

SyclKernelDeclCreator kernel_decl(*this, KernelName, KernelObj->getLocation(),
KernelCallerFunc->isInlined(),
IsSIMDKernel);
Expand Down Expand Up @@ -3226,7 +3286,7 @@ void Sema::MarkDevice(void) {
Marker.CollectKernelSet(SYCLKernel, SYCLKernel, VisitedSet);

// Let's propagate attributes from device functions to a SYCL kernels
llvm::SmallPtrSet<Attr *, 4> Attrs;
llvm::SmallVector<Attr *, 4> Attrs;
// This function collects all kernel attributes which might be applied to
// a device functions, but need to be propagated down to callers, i.e.
// SYCL kernels
Expand Down
2 changes: 1 addition & 1 deletion clang/test/SemaSYCL/stall_enable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
using namespace cl::sycl;
queue q;

[[intel::use_stall_enable_clusters]] void test() {} //expected-warning{{'use_stall_enable_clusters' attribute ignored}}
[[intel::use_stall_enable_clusters]] void test() {} //expected-warning{{'use_stall_enable_clusters' attribute allowed only on function directly called from kernel}}

#ifdef TRIGGER_ERROR
[[intel::use_stall_enable_clusters(1)]] void bar1() {} // expected-error{{'use_stall_enable_clusters' attribute takes no arguments}}
Expand Down
22 changes: 22 additions & 0 deletions sycl/test/kernel_param/attr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
// RUN: %clangxx -fsycl-device-only -Xclang -fsycl-is-device -emit-llvm %s -S -o %t.ll -I %sycl_include
// RUN: FileCheck %s --input-file %t.ll

// Check copying of parallel_for kernel attributes to wrapper kernel.

#include <CL/sycl.hpp>
using namespace cl::sycl;

int main() {
range<1> Size{10};
{
queue myQueue;
myQueue.submit([&](handler &cgh) {
cgh.parallel_for<class C>(Size, [=](item<1> ITEM)
[[intel::reqd_work_group_size(4)]]{});
});
}

return 0;
}

// CHECK: define {{.*}}spir_kernel void @{{.*}}__pf_kernel_wrapper{{.*}}reqd_work_group_size