-
Notifications
You must be signed in to change notification settings - Fork 807
[SYCL][FPGA] Allowing max-concurrency attribute on functions. #3362
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1854,10 +1854,10 @@ def SYCLIntelFPGAInitiationInterval : StmtAttr { | |
let Documentation = [SYCLIntelFPGAInitiationIntervalAttrDocs]; | ||
} | ||
|
||
def SYCLIntelFPGAMaxConcurrency : StmtAttr { | ||
def SYCLIntelFPGAMaxConcurrency : InheritableAttr { | ||
let Spellings = [CXX11<"intelfpga","max_concurrency">, | ||
CXX11<"intel","max_concurrency">]; | ||
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt], | ||
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt, Function], | ||
ErrorDiag, "'for', 'while', and 'do' statements">; | ||
|
||
let Args = [ExprArgument<"NThreadsExpr">]; | ||
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2799,10 +2799,10 @@ def SYCLIntelFPGAMaxConcurrencyAttrDocs : Documentation { | |
let Category = DocCatVariable; | ||
let Heading = "intel::max_concurrency"; | ||
let Content = [{ | ||
This attribute applies to a loop. Indicates that the loop should allow no more | ||
than N threads or iterations to execute it simultaneously. N must be a non | ||
negative integer. '0' indicates the max_concurrency case to be unbounded. Cannot | ||
be applied multiple times to the same loop. | ||
This attribute applies to a loop or a function. Indicates that the loop/function | ||
should allow no more than N threads or iterations to execute it simultaneously. | ||
N must be a non-negative integer. '0' indicates the max_concurrency case to be | ||
unbounded. Cannot be applied multiple times to the same loop or function. | ||
|
||
.. code-block:: c++ | ||
|
||
|
@@ -2811,10 +2811,13 @@ be applied multiple times to the same loop. | |
[[intel::max_concurrency(2)]] for (int i = 0; i != 10; ++i) a[i] = 0; | ||
} | ||
|
||
[[intel::max_concurrency(2)]] void foo1() { } | ||
|
||
|
||
template<int N> | ||
void bar() { | ||
[[intel::max_concurrency(N)]] for(;;) { } | ||
} | ||
template<int N> | ||
[[intel::max_concurrency(N)]] void bar1() { } | ||
|
||
|
||
}]; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10295,6 +10295,11 @@ class Sema final { | |
/// declaration. | ||
void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID); | ||
|
||
/// AddSYCLIntelFPGAMaxConcurrencyAttr - Adds a max_concurrency attribute to a | ||
|
||
/// particular declaration. | ||
void AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D, | ||
const AttributeCommonInfo &CI, | ||
Expr *E); | ||
bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type); | ||
bool checkAllowedSYCLInitializer(VarDecl *VD, | ||
bool CheckValueDependent = false); | ||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -730,6 +730,15 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, | |||||
llvm::ConstantAsMetadata::get(Builder.getInt32(1))}; | ||||||
Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs)); | ||||||
} | ||||||
|
||||||
if (const SYCLIntelFPGAMaxConcurrencyAttr *A = | ||||||
|
if (const SYCLIntelFPGAMaxConcurrencyAttr *A = | |
if (const auto *A = |
Because the type is spelled out explicitly in the initializer, this is a good time to use `auto`.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You are missing an assert here before `ArgVal`:
assert(CE && "Not an integer constant expression");
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
const auto *CE = dyn_cast<ConstantExpr>(A->getNThreadsExpr()); | |
const auto *CE = cast<ConstantExpr>(A->getNThreadsExpr()); |
No need for a `dyn_cast<>` followed by an `assert`.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt(); | |
llvm::APSInt ArgVal = CE->getResultAsAPSInt(); |
The function doesn't return an `Optional`, so I assume this was unintentional.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6294,6 +6294,45 @@ static void handleSYCLIntelPipeIOAttr(Sema &S, Decl *D, | |
S.addSYCLIntelPipeIOAttr(D, Attr, E); | ||
} | ||
|
||
void Sema::AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D, | ||
const AttributeCommonInfo &CI, | ||
Expr *E) { | ||
if (!E->isValueDependent()) { | ||
llvm::APSInt ArgVal; | ||
ExprResult Res = VerifyIntegerConstantExpression(E, &ArgVal); | ||
if (Res.isInvalid()) | ||
return; | ||
E = Res.get(); | ||
|
||
// This attribute requires a strictly positive value. | ||
if (ArgVal <= 0) { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The documentation says non-negative values are allowed, and that zero means unbounded. |
||
Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer) | ||
<< CI << /*positive*/ 0; | ||
return; | ||
} | ||
|
||
if (const auto *DeclAttr = D->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) { | ||
const auto *DeclExpr = | ||
dyn_cast<ConstantExpr>(DeclAttr->getNThreadsExpr()); | ||
if (DeclExpr && ArgVal != DeclExpr->getResultAsAPSInt()) { | ||
Diag(CI.getLoc(), diag::warn_duplicate_attribute) << CI; | ||
Diag(DeclAttr->getLoc(), diag::note_previous_attribute); | ||
return; | ||
} | ||
} | ||
} | ||
|
||
D->addAttr(::new (Context) SYCLIntelFPGAMaxConcurrencyAttr(Context, CI, E)); | ||
} | ||
|
||
static void handleSYCLIntelFPGAMaxConcurrencyAttr(Sema &S, Decl *D, | ||
const ParsedAttr &A) { | ||
S.CheckDeprecatedSYCLAttributeSpelling(A); | ||
|
||
Expr *E = A.getArgAsExpr(0); | ||
S.AddSYCLIntelFPGAMaxConcurrencyAttr(D, A, E); | ||
} | ||
|
||
namespace { | ||
struct IntrinToName { | ||
uint32_t Id; | ||
|
@@ -9547,6 +9586,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, | |
case ParsedAttr::AT_SYCLIntelPipeIO: | ||
handleSYCLIntelPipeIOAttr(S, D, AL); | ||
break; | ||
case ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency: | ||
handleSYCLIntelFPGAMaxConcurrencyAttr(S, D, AL); | ||
break; | ||
|
||
// Swift attributes. | ||
case ParsedAttr::AT_SwiftAsyncName: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -559,6 +559,8 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> { | |
Attrs.insert(A); | ||
} | ||
} | ||
if (auto *A = FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) | ||
Attrs.insert(A); | ||
|
||
|
||
// TODO: vec_len_hint should be handled here | ||
|
||
|
@@ -3316,7 +3318,8 @@ void Sema::MarkDevice(void) { | |
case attr::Kind::SYCLIntelNoGlobalWorkOffset: | ||
case attr::Kind::SYCLIntelUseStallEnableClusters: | ||
case attr::Kind::SYCLIntelLoopFuse: | ||
case attr::Kind::SYCLSimd: { | ||
case attr::Kind::SYCLSimd: | ||
case attr::Kind::SYCLIntelFPGAMaxConcurrency: { | ||
if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody && | ||
!KernelBody->getAttr<SYCLSimdAttr>()) { | ||
// Usual kernel can't call ESIMD functions. | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice -Wno-sycl-2017-compat -emit-llvm -o - %s | FileCheck %s | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please add a comment here describing what the test does. |
||
#include "sycl.hpp" | ||
|
||
// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]] | ||
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]] | ||
|
||
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name1"() #0 {{.*}} !max_concurrency !16 | ||
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name2"() #0 {{.*}} !max_concurrency ![[NUM2:[0-9]+]] | ||
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name3"() #0 {{.*}} !max_concurrency ![[NUM3:[0-9]+]] | ||
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name4"() #0 {{.*}} !max_concurrency ![[NUM1:[0-9]+]] | ||
|
||
template <int A> | ||
void max_concurrency() { | ||
int a[10]; | ||
// CHECK: ![[MD_MC]] = distinct !{![[MD_MC]], ![[MP:[0-9]+]], ![[MD_max_concurrency:[0-9]+]]} | ||
// CHECK-NEXT: ![[MP]] = !{!"llvm.loop.mustprogress"} | ||
// CHECK-NEXT: ![[MD_max_concurrency]] = !{!"llvm.loop.max_concurrency.count", i32 5} | ||
[[intel::max_concurrency(A)]] for (int i = 0; i != 10; ++i) | ||
a[i] = 0; | ||
// CHECK: ![[MD_MC_1]] = distinct !{![[MD_MC_1]], ![[MP]], ![[MD_max_concurrency_1:[0-9]+]]} | ||
// CHECK-NEXT: ![[MD_max_concurrency_1]] = !{!"llvm.loop.max_concurrency.count", i32 4} | ||
[[intel::max_concurrency(4)]] for (int i = 0; i != 10; ++i) | ||
a[i] = 0; | ||
} | ||
|
||
// CHECK: !16 = !{i32 4} | ||
// CHECK: !17 = !{i32 2} | ||
// CHECK: !18 = !{i32 3} | ||
|
||
template <typename name, typename Func> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Please use updated guidelines for new FE tests: |
||
__attribute__((sycl_kernel)) void kernel_single_task_1(const Func &kernelFunc) { | ||
kernelFunc(); | ||
} | ||
|
||
using namespace cl::sycl; | ||
queue q; | ||
|
||
class Functor1 { | ||
public: | ||
[[intel::max_concurrency(4)]] void operator()() const {} | ||
}; | ||
|
||
[[intel::max_concurrency(2)]] void foo() {} | ||
|
||
class Functor2 { | ||
public: | ||
void operator()() const { | ||
foo(); | ||
} | ||
}; | ||
|
||
template <int NT> | ||
class Functor3 { | ||
public: | ||
[[intel::max_concurrency(NT)]] void operator()() const {} | ||
}; | ||
|
||
template <int NT> | ||
[[intel::reqd_sub_group_size(NT)]] void func() {} | ||
|
||
|
||
int main() { | ||
kernel_single_task_1<class kernel_function>([]() { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. New guidelines suggest using kernel_single_task from the sycl.hpp header instead. |
||
max_concurrency<5>(); | ||
}); | ||
|
||
q.submit([&](handler &h) { | ||
Functor1 f1; | ||
h.single_task<class kernel_name1>(f1); | ||
|
||
Functor2 f2; | ||
h.single_task<class kernel_name2>(f2); | ||
|
||
|
||
h.single_task<class kernel_name3>( | ||
[]() [[intel::max_concurrency(3)]]{}); | ||
|
||
Functor3<4> f3; | ||
h.single_task<class kernel_name4>(f3); | ||
|
||
h.single_task<class kernel_name5>([]() { | ||
func<2>(); | ||
}); | ||
|
||
}); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you also add an example that uses '0'? |
||
|
||
|
||
return 0; | ||
} | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.