Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions clang/include/clang/Basic/Attr.td
Original file line number Diff line number Diff line change
Expand Up @@ -1854,10 +1854,10 @@ def SYCLIntelFPGAInitiationInterval : StmtAttr {
let Documentation = [SYCLIntelFPGAInitiationIntervalAttrDocs];
}

def SYCLIntelFPGAMaxConcurrency : StmtAttr {
def SYCLIntelFPGAMaxConcurrency : InheritableAttr {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def SYCLIntelFPGAMaxConcurrency : InheritableAttr {
def SYCLIntelFPGAMaxConcurrency : DeclOrStmtAttr {

let Spellings = [CXX11<"intelfpga","max_concurrency">,
CXX11<"intel","max_concurrency">];
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt],
let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt, Function],
ErrorDiag, "'for', 'while', and 'do' statements">;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The text of the diagnostic needs to be updated as well.

let Args = [ExprArgument<"NThreadsExpr">];
let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost];
Expand Down
11 changes: 7 additions & 4 deletions clang/include/clang/Basic/AttrDocs.td
Original file line number Diff line number Diff line change
Expand Up @@ -2799,10 +2799,10 @@ def SYCLIntelFPGAMaxConcurrencyAttrDocs : Documentation {
let Category = DocCatVariable;
let Heading = "intel::max_concurrency";
let Content = [{
This attribute applies to a loop. Indicates that the loop should allow no more
than N threads or iterations to execute it simultaneously. N must be a non
negative integer. '0' indicates the max_concurrency case to be unbounded. Cannot
be applied multiple times to the same loop.
This attribute applies to a loop or a function. Indicates that the loop/function
should allow no more than N threads or iterations to execute it simultaneously.
N must be a non negative integer. '0' indicates the max_concurrency case to be
unbounded. Cannot be applied multiple times to the same loop.

.. code-block:: c++

Expand All @@ -2811,10 +2811,13 @@ be applied multiple times to the same loop.
[[intel::max_concurrency(2)]] for (int i = 0; i != 10; ++i) a[i] = 0;
}

[[intel::component_max_concurrency(2)]] void foo1 { }
Copy link
Contributor

@smanna12 smanna12 Mar 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are adding new spelling here. You did not add it inside Attr.td file and also CodeGen test is using existing loop attribute spelling [[intel::max_concurrency()]]. Are we going to use same loop attribute spelling as function attribute?


template<int N>
void bar() {
[[intel::max_concurrency(N)]] for(;;) { }
}
[[intel::component_max_concurrency(N)]] void bar1() { }
Copy link
Contributor

@smanna12 smanna12 Mar 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here, new attribute spelling, you did not use this spelling


}];
}
Expand Down
4 changes: 3 additions & 1 deletion clang/include/clang/Basic/AttributeCommonInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,9 @@ class AttributeCommonInfo {
ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim ||
ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset ||
ParsedAttr == AT_SYCLIntelUseStallEnableClusters ||
ParsedAttr == AT_SYCLIntelLoopFuse || ParsedAttr == AT_SYCLSimd)
ParsedAttr == AT_SYCLIntelLoopFuse ||
ParsedAttr == AT_SYCLSimd ||
ParsedAttr == AT_SYCLIntelFPGAMaxConcurrency)
return true;

return false;
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Sema/Sema.h
Original file line number Diff line number Diff line change
Expand Up @@ -10295,6 +10295,11 @@ class Sema final {
/// declaration.
void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID);

/// AddSYCLIntelFPGAMaxConcurrencyAttr - Adds a max_component attribute to a
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

max_component -- did you mean max_concurrency?

/// particular declaration.
void AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D,
const AttributeCommonInfo &CI,
Expr *E);
bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type);
bool checkAllowedSYCLInitializer(VarDecl *VD,
bool CheckValueDependent = false);
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/CodeGen/CodeGenFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,15 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD,
llvm::ConstantAsMetadata::get(Builder.getInt32(1))};
Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs));
}

if (const SYCLIntelFPGAMaxConcurrencyAttr *A =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (const SYCLIntelFPGAMaxConcurrencyAttr *A =
if (const auto *A =

Because the type is spelled out explicitly in the initializer, this is a good time to use auto.

FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
const auto *CE = dyn_cast<ConstantExpr>(A->getNThreadsExpr());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

you are missing an assert here before ArgVal.
assert(CE && "Not an integer constant expression");

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const auto *CE = dyn_cast<ConstantExpr>(A->getNThreadsExpr());
const auto *CE = cast<ConstantExpr>(A->getNThreadsExpr());

No need for a dyn_cast<> followed by an assert.

Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Optional<llvm::APSInt> ArgVal = CE->getResultAsAPSInt();
llvm::APSInt ArgVal = CE->getResultAsAPSInt();

The function doesn't return an Optional, so I assume this was unintentional.

llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get(
Builder.getInt32(ArgVal->getSExtValue()))};
Fn->setMetadata("max_concurrency", llvm::MDNode::get(Context, AttrMDArgs));
}
}

/// Determine whether the function F ends with a return stmt.
Expand Down
42 changes: 42 additions & 0 deletions clang/lib/Sema/SemaDeclAttr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6294,6 +6294,45 @@ static void handleSYCLIntelPipeIOAttr(Sema &S, Decl *D,
S.addSYCLIntelPipeIOAttr(D, Attr, E);
}

void Sema::AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D,
const AttributeCommonInfo &CI,
Expr *E) {
if (!E->isValueDependent()) {
llvm::APSInt ArgVal;
ExprResult Res = VerifyIntegerConstantExpression(E, &ArgVal);
if (Res.isInvalid())
return;
E = Res.get();

// This attribute requires a strictly positive value.
if (ArgVal <= 0) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Document description says non-negative values are allowed, and that zero means unbounded.

Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer)
<< CI << /*positive*/ 0;
return;
}

if (const auto *DeclAttr = D->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>()) {
const auto *DeclExpr =
dyn_cast<ConstantExpr>(DeclAttr->getNThreadsExpr());
if (DeclExpr && ArgVal != DeclExpr->getResultAsAPSInt()) {
Diag(CI.getLoc(), diag::warn_duplicate_attribute) << CI;
Diag(DeclAttr->getLoc(), diag::note_previous_attribute);
return;
}
}
}

D->addAttr(::new (Context) SYCLIntelFPGAMaxConcurrencyAttr(Context, CI, E));
}

static void handleSYCLIntelFPGAMaxConcurrencyAttr(Sema &S, Decl *D,
const ParsedAttr &A) {
S.CheckDeprecatedSYCLAttributeSpelling(A);

Expr *E = A.getArgAsExpr(0);
S.AddSYCLIntelFPGAMaxConcurrencyAttr(D, A, E);
}

namespace {
struct IntrinToName {
uint32_t Id;
Expand Down Expand Up @@ -9547,6 +9586,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
case ParsedAttr::AT_SYCLIntelPipeIO:
handleSYCLIntelPipeIOAttr(S, D, AL);
break;
case ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency:
handleSYCLIntelFPGAMaxConcurrencyAttr(S, D, AL);
break;

// Swift attributes.
case ParsedAttr::AT_SwiftAsyncName:
Expand Down
5 changes: 4 additions & 1 deletion clang/lib/Sema/SemaSYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,8 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
Attrs.insert(A);
}
}
if (auto *A = FD->getAttr<SYCLIntelFPGAMaxConcurrencyAttr>())
Attrs.insert(A);
Copy link
Contributor

@smanna12 smanna12 Mar 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we going to propagate this attribute to the caller?


// TODO: vec_len_hint should be handled here

Expand Down Expand Up @@ -3316,7 +3318,8 @@ void Sema::MarkDevice(void) {
case attr::Kind::SYCLIntelNoGlobalWorkOffset:
case attr::Kind::SYCLIntelUseStallEnableClusters:
case attr::Kind::SYCLIntelLoopFuse:
case attr::Kind::SYCLSimd: {
case attr::Kind::SYCLSimd:
case attr::Kind::SYCLIntelFPGAMaxConcurrency: {
if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody &&
!KernelBody->getAttr<SYCLSimdAttr>()) {
// Usual kernel can't call ESIMD functions.
Expand Down
17 changes: 17 additions & 0 deletions clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,17 @@ static void instantiateIntelSYCLFunctionAttr(
S.addIntelSingleArgAttr<AttrName>(New, *Attr, Result.getAs<Expr>());
}

template <typename AttrName>
static void instantiateSYCLIntelFPGAMaxConcurrencyAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const SYCLIntelFPGAMaxConcurrencyAttr *A, Decl *New) {
EnterExpressionEvaluationContext Unevaluated(
S, Sema::ExpressionEvaluationContext::ConstantEvaluated);
ExprResult Result = S.SubstExpr(A->getNThreadsExpr(), TemplateArgs);
if (!Result.isInvalid())
S.AddSYCLIntelFPGAMaxConcurrencyAttr(New, *A, Result.getAs<Expr>());
}

static void instantiateIntelFPGAPrivateCopiesAttr(
Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
const IntelFPGAPrivateCopiesAttr *A, Decl *New) {
Expand Down Expand Up @@ -940,6 +951,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
*this, TemplateArgs, SYCLIntelMaxWorkGroupSize, New);
continue;
}
if (const auto *SYCLIntelMaxConcurrency =
dyn_cast<SYCLIntelFPGAMaxConcurrencyAttr>(TmplAttr)) {
instantiateSYCLIntelFPGAMaxConcurrencyAttr<SYCLIntelFPGAMaxConcurrencyAttr>(
*this, TemplateArgs, SYCLIntelMaxConcurrency, New);
continue;
}
// Existing DLL attribute on the instantiation takes precedence.
if (TmplAttr->getKind() == attr::DLLExport ||
TmplAttr->getKind() == attr::DLLImport) {
Expand Down
16 changes: 0 additions & 16 deletions clang/test/CodeGenSYCL/intel-fpga-loops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
// CHECK: br label %for.cond, !llvm.loop ![[MD_DLP:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_II:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_II_2:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_2:[0-9]+]]
// CHECK: br label %for.cond, !llvm.loop ![[MD_LC:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_LC_2:[0-9]+]]
// CHECK: br label %for.cond13, !llvm.loop ![[MD_LC_3:[0-9]+]]
Expand Down Expand Up @@ -35,19 +33,6 @@ void ii() {
a[i] = 0;
}

template <int A>
void max_concurrency() {
int a[10];
// CHECK: ![[MD_MC]] = distinct !{![[MD_MC]], ![[MP]], ![[MD_max_concurrency:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency]] = !{!"llvm.loop.max_concurrency.count", i32 0}
[[intel::max_concurrency(A)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// CHECK: ![[MD_MC_2]] = distinct !{![[MD_MC_2]], ![[MP]], ![[MD_max_concurrency_2:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency_2]] = !{!"llvm.loop.max_concurrency.count", i32 4}
[[intel::max_concurrency(4)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

template <int A>
void loop_coalesce() {
int a[10];
Expand Down Expand Up @@ -100,7 +85,6 @@ int main() {
kernel_single_task<class kernel_function>([]() {
disable_loop_pipelining();
ii<4>();
max_concurrency<0>();
loop_coalesce<2>();
max_interleaving<3>();
speculated_iterations<4>();
Expand Down
91 changes: 91 additions & 0 deletions clang/test/CodeGenSYCL/max-concurrency.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice -Wno-sycl-2017-compat -emit-llvm -o - %s | FileCheck %s

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a comment here describing what the test does.

#include "sycl.hpp"

// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]]
// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]]

// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name1"() #0 {{.*}} !max_concurrency !16
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name2"() #0 {{.*}} !max_concurrency ![[NUM2:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name3"() #0 {{.*}} !max_concurrency ![[NUM3:[0-9]+]]
// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name4"() #0 {{.*}} !max_concurrency ![[NUM1:[0-9]+]]

template <int A>
void max_concurrency() {
int a[10];
// CHECK: ![[MD_MC]] = distinct !{![[MD_MC]], ![[MP:[0-9]+]], ![[MD_max_concurrency:[0-9]+]]}
// CHECK-NEXT: ![[MP]] = !{!"llvm.loop.mustprogress"}
// CHECK-NEXT: ![[MD_max_concurrency]] = !{!"llvm.loop.max_concurrency.count", i32 5}
[[intel::max_concurrency(A)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
// CHECK: ![[MD_MC_1]] = distinct !{![[MD_MC_1]], ![[MP]], ![[MD_max_concurrency_1:[0-9]+]]}
// CHECK-NEXT: ![[MD_max_concurrency_1]] = !{!"llvm.loop.max_concurrency.count", i32 4}
[[intel::max_concurrency(4)]] for (int i = 0; i != 10; ++i)
a[i] = 0;
}

// CHECK: !16 = !{i32 4}
// CHECK: !17 = !{i32 2}
// CHECK: !18 = !{i32 3}

template <typename name, typename Func>
Copy link
Contributor

@smanna12 smanna12 Mar 16, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use updated guidelines for new FE tests:
Please refer section “Guidelines for adding DPC++ in-tree LIT tests (DPC++ Clang FE tests)” in the https://github.com/intel/llvm/blob/sycl/CONTRIBUTING.md guide for the suggested guidelines

__attribute__((sycl_kernel)) void kernel_single_task_1(const Func &kernelFunc) {
kernelFunc();
}

using namespace cl::sycl;
queue q;

class Functor1 {
public:
[[intel::max_concurrency(4)]] void operator()() const {}
};

[[intel::max_concurrency(2)]] void foo() {}

class Functor2 {
public:
void operator()() const {
foo();
}
};

template <int NT>
class Functor3 {
public:
[[intel::max_concurrency(NT)]] void operator()() const {}
};

template <int NT>
[[intel::reqd_sub_group_size(NT)]] void func() {}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean to use [[intel::max_concurrency(NT)]] here?


int main() {
kernel_single_task_1<class kernel_function>([]() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

New guidelines suggest using kernel_single_task from the sycl.hpp header instead.

max_concurrency<5>();
});

q.submit([&](handler &h) {
Functor1 f1;
h.single_task<class kernel_name1>(f1);

Functor2 f2;
h.single_task<class kernel_name2>(f2);


h.single_task<class kernel_name3>(
[]() [[intel::max_concurrency(3)]]{});

Functor3<4> f3;
h.single_task<class kernel_name4>(f3);

h.single_task<class kernel_name5>([]() {
func<2>();
});

});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you also add an example that uses '0'?



return 0;
}