From ad38dc696a3f579e048a8ca599246373389c3138 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Mon, 15 Mar 2021 16:02:15 -0400 Subject: [PATCH 1/3] [SYCL][FPGA] Allowing max-concurrency attribute on functions. --- clang/include/clang/Basic/Attr.td | 4 +- clang/include/clang/Basic/AttrDocs.td | 11 ++- .../include/clang/Basic/AttributeCommonInfo.h | 4 +- clang/include/clang/Sema/Sema.h | 5 + clang/lib/CodeGen/CodeGenFunction.cpp | 9 ++ clang/lib/Sema/SemaDeclAttr.cpp | 42 +++++++++ clang/lib/Sema/SemaSYCL.cpp | 5 +- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 17 ++++ clang/test/CodeGenSYCL/intel-fpga-loops.cpp | 16 ---- clang/test/CodeGenSYCL/max-concurrency.cpp | 91 +++++++++++++++++++ 10 files changed, 180 insertions(+), 24 deletions(-) create mode 100644 clang/test/CodeGenSYCL/max-concurrency.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index f2d7ba59a183c..b4463b40dbfd1 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1854,10 +1854,10 @@ def SYCLIntelFPGAInitiationInterval : StmtAttr { let Documentation = [SYCLIntelFPGAInitiationIntervalAttrDocs]; } -def SYCLIntelFPGAMaxConcurrency : StmtAttr { +def SYCLIntelFPGAMaxConcurrency : InheritableAttr { let Spellings = [CXX11<"intelfpga","max_concurrency">, CXX11<"intel","max_concurrency">]; - let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt], + let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt, Function], ErrorDiag, "'for', 'while', and 'do' statements">; let Args = [ExprArgument<"NThreadsExpr">]; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index c0821ebd48491..62757b50e1911 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2799,10 +2799,10 @@ def SYCLIntelFPGAMaxConcurrencyAttrDocs : Documentation { let Category = DocCatVariable; 
let Heading = "intel::max_concurrency"; let Content = [{ -This attribute applies to a loop. Indicates that the loop should allow no more -than N threads or iterations to execute it simultaneously. N must be a non -negative integer. '0' indicates the max_concurrency case to be unbounded. Cannot -be applied multiple times to the same loop. +This attribute applies to a loop or a function. Indicates that the loop/function +should allow no more than N threads or iterations to execute it simultaneously. +N must be a non-negative integer. '0' indicates the max_concurrency case to be +unbounded. Cannot be applied multiple times to the same loop. .. code-block:: c++ @@ -2811,10 +2811,13 @@ be applied multiple times to the same loop. [[intel::max_concurrency(2)]] for (int i = 0; i != 10; ++i) a[i] = 0; } + [[intel::component_max_concurrency(2)]] void foo1 { } + template void bar() { [[intel::max_concurrency(N)]] for(;;) { } } + [[intel::component_max_concurrency(N)]] void bar1() { } }]; } diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index e5f237c773627..4dfa71d733b20 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -167,7 +167,9 @@ class AttributeCommonInfo { ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim || ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset || ParsedAttr == AT_SYCLIntelUseStallEnableClusters || - ParsedAttr == AT_SYCLIntelLoopFuse || ParsedAttr == AT_SYCLSimd) + ParsedAttr == AT_SYCLIntelLoopFuse || + ParsedAttr == AT_SYCLSimd || + ParsedAttr == AT_SYCLIntelFPGAMaxConcurrency) return true; return false; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 2a3b9e3aaf6f2..b1d66ef633cf2 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10295,6 +10295,11 @@ class Sema final { /// declaration. 
void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID); + /// AddSYCLIntelFPGAMaxConcurrencyAttr - Adds a max_component attribute to a + /// particular declaration. + void AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *E); bool checkNSReturnsRetainedReturnType(SourceLocation loc, QualType type); bool checkAllowedSYCLInitializer(VarDecl *VD, bool CheckValueDependent = false); diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 7343c96890e86..5e47bb791c546 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -730,6 +730,15 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::ConstantAsMetadata::get(Builder.getInt32(1))}; Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs)); } + + if (const SYCLIntelFPGAMaxConcurrencyAttr *A = + FD->getAttr()) { + const auto *CE = dyn_cast(A->getNThreadsExpr()); + Optional ArgVal = CE->getResultAsAPSInt(); + llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get( + Builder.getInt32(ArgVal->getSExtValue()))}; + Fn->setMetadata("max_concurrency", llvm::MDNode::get(Context, AttrMDArgs)); + } } /// Determine whether the function F ends with a return stmt. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1ab711a56efb0..f83bb6c1bc0a0 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6294,6 +6294,45 @@ static void handleSYCLIntelPipeIOAttr(Sema &S, Decl *D, S.addSYCLIntelPipeIOAttr(D, Attr, E); } +void Sema::AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D, + const AttributeCommonInfo &CI, + Expr *E) { + if (!E->isValueDependent()) { + llvm::APSInt ArgVal; + ExprResult Res = VerifyIntegerConstantExpression(E, &ArgVal); + if (Res.isInvalid()) + return; + E = Res.get(); + + // This attribute requires a strictly positive value. 
+ if (ArgVal <= 0) { + Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer) + << CI << /*positive*/ 0; + return; + } + + if (const auto *DeclAttr = D->getAttr()) { + const auto *DeclExpr = + dyn_cast(DeclAttr->getNThreadsExpr()); + if (DeclExpr && ArgVal != DeclExpr->getResultAsAPSInt()) { + Diag(CI.getLoc(), diag::warn_duplicate_attribute) << CI; + Diag(DeclAttr->getLoc(), diag::note_previous_attribute); + return; + } + } + } + + D->addAttr(::new (Context) SYCLIntelFPGAMaxConcurrencyAttr(Context, CI, E)); +} + +static void handleSYCLIntelFPGAMaxConcurrencyAttr(Sema &S, Decl *D, + const ParsedAttr &A) { + S.CheckDeprecatedSYCLAttributeSpelling(A); + + Expr *E = A.getArgAsExpr(0); + S.AddSYCLIntelFPGAMaxConcurrencyAttr(D, A, E); +} + namespace { struct IntrinToName { uint32_t Id; @@ -9547,6 +9586,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_SYCLIntelPipeIO: handleSYCLIntelPipeIOAttr(S, D, AL); break; + case ParsedAttr::AT_SYCLIntelFPGAMaxConcurrency: + handleSYCLIntelFPGAMaxConcurrencyAttr(S, D, AL); + break; // Swift attributes. case ParsedAttr::AT_SwiftAsyncName: diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index a971793928f4c..e449220dff9de 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -559,6 +559,8 @@ class MarkDeviceFunction : public RecursiveASTVisitor { Attrs.insert(A); } } + if (auto *A = FD->getAttr()) + Attrs.insert(A); // TODO: vec_len_hint should be handled here @@ -3316,7 +3318,8 @@ void Sema::MarkDevice(void) { case attr::Kind::SYCLIntelNoGlobalWorkOffset: case attr::Kind::SYCLIntelUseStallEnableClusters: case attr::Kind::SYCLIntelLoopFuse: - case attr::Kind::SYCLSimd: { + case attr::Kind::SYCLSimd: + case attr::Kind::SYCLIntelFPGAMaxConcurrency: { if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody && !KernelBody->getAttr()) { // Usual kernel can't call ESIMD functions. 
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index d56ca1b873c12..ff2413d7e2fa9 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -694,6 +694,17 @@ static void instantiateIntelSYCLFunctionAttr( S.addIntelSingleArgAttr(New, *Attr, Result.getAs()); } +template +static void instantiateSYCLIntelFPGAMaxConcurrencyAttr( + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, + const SYCLIntelFPGAMaxConcurrencyAttr *A, Decl *New) { + EnterExpressionEvaluationContext Unevaluated( + S, Sema::ExpressionEvaluationContext::ConstantEvaluated); + ExprResult Result = S.SubstExpr(A->getNThreadsExpr(), TemplateArgs); + if (!Result.isInvalid()) + S.AddSYCLIntelFPGAMaxConcurrencyAttr(New, *A, Result.getAs()); +} + static void instantiateIntelFPGAPrivateCopiesAttr( Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs, const IntelFPGAPrivateCopiesAttr *A, Decl *New) { @@ -940,6 +951,12 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, *this, TemplateArgs, SYCLIntelMaxWorkGroupSize, New); continue; } + if (const auto *SYCLIntelMaxConcurrency = + dyn_cast(TmplAttr)) { + instantiateSYCLIntelFPGAMaxConcurrencyAttr( + *this, TemplateArgs, SYCLIntelMaxConcurrency, New); + continue; + } // Existing DLL attribute on the instantiation takes precedence. 
if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { diff --git a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp index 72f4d934e2e33..61fd4f1417239 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp @@ -3,8 +3,6 @@ // CHECK: br label %for.cond, !llvm.loop ![[MD_DLP:[0-9]+]] // CHECK: br label %for.cond, !llvm.loop ![[MD_II:[0-9]+]] // CHECK: br label %for.cond2, !llvm.loop ![[MD_II_2:[0-9]+]] -// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]] -// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_2:[0-9]+]] // CHECK: br label %for.cond, !llvm.loop ![[MD_LC:[0-9]+]] // CHECK: br label %for.cond2, !llvm.loop ![[MD_LC_2:[0-9]+]] // CHECK: br label %for.cond13, !llvm.loop ![[MD_LC_3:[0-9]+]] @@ -35,19 +33,6 @@ void ii() { a[i] = 0; } -template -void max_concurrency() { - int a[10]; - // CHECK: ![[MD_MC]] = distinct !{![[MD_MC]], ![[MP]], ![[MD_max_concurrency:[0-9]+]]} - // CHECK-NEXT: ![[MD_max_concurrency]] = !{!"llvm.loop.max_concurrency.count", i32 0} - [[intel::max_concurrency(A)]] for (int i = 0; i != 10; ++i) - a[i] = 0; - // CHECK: ![[MD_MC_2]] = distinct !{![[MD_MC_2]], ![[MP]], ![[MD_max_concurrency_2:[0-9]+]]} - // CHECK-NEXT: ![[MD_max_concurrency_2]] = !{!"llvm.loop.max_concurrency.count", i32 4} - [[intel::max_concurrency(4)]] for (int i = 0; i != 10; ++i) - a[i] = 0; -} - template void loop_coalesce() { int a[10]; @@ -100,7 +85,6 @@ int main() { kernel_single_task([]() { disable_loop_pipelining(); ii<4>(); - max_concurrency<0>(); loop_coalesce<2>(); max_interleaving<3>(); speculated_iterations<4>(); diff --git a/clang/test/CodeGenSYCL/max-concurrency.cpp b/clang/test/CodeGenSYCL/max-concurrency.cpp new file mode 100644 index 0000000000000..7feae3d186456 --- /dev/null +++ b/clang/test/CodeGenSYCL/max-concurrency.cpp @@ -0,0 +1,91 @@ +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes 
-triple spir64-unknown-unknown-sycldevice -Wno-sycl-2017-compat -emit-llvm -o - %s | FileCheck %s + +#include "sycl.hpp" + +// CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]] +// CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]] + +// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name1"() #0 {{.*}} !max_concurrency !16 +// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name2"() #0 {{.*}} !max_concurrency ![[NUM2:[0-9]+]] +// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name3"() #0 {{.*}} !max_concurrency ![[NUM3:[0-9]+]] +// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name4"() #0 {{.*}} !max_concurrency ![[NUM1:[0-9]+]] + +template +void max_concurrency() { + int a[10]; + // CHECK: ![[MD_MC]] = distinct !{![[MD_MC]], ![[MP:[0-9]+]], ![[MD_max_concurrency:[0-9]+]]} + // CHECK-NEXT: ![[MP]] = !{!"llvm.loop.mustprogress"} + // CHECK-NEXT: ![[MD_max_concurrency]] = !{!"llvm.loop.max_concurrency.count", i32 5} + [[intel::max_concurrency(A)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + // CHECK: ![[MD_MC_1]] = distinct !{![[MD_MC_1]], ![[MP]], ![[MD_max_concurrency_1:[0-9]+]]} + // CHECK-NEXT: ![[MD_max_concurrency_1]] = !{!"llvm.loop.max_concurrency.count", i32 4} + [[intel::max_concurrency(4)]] for (int i = 0; i != 10; ++i) + a[i] = 0; +} + +// CHECK: !16 = !{i32 4} +// CHECK: !17 = !{i32 2} +// CHECK: !18 = !{i32 3} + +template +__attribute__((sycl_kernel)) void kernel_single_task_1(const Func &kernelFunc) { + kernelFunc(); +} + +using namespace cl::sycl; +queue q; + +class Functor1 { +public: + [[intel::max_concurrency(4)]] void operator()() const {} +}; + +[[intel::max_concurrency(2)]] void foo() {} + +class Functor2 { +public: + void operator()() const { + foo(); + } +}; + +template +class Functor3 { +public: + [[intel::max_concurrency(NT)]] void operator()() const {} +}; + +template +[[intel::reqd_sub_group_size(NT)]] void func() {} + +int main() { + kernel_single_task_1([]() { + max_concurrency<5>(); + }); + + 
q.submit([&](handler &h) { + Functor1 f1; + h.single_task(f1); + + Functor2 f2; + h.single_task(f2); + + + h.single_task( + []() [[intel::max_concurrency(3)]]{}); + + Functor3<4> f3; + h.single_task(f3); + + h.single_task([]() { + func<2>(); + }); + + }); + + + return 0; +} + + From 4129be109434ef9c38127467a63cd4c6524f1f61 Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Wed, 17 Mar 2021 16:37:48 -0400 Subject: [PATCH 2/3] Responded to all comments in review. --- clang/include/clang/Basic/Attr.td | 13 ++- clang/include/clang/Basic/AttrDocs.td | 4 +- .../include/clang/Basic/AttributeCommonInfo.h | 3 +- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/CodeGen/CodeGenFunction.cpp | 11 +-- clang/lib/Sema/SemaSYCL.cpp | 29 +++++- .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 5 +- clang/test/CodeGenSYCL/max-concurrency.cpp | 18 ++-- clang/test/SemaSYCL/intel-fpga-loops.cpp | 2 +- clang/test/SemaSYCL/max-concurrency.cpp | 98 +++++++++++++++++++ 10 files changed, 153 insertions(+), 32 deletions(-) create mode 100644 clang/test/SemaSYCL/max-concurrency.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index b4463b40dbfd1..b26389a812771 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1854,11 +1854,14 @@ def SYCLIntelFPGAInitiationInterval : StmtAttr { let Documentation = [SYCLIntelFPGAInitiationIntervalAttrDocs]; } -def SYCLIntelFPGAMaxConcurrency : InheritableAttr { - let Spellings = [CXX11<"intelfpga","max_concurrency">, - CXX11<"intel","max_concurrency">]; - let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt, Function], - ErrorDiag, "'for', 'while', and 'do' statements">; +def SYCLIntelFPGAMaxConcurrency : DeclOrStmtAttr { + let Spellings = [ + CXX11<"intelfpga", "max_concurrency">, CXX11<"intel", "max_concurrency"> + ]; + let Subjects = + SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt, Function], + ErrorDiag, + "'for', 'while', 'do' statements, and 
(device) functions">; let Args = [ExprArgument<"NThreadsExpr">]; let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let HasCustomTypeTransform = 1; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 62757b50e1911..c6550be98d28f 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2811,13 +2811,13 @@ unbounded. Cannot be applied multiple times to the same loop. [[intel::max_concurrency(2)]] for (int i = 0; i != 10; ++i) a[i] = 0; } - [[intel::component_max_concurrency(2)]] void foo1 { } + [[intel::max_concurrency(2)]] void foo1() { } template void bar() { [[intel::max_concurrency(N)]] for(;;) { } } - [[intel::component_max_concurrency(N)]] void bar1() { } + [[intel::max_concurrency(N)]] void bar1() { } }]; } diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index 4dfa71d733b20..e46742782ae1b 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -167,8 +167,7 @@ class AttributeCommonInfo { ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim || ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset || ParsedAttr == AT_SYCLIntelUseStallEnableClusters || - ParsedAttr == AT_SYCLIntelLoopFuse || - ParsedAttr == AT_SYCLSimd || + ParsedAttr == AT_SYCLIntelLoopFuse || ParsedAttr == AT_SYCLSimd || ParsedAttr == AT_SYCLIntelFPGAMaxConcurrency) return true; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index b1d66ef633cf2..f365240c3e26e 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10295,7 +10295,7 @@ class Sema final { /// declaration. void addSYCLIntelPipeIOAttr(Decl *D, const AttributeCommonInfo &CI, Expr *ID); - /// AddSYCLIntelFPGAMaxConcurrencyAttr - Adds a max_component attribute to a + /// AddSYCLIntelFPGAMaxConcurrencyAttr - Adds a max_concurrency attribute to a /// particular declaration. 
void AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D, const AttributeCommonInfo &CI, diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 5e47bb791c546..b8b0a8574ec05 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -731,12 +731,11 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, Fn->setMetadata("stall_enable", llvm::MDNode::get(Context, AttrMDArgs)); } - if (const SYCLIntelFPGAMaxConcurrencyAttr *A = - FD->getAttr()) { - const auto *CE = dyn_cast(A->getNThreadsExpr()); - Optional ArgVal = CE->getResultAsAPSInt(); - llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get( - Builder.getInt32(ArgVal->getSExtValue()))}; + if (const auto *A = FD->getAttr()) { + const auto *CE = cast(A->getNThreadsExpr()); + llvm::APSInt ArgVal = CE->getResultAsAPSInt(); + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Builder.getInt32(ArgVal.getSExtValue()))}; Fn->setMetadata("max_concurrency", llvm::MDNode::get(Context, AttrMDArgs)); } } diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index e449220dff9de..1ea58a27c9b97 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -559,8 +559,13 @@ class MarkDeviceFunction : public RecursiveASTVisitor { Attrs.insert(A); } } + + // Attribute "max_concurrency" is applied to device functions only. The + // attribute is not propagated to the caller. 
if (auto *A = FD->getAttr()) - Attrs.insert(A); + if (ParentFD == SYCLKernel) { + Attrs.insert(A); + } // TODO: vec_len_hint should be handled here @@ -3311,6 +3316,25 @@ void Sema::MarkDevice(void) { } break; } + case attr::Kind::SYCLIntelFPGAMaxConcurrency: { + auto *SIMCA = cast(A); + if (auto *Existing = + SYCLKernel->getAttr()) { + ASTContext &Ctx = getASTContext(); + if (Existing->getNThreadsExpr() > SIMCA->getNThreadsExpr()) { + Diag(SYCLKernel->getLocation(), + diag::err_conflicting_sycl_kernel_attributes); + Diag(Existing->getLocation(), diag::note_conflicting_attribute); + Diag(SIMCA->getLocation(), diag::note_conflicting_attribute); + SYCLKernel->setInvalidDecl(); + } else { + SYCLKernel->addAttr(A); + } + } else { + SYCLKernel->addAttr(A); + } + break; + } case attr::Kind::SYCLIntelKernelArgsRestrict: case attr::Kind::SYCLIntelNumSimdWorkItems: case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz: @@ -3318,8 +3342,7 @@ void Sema::MarkDevice(void) { case attr::Kind::SYCLIntelNoGlobalWorkOffset: case attr::Kind::SYCLIntelUseStallEnableClusters: case attr::Kind::SYCLIntelLoopFuse: - case attr::Kind::SYCLSimd: - case attr::Kind::SYCLIntelFPGAMaxConcurrency: { + case attr::Kind::SYCLSimd: { if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody && !KernelBody->getAttr()) { // Usual kernel can't call ESIMD functions. 
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index ff2413d7e2fa9..d9178935fd0ee 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -953,8 +953,9 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, } if (const auto *SYCLIntelMaxConcurrency = dyn_cast(TmplAttr)) { - instantiateSYCLIntelFPGAMaxConcurrencyAttr( - *this, TemplateArgs, SYCLIntelMaxConcurrency, New); + instantiateSYCLIntelFPGAMaxConcurrencyAttr< + SYCLIntelFPGAMaxConcurrencyAttr>(*this, TemplateArgs, + SYCLIntelMaxConcurrency, New); continue; } // Existing DLL attribute on the instantiation takes precedence. diff --git a/clang/test/CodeGenSYCL/max-concurrency.cpp b/clang/test/CodeGenSYCL/max-concurrency.cpp index 7feae3d186456..395c3623dc3f0 100644 --- a/clang/test/CodeGenSYCL/max-concurrency.cpp +++ b/clang/test/CodeGenSYCL/max-concurrency.cpp @@ -1,13 +1,12 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice -Wno-sycl-2017-compat -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -disable-llvm-passes -triple spir64-unknown-unknown-sycldevice -sycl-std=2020 -emit-llvm -o - %s | FileCheck %s #include "sycl.hpp" // CHECK: br label %for.cond, !llvm.loop ![[MD_MC:[0-9]+]] // CHECK: br label %for.cond2, !llvm.loop ![[MD_MC_1:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name1"() #0 {{.*}} !max_concurrency !16 -// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name2"() #0 {{.*}} !max_concurrency ![[NUM2:[0-9]+]] -// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name3"() #0 {{.*}} !max_concurrency ![[NUM3:[0-9]+]] +// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name1"() #0 {{.*}} !max_concurrency ![[NUM1:[0-9]+]] +// CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name3"() #0 {{.*}} 
!max_concurrency ![[NUM2:[0-9]+]] // CHECK: define {{.*}}spir_kernel void @"{{.*}}kernel_name4"() #0 {{.*}} !max_concurrency ![[NUM1:[0-9]+]] template @@ -24,9 +23,8 @@ void max_concurrency() { a[i] = 0; } -// CHECK: !16 = !{i32 4} -// CHECK: !17 = !{i32 2} -// CHECK: !18 = !{i32 3} +// CHECK: ![[NUM1]] = !{i32 4} +// CHECK: ![[NUM2]] = !{i32 3} template __attribute__((sycl_kernel)) void kernel_single_task_1(const Func &kernelFunc) { @@ -34,7 +32,6 @@ __attribute__((sycl_kernel)) void kernel_single_task_1(const Func &kernelFunc) { } using namespace cl::sycl; -queue q; class Functor1 { public: @@ -57,9 +54,11 @@ class Functor3 { }; template -[[intel::reqd_sub_group_size(NT)]] void func() {} +[[intel::max_concurrency(NT)]] void func() {} int main() { + queue q; + kernel_single_task_1([]() { max_concurrency<5>(); }); @@ -71,7 +70,6 @@ int main() { Functor2 f2; h.single_task(f2); - h.single_task( []() [[intel::max_concurrency(3)]]{}); diff --git a/clang/test/SemaSYCL/intel-fpga-loops.cpp b/clang/test/SemaSYCL/intel-fpga-loops.cpp index 2f3b580f14ab3..53af8b7b2efdf 100644 --- a/clang/test/SemaSYCL/intel-fpga-loops.cpp +++ b/clang/test/SemaSYCL/intel-fpga-loops.cpp @@ -10,7 +10,7 @@ void foo() { [[intel::ivdep]] int a[10]; // expected-error@+1 {{'initiation_interval' attribute only applies to 'for', 'while', and 'do' statements}} [[intel::initiation_interval(2)]] int c[10]; - // expected-error@+1 {{'max_concurrency' attribute only applies to 'for', 'while', and 'do' statements}} + // expected-error@+1 {{'max_concurrency' attribute only applies to 'for', 'while', 'do' statements, and (device) functions}} [[intel::max_concurrency(2)]] int d[10]; // expected-error@+1 {{'disable_loop_pipelining' attribute only applies to 'for', 'while', and 'do' statements}} [[intel::disable_loop_pipelining]] int g[10]; diff --git a/clang/test/SemaSYCL/max-concurrency.cpp b/clang/test/SemaSYCL/max-concurrency.cpp new file mode 100644 index 0000000000000..f744ddefbc09d --- /dev/null +++ 
b/clang/test/SemaSYCL/max-concurrency.cpp @@ -0,0 +1,98 @@ +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -fsyntax-only -ast-dump -verify -pedantic %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +class Functor1 { +public: + [[intel::max_concurrency(4)]] void operator()() const {} +}; + +[[intel::max_concurrency]] void foo() {} // expected-error {{'max_concurrency' attribute takes one argument}} + +class Functor2 { +public: + void operator()() const { + foo(); + } +}; + +template +class Functor3 { +public: + [[intel::max_concurrency(NT)]] void operator()() const {} + // expected-error@+1 {{'max_concurrency' attribute only applies to 'for', 'while', 'do' statements, and (device) functions}} + [[intel::max_concurrency(2)]] int a[10]; +}; + +// expected-error@+1 {{'max_concurrency' attribute takes one argument}} +[[intel::max_concurrency(3, 3)]] void goo() {} + +class Functor4 { +public: + void operator() () const { + goo(); + } +}; + +// expected-error@+1 {{'max_concurrency' attribute requires a positive integral compile time constant expression}} +[[intel::max_concurrency(-1)]] void bar() {} +class Functor5 { +public: + void operator() () const { + bar(); + } +}; + +// expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'const char [16]'}} +[[intel::max_concurrency("numberofthreads")]] void zoo() {} +class Functor6 { +public: + void operator() () const { + zoo(); + } +}; + +template +[[intel::max_concurrency(NT)]] void func() {} + +int main() { + queue q; + + q.submit([&](handler &h) { + Functor1 f1; + h.single_task(f1); + + Functor2 f2; + h.single_task(f2); + + h.single_task( + []() [[intel::max_concurrency(3)]]{}); + + Functor3<4> f3; + h.single_task(f3); + + h.single_task([]() { + func<5>(); + }); + + }); +} + +// CHECK: FunctionDecl {{.*}}{{.*}}kernel_name1{{.*}} +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: ConstantExpr {{.*}} 
'int' +// CHECK: value: Int 4 +// CHECK: IntegerLiteral{{.*}}4{{$}} +// CHECK: FunctionDecl {{.*}}{{.*}}kernel_name3{{.*}} +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: ConstantExpr {{.*}} 'int' +// CHECK: value: Int 3 +// CHECK: IntegerLiteral{{.*}}3{{$}} +// CHECK: FunctionDecl {{.*}}{{.*}}kernel_name4{{.*}} +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: ConstantExpr {{.*}} 'int' +// CHECK: value: Int 4 +// CHECK: IntegerLiteral{{.*}}4{{$}} From 86352d3e53488f8b7aad40736cdbb5f7b9fa8e1d Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Wed, 17 Mar 2021 16:37:48 -0400 Subject: [PATCH 3/3] Responded to all comments in review. --- .../clang/Basic/DiagnosticSemaKinds.td | 4 +- clang/include/clang/Sema/Sema.h | 3 ++ clang/lib/Sema/SemaDecl.cpp | 2 + clang/lib/Sema/SemaDeclAttr.cpp | 31 ++++++++++++++- clang/lib/Sema/SemaSYCL.cpp | 22 +---------- clang/lib/Sema/SemaStmtAttr.cpp | 4 +- clang/test/SemaSYCL/max-concurrency.cpp | 39 ++++++++++++++++--- 7 files changed, 74 insertions(+), 31 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index d1e2a22702bb5..48a22ff109038 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -134,8 +134,8 @@ def err_intel_fpga_reg_limitations : Error < "__builtin_intel_fpga_reg">; def illegal_type_declared_here : Note< "field with illegal type declared here">; -def err_sycl_loop_attr_duplication : Error< - "duplicate %select{unroll|Intel FPGA}0 loop attribute %1">; +def err_sycl_attr_duplication : Error< + "duplicate %select{unroll|Intel FPGA}0 %select{loop|function}1 attribute %2">; def err_loop_unroll_compatibility : Error< "incompatible loop unroll instructions: '%0' and '%1'">; def err_pipe_attribute_arg_not_allowed : Error< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f365240c3e26e..3254c47f8aa1e 100644 --- 
a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10241,6 +10241,9 @@ class Sema final { IntelFPGAMaxReplicatesAttr * MergeIntelFPGAMaxReplicatesAttr(Decl *D, const IntelFPGAMaxReplicatesAttr &A); + SYCLIntelFPGAMaxConcurrencyAttr *MergeSYCLIntelFPGAMaxConcurrencyAttr( + Decl *D, const SYCLIntelFPGAMaxConcurrencyAttr &A); + /// AddAlignedAttr - Adds an aligned attribute to a particular declaration. void AddAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, bool IsPackExpansion); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index a4d507879d703..a4fc4c03a95d7 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2628,6 +2628,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D, NewAttr = S.MergeSYCLIntelNoGlobalWorkOffsetAttr(D, *A); else if (const auto *A = dyn_cast(Attr)) NewAttr = S.MergeIntelFPGAMaxReplicatesAttr(D, *A); + else if (const auto *A = dyn_cast(Attr)) + NewAttr = S.MergeSYCLIntelFPGAMaxConcurrencyAttr(D, *A); else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr)) NewAttr = cast(Attr->clone(S.Context)); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index f83bb6c1bc0a0..8cd8aa8306f35 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6294,6 +6294,30 @@ static void handleSYCLIntelPipeIOAttr(Sema &S, Decl *D, S.addSYCLIntelPipeIOAttr(D, Attr, E); } +SYCLIntelFPGAMaxConcurrencyAttr *Sema::MergeSYCLIntelFPGAMaxConcurrencyAttr( + Decl *D, const SYCLIntelFPGAMaxConcurrencyAttr &A) { + // Check to see if there's a duplicate attribute with different values + // already applied to the declaration. 
+ if (const auto *DeclAttr = D->getAttr()) { + const auto *DeclExpr = dyn_cast(DeclAttr->getNThreadsExpr()); + const auto *MergeExpr = dyn_cast(A.getNThreadsExpr()); + if (DeclExpr && MergeExpr && + DeclExpr->getResultAsAPSInt() != MergeExpr->getResultAsAPSInt()) { + Diag(DeclAttr->getLocation(), diag::err_sycl_attr_duplication) + << 1 << 1 << DeclAttr; + return nullptr; + } + } + // TODO + // max_concurrency and disable_component_pipelining attributes can't be applied + // to the same function. + // if (checkAttrMutualExclusion(S, D, AL)) + // return; + + return ::new (Context) + SYCLIntelFPGAMaxConcurrencyAttr(Context, A, A.getNThreadsExpr()); +} + void Sema::AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E) { @@ -6322,12 +6346,17 @@ void Sema::AddSYCLIntelFPGAMaxConcurrencyAttr(Decl *D, } } - D->addAttr(::new (Context) SYCLIntelFPGAMaxConcurrencyAttr(Context, CI, E)); + D->addAttr(::new (Context) SYCLIntelFPGAMaxConcurrencyAttr(Context, CI, E)); } static void handleSYCLIntelFPGAMaxConcurrencyAttr(Sema &S, Decl *D, const ParsedAttr &A) { S.CheckDeprecatedSYCLAttributeSpelling(A); + // TODO + // max_concurrency and disable_component_pipelining attributes can't be + // applied to the same function. 
+ // if (checkAttrMutualExclusion(S, D, AL)) + // return; Expr *E = A.getArgAsExpr(0); S.AddSYCLIntelFPGAMaxConcurrencyAttr(D, A, E); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 1ea58a27c9b97..f807d9dc5516c 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -3316,25 +3316,6 @@ void Sema::MarkDevice(void) { } break; } - case attr::Kind::SYCLIntelFPGAMaxConcurrency: { - auto *SIMCA = cast(A); - if (auto *Existing = - SYCLKernel->getAttr()) { - ASTContext &Ctx = getASTContext(); - if (Existing->getNThreadsExpr() > SIMCA->getNThreadsExpr()) { - Diag(SYCLKernel->getLocation(), - diag::err_conflicting_sycl_kernel_attributes); - Diag(Existing->getLocation(), diag::note_conflicting_attribute); - Diag(SIMCA->getLocation(), diag::note_conflicting_attribute); - SYCLKernel->setInvalidDecl(); - } else { - SYCLKernel->addAttr(A); - } - } else { - SYCLKernel->addAttr(A); - } - break; - } case attr::Kind::SYCLIntelKernelArgsRestrict: case attr::Kind::SYCLIntelNumSimdWorkItems: case attr::Kind::SYCLIntelSchedulerTargetFmaxMhz: @@ -3342,7 +3323,8 @@ void Sema::MarkDevice(void) { case attr::Kind::SYCLIntelNoGlobalWorkOffset: case attr::Kind::SYCLIntelUseStallEnableClusters: case attr::Kind::SYCLIntelLoopFuse: - case attr::Kind::SYCLSimd: { + case attr::Kind::SYCLSimd: + case attr::Kind::SYCLIntelFPGAMaxConcurrency: { if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody && !KernelBody->getAttr()) { // Usual kernel can't call ESIMD functions. diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index e4e679099cadb..35acce5e58633 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -627,8 +627,8 @@ CheckForDuplicationSYCLLoopAttribute(Sema &S, for (const auto *I : Attrs) { if (LoopAttr && isa(I)) { // Cannot specify same type of attribute twice. 
- S.Diag(I->getLocation(), diag::err_sycl_loop_attr_duplication) - << isIntelFPGAAttr << LoopAttr; + S.Diag(I->getLocation(), diag::err_sycl_attr_duplication) + << isIntelFPGAAttr << 0 << LoopAttr; } if (isa(I)) LoopAttr = cast(I); diff --git a/clang/test/SemaSYCL/max-concurrency.cpp b/clang/test/SemaSYCL/max-concurrency.cpp index f744ddefbc09d..16e65fbaed4ed 100644 --- a/clang/test/SemaSYCL/max-concurrency.cpp +++ b/clang/test/SemaSYCL/max-concurrency.cpp @@ -48,16 +48,13 @@ class Functor5 { // expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'const char [16]'}} [[intel::max_concurrency("numberofthreads")]] void zoo() {} -class Functor6 { -public: - void operator() () const { - zoo(); - } -}; template [[intel::max_concurrency(NT)]] void func() {} +[[intel::max_concurrency(8)]] void dup(); +[[intel::max_concurrency(9)]] void dup() {} // expected-error {{duplicate Intel FPGA function attribute 'max_concurrency'}} + int main() { queue q; @@ -81,6 +78,36 @@ int main() { }); } +// CHECK: CXXMethodDecl {{.*}}used operator() {{.*}} +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: ConstantExpr {{.*}} 'int' +// CHECK: value: Int 4 +// CHECK: IntegerLiteral {{.*}}4{{$}} +// CHECK: CXXMethodDecl {{.*}}operator() {{.*}} +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: DeclRefExpr {{.*}} 'int' NonTypeTemplateParm {{.*}} 'NT' 'int' +// CHECK: CXXMethodDecl {{.*}}{{.*}}used operator() {{.*}} +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: ConstantExpr {{.*}} 'int' +// CHECK: value: Int 4 +// CHECK: IntegerLiteral {{.*}}4{{$}} +// CHECK: FunctionDecl {{.*}}{{.*}}func {{.*}} +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: FunctionDecl {{.*}}{{.*}}used func 'void ()' +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: ConstantExpr {{.*}} 'int' +// CHECK: value: Int 5 +// CHECK: IntegerLiteral {{.*}}5{{$}} +// CHECK: FunctionDecl {{.*}}{{.*}}dup {{.*}} +// CHECK: 
SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: ConstantExpr {{.*}} 'int' +// CHECK: value: Int 8 +// CHECK: IntegerLiteral {{.*}}8{{$}} +// CHECK: FunctionDecl {{.*}}{{.*}}dup {{.*}} +// CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} +// CHECK: ConstantExpr {{.*}} 'int' +// CHECK: value: Int 9 +// CHECK: IntegerLiteral {{.*}}9{{$}} // CHECK: FunctionDecl {{.*}}{{.*}}kernel_name1{{.*}} // CHECK: SYCLIntelFPGAMaxConcurrencyAttr {{.*}} // CHECK: ConstantExpr {{.*}} 'int'