Skip to content

Commit a58da1a

Browse files
committed
[OPENMP50]Codegen for lastprivate conditional list items.
Added codegen support for lastprivate conditional. According to the standard, when the conditional modifier appears on the clause, if an assignment to a list item is encountered in the construct, then the original list item is assigned the value that is assigned to the new list item in the sequentially last iteration or lexically last section in which such an assignment is encountered. We look for assignment operations and check whether the left-hand side references a lastprivate conditional variable. If it does, the following code is emitted:

    if (last_iv_a <= iv) {
      last_iv_a = iv;
      last_a = lp_a;
    }

At the end, an implicit barrier is generated to wait for all threads to finish, and then, in the check for the last iteration, the private copy is assigned the last value:

    if (last_iter) {
      lp_a = last_a; // <--- new code
      a = lp_a;      // <--- store of private value to the original variable.
    }
1 parent f83801f commit a58da1a

File tree

8 files changed

+443
-18
lines changed

8 files changed

+443
-18
lines changed

clang/lib/CodeGen/CGExpr.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4717,6 +4717,9 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) {
47174717
if (RV.isScalar())
47184718
EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc());
47194719
EmitStoreThroughLValue(RV, LV);
4720+
if (getLangOpts().OpenMP)
4721+
CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this,
4722+
E->getLHS());
47204723
return LV;
47214724
}
47224725

clang/lib/CodeGen/CGExprComplex.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//
1111
//===----------------------------------------------------------------------===//
1212

13+
#include "CGOpenMPRuntime.h"
1314
#include "CodeGenFunction.h"
1415
#include "CodeGenModule.h"
1516
#include "clang/AST/StmtVisitor.h"
@@ -1136,7 +1137,11 @@ ComplexPairTy CodeGenFunction::EmitLoadOfComplex(LValue src,
11361137
LValue CodeGenFunction::EmitComplexAssignmentLValue(const BinaryOperator *E) {
11371138
assert(E->getOpcode() == BO_Assign);
11381139
ComplexPairTy Val; // ignored
1139-
return ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val);
1140+
LValue LVal = ComplexExprEmitter(*this).EmitBinAssignLValue(E, Val);
1141+
if (getLangOpts().OpenMP)
1142+
CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this,
1143+
E->getLHS());
1144+
return LVal;
11401145
}
11411146

11421147
typedef ComplexPairTy (ComplexExprEmitter::*CompoundFunc)(

clang/lib/CodeGen/CGExprScalar.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
#include "CGCleanup.h"
1515
#include "CGDebugInfo.h"
1616
#include "CGObjCRuntime.h"
17+
#include "CGOpenMPRuntime.h"
1718
#include "CodeGenFunction.h"
1819
#include "CodeGenModule.h"
1920
#include "ConstantEmitter.h"
@@ -2997,6 +2998,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
29972998
else
29982999
CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV);
29993000

3001+
if (CGF.getLangOpts().OpenMP)
3002+
CGF.CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(CGF,
3003+
E->getLHS());
30003004
return LHSLV;
30013005
}
30023006

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "clang/AST/Decl.h"
2020
#include "clang/AST/OpenMPClause.h"
2121
#include "clang/AST/StmtOpenMP.h"
22+
#include "clang/AST/StmtVisitor.h"
2223
#include "clang/Basic/BitmaskEnum.h"
2324
#include "clang/CodeGen/ConstantInitBuilder.h"
2425
#include "llvm/ADT/ArrayRef.h"
@@ -11401,6 +11402,268 @@ bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
1140111402
[VD](const NontemporalDeclsSet &Set) { return Set.count(VD) > 0; });
1140211403
}
1140311404

11405+
/// Registers the lastprivate conditional variables of directive \p S for the
/// lifetime of this object.
///
/// If \p S has at least one lastprivate clause whose kind is
/// OMPC_LASTPRIVATE_conditional, a new LastprivateConditionalData entry is
/// appended to the runtime's LastprivateConditionalStack. The entry maps the
/// declaration of every variable listed in such clauses to a name produced by
/// generateUniqueName and records \p IVLVal as the loop counter lvalue.
/// Otherwise nothing is pushed and the constructor returns early.
///
/// Unless in simd-only mode or \p S is a simd directive, the constructor also
/// builds a name from the directive's presumed source location and emits a
/// store of the loop counter's address into the artificial threadprivate
/// variable with that name.
///
/// \param CGF    Function codegen state used to emit the store.
/// \param S      Directive whose lastprivate clauses are inspected.
/// \param IVLVal LValue of the loop iteration counter.
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
11406+
CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
11407+
: CGM(CGF.CGM),
11408+
// NeedToPush is true iff any lastprivate clause on S is 'conditional'; the
// destructor reads the same flag to decide whether to pop.
NeedToPush(llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
11409+
[](const OMPLastprivateClause *C) {
11410+
return C->getKind() ==
11411+
OMPC_LASTPRIVATE_conditional;
11412+
})) {
11413+
assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
11414+
if (!NeedToPush)
11415+
return;
11416+
LastprivateConditionalData &Data =
11417+
CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
11418+
// Collect variables only from the 'conditional' lastprivate clauses; other
// lastprivate clauses are skipped.
for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
11419+
if (C->getKind() != OMPC_LASTPRIVATE_conditional)
11420+
continue;
11421+
11422+
for (const Expr *Ref : C->varlists()) {
11423+
// The cast<> asserts that each list item, after stripping parentheses and
// implicit casts, is a DeclRefExpr.
Data.DeclToUniqeName.try_emplace(
11424+
cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
11425+
generateUniqueName(CGM, "pl_cond", Ref));
11426+
}
11427+
}
11428+
Data.IVLVal = IVLVal;
11429+
// In simd only mode or for simd directives no need to generate threadprivate
11430+
// references for the loop iteration counter, we can use the original one
11431+
// since outlining cannot happen in simd regions.
11432+
if (CGF.getLangOpts().OpenMPSimd ||
11433+
isOpenMPSimdDirective(S.getDirectiveKind())) {
11434+
Data.UseOriginalIV = true;
11435+
return;
11436+
}
11437+
// Build the name of the threadprivate loop-counter pointer:
// "$pl_cond_<device>_<file>_<line>_<column>$iv", derived from the presumed
// location of the directive's beginning.
llvm::SmallString<16> Buffer;
11438+
llvm::raw_svector_ostream OS(Buffer);
11439+
PresumedLoc PLoc =
11440+
CGM.getContext().getSourceManager().getPresumedLoc(S.getBeginLoc());
11441+
assert(PLoc.isValid() && "Source location is expected to be always valid.");
11442+
11443+
llvm::sys::fs::UniqueID ID;
11444+
// NOTE(review): on failure only a diagnostic is reported; execution continues
// and ID is still read below to form the name. Confirm ID holds a
// well-defined value in that case.
if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
11445+
CGM.getDiags().Report(diag::err_cannot_open_file)
11446+
<< PLoc.getFilename() << EC.message();
11447+
OS << "$pl_cond_" << ID.getDevice() << "_" << ID.getFile() << "_"
11448+
<< PLoc.getLine() << "_" << PLoc.getColumn() << "$iv";
11449+
Data.IVName = OS.str();
11450+
11451+
// Global loop counter. Required to handle inner parallel-for regions.
11452+
// global_iv = &iv;
11453+
QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType());
11454+
Address GlobIVAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
11455+
CGF, PtrIVTy, Data.IVName);
11456+
LValue GlobIVLVal = CGF.MakeAddrLValue(GlobIVAddr, PtrIVTy);
11457+
CGF.EmitStoreOfScalar(IVLVal.getPointer(CGF), GlobIVLVal);
11458+
}
11459+
11460+
/// Removes the last entry of the runtime's LastprivateConditionalStack, but
/// only if the constructor pushed one (NeedToPush is set); otherwise does
/// nothing.
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
11461+
if (!NeedToPush)
11462+
return;
11463+
CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
11464+
}
11465+
11466+
namespace {
11467+
/// Checks if the lastprivate conditional variable is referenced in LHS.
///
/// The visitor walks an expression and returns true as soon as it reaches a
/// DeclRefExpr, or a MemberExpr whose base satisfies
/// CodeGenFunction::IsWrappedCXXThis, whose declaration is a key of
/// DeclToUniqeName in one of the supplied LastprivateConditionalData entries.
/// Entries are searched from last to first, so the most recently pushed entry
/// that contains the declaration wins. Details of the match are retrieved
/// with getFoundData().
11468+
class LastprivateConditionalRefChecker final
11469+
: public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
11470+
CodeGenFunction &CGF;
11471+
// Entries to search; the constructor stores the caller's ArrayRef as given.
ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
11472+
// Matching expression and its canonical declaration; null until a match.
const Expr *FoundE = nullptr;
11473+
const Decl *FoundD = nullptr;
11474+
// Values copied from the matching entry. NOTE(review): the StringRef members
// presumably view strings owned by the LPM entries and so would be valid only
// while those entries are alive; confirm against LastprivateConditionalData.
StringRef UniqueDeclName;
11475+
LValue IVLVal;
11476+
StringRef IVName;
11477+
// NOTE(review): Loc is neither read nor written anywhere in this class as
// shown; it looks unused.
SourceLocation Loc;
11478+
bool UseOriginalIV = false;
11479+
11480+
public:
11481+
/// Returns true if the declaration referenced by \p E is registered in one
/// of the entries, recording the match data from that entry.
bool VisitDeclRefExpr(const DeclRefExpr *E) {
11482+
for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11483+
llvm::reverse(LPM)) {
11484+
auto It = D.DeclToUniqeName.find(E->getDecl());
11485+
if (It == D.DeclToUniqeName.end())
11486+
continue;
11487+
FoundE = E;
11488+
FoundD = E->getDecl()->getCanonicalDecl();
11489+
UniqueDeclName = It->getSecond();
11490+
IVLVal = D.IVLVal;
11491+
IVName = D.IVName;
11492+
UseOriginalIV = D.UseOriginalIV;
11493+
break;
11494+
}
11495+
// FoundE equals E only if the loop above matched for this expression.
return FoundE == E;
11496+
}
11497+
/// Same lookup as VisitDeclRefExpr, keyed on the member declaration. Member
/// accesses whose base does not satisfy IsWrappedCXXThis are rejected without
/// being searched.
bool VisitMemberExpr(const MemberExpr *E) {
11498+
if (!CGF.IsWrappedCXXThis(E->getBase()))
11499+
return false;
11500+
for (const CGOpenMPRuntime::LastprivateConditionalData &D :
11501+
llvm::reverse(LPM)) {
11502+
auto It = D.DeclToUniqeName.find(E->getMemberDecl());
11503+
if (It == D.DeclToUniqeName.end())
11504+
continue;
11505+
FoundE = E;
11506+
FoundD = E->getMemberDecl()->getCanonicalDecl();
11507+
UniqueDeclName = It->getSecond();
11508+
IVLVal = D.IVLVal;
11509+
IVName = D.IVName;
11510+
UseOriginalIV = D.UseOriginalIV;
11511+
break;
11512+
}
11513+
return FoundE == E;
11514+
}
11515+
/// Fallback for all other statements: visits the children and returns true on
/// the first child that yields a match. Null children and children that are
/// expressions but not glvalues are skipped.
bool VisitStmt(const Stmt *S) {
11516+
for (const Stmt *Child : S->children()) {
11517+
if (!Child)
11518+
continue;
11519+
if (const auto *E = dyn_cast<Expr>(Child))
11520+
if (!E->isGLValue())
11521+
continue;
11522+
if (Visit(Child))
11523+
return true;
11524+
}
11525+
return false;
11526+
}
11527+
/// \param CGF Function codegen state, used for the IsWrappedCXXThis check.
/// \param LPM Entries to search for registered declarations.
explicit LastprivateConditionalRefChecker(
11528+
CodeGenFunction &CGF,
11529+
ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
11530+
: CGF(CGF), LPM(LPM) {}
11531+
/// Returns (FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) as
/// recorded by the last successful match; the members keep their initial
/// values if nothing matched.
std::tuple<const Expr *, const Decl *, StringRef, LValue, StringRef, bool>
11532+
getFoundData() const {
11533+
return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, IVName,
11534+
UseOriginalIV);
11535+
}
11536+
};
11537+
} // namespace
11538+
11539+
/// Emits the tracking update for a lastprivate conditional variable if the
/// expression \p LHS references one.
///
/// Does nothing when the OpenMP language version is below 50 or when
/// LastprivateConditionalRefChecker finds no registered variable in \p LHS.
/// Otherwise it creates (or reuses) two internal variables, one holding the
/// last recorded loop counter (named "<unique name>$iv") and one holding the
/// last recorded value (named "<unique name>"), and emits:
///
///   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
///
/// The update is emitted through emitCriticalRegion, named after the
/// variable's unique name, except in simd-only mode where it is emitted
/// directly.
///
/// \param CGF Function codegen state used for emission.
/// \param LHS Expression to scan for a lastprivate conditional variable.
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
11540+
const Expr *LHS) {
11541+
if (CGF.getLangOpts().OpenMP < 50)
11542+
return;
11543+
LastprivateConditionalRefChecker Checker(CGF, LastprivateConditionalStack);
11544+
if (!Checker.Visit(LHS))
11545+
return;
11546+
const Expr *FoundE;
11547+
// NOTE(review): FoundD is filled by std::tie below but not referenced again
// in this function.
const Decl *FoundD;
11548+
StringRef UniqueDeclName;
11549+
LValue IVLVal;
11550+
StringRef IVName;
11551+
bool UseOriginalIV;
11552+
std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, IVName, UseOriginalIV) =
11553+
Checker.getFoundData();
11554+
11555+
// Last updated loop counter for the lastprivate conditional var.
11556+
// int<xx> last_iv = 0;
11557+
llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
11558+
llvm::Constant *LastIV =
11559+
getOrCreateInternalVariable(LLIVTy, UniqueDeclName + "$iv");
11560+
// The cast<> asserts that the returned constant is a GlobalVariable.
cast<llvm::GlobalVariable>(LastIV)->setAlignment(
11561+
IVLVal.getAlignment().getAsAlign());
11562+
LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());
11563+
11564+
// Private address of the lastprivate conditional in the current context.
11565+
// priv_a
11566+
LValue LVal = CGF.EmitLValue(FoundE);
11567+
// Last value of the lastprivate conditional.
11568+
// decltype(priv_a) last_a;
11569+
llvm::Constant *Last = getOrCreateInternalVariable(
11570+
LVal.getAddress(CGF).getElementType(), UniqueDeclName);
11571+
cast<llvm::GlobalVariable>(Last)->setAlignment(
11572+
LVal.getAlignment().getAsAlign());
11573+
LValue LastLVal =
11574+
CGF.MakeAddrLValue(Last, LVal.getType(), LVal.getAlignment());
11575+
11576+
// Global loop counter. Required to handle inner parallel-for regions.
11577+
// global_iv
11578+
// When the original counter cannot be used, replace IVLVal with the lvalue
// reached through the threadprivate pointer named IVName.
if (!UseOriginalIV) {
11579+
QualType PtrIVTy = CGM.getContext().getPointerType(IVLVal.getType());
11580+
Address IVAddr = getAddrOfArtificialThreadPrivate(CGF, PtrIVTy, IVName);
11581+
IVLVal =
11582+
CGF.EmitLoadOfPointerLValue(IVAddr, PtrIVTy->castAs<PointerType>());
11583+
}
11584+
// The current counter value is loaded here, before the guarded region below
// is emitted.
llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, FoundE->getExprLoc());
11585+
11586+
// #pragma omp critical(a)
11587+
// if (last_iv <= iv) {
11588+
// last_iv = iv;
11589+
// last_a = priv_a;
11590+
// }
11591+
// NOTE(review): the lambda captures locals by reference; this relies on it
// being invoked before this function returns. The simd-only path below calls
// it directly; confirm emitCriticalRegion does the same.
auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
11592+
FoundE](CodeGenFunction &CGF, PrePostActionTy &Action) {
11593+
Action.Enter(CGF);
11594+
llvm::Value *LastIVVal =
11595+
CGF.EmitLoadOfScalar(LastIVLVal, FoundE->getExprLoc());
11596+
// (last_iv <= global_iv) ? Check if the variable is updated and store new
11597+
// value in global var.
11598+
llvm::Value *CmpRes;
11599+
// Pick a signed or unsigned "less or equal" comparison to match the
// signedness of the loop counter type.
if (IVLVal.getType()->isSignedIntegerType()) {
11600+
CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
11601+
} else {
11602+
assert(IVLVal.getType()->isUnsignedIntegerType() &&
11603+
"Loop iteration variable must be integer.");
11604+
CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
11605+
}
11606+
llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
11607+
llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
11608+
CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
11609+
// {
11610+
CGF.EmitBlock(ThenBB);
11611+
11612+
// last_iv = global_iv;
11613+
CGF.EmitStoreOfScalar(IVVal, LastIVLVal);
11614+
11615+
// last_a = priv_a;
11616+
// Copy according to the evaluation kind of the variable's type; only scalar
// and complex values are handled, aggregates hit llvm_unreachable.
switch (CGF.getEvaluationKind(LVal.getType())) {
11617+
case TEK_Scalar: {
11618+
llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, FoundE->getExprLoc());
11619+
CGF.EmitStoreOfScalar(PrivVal, LastLVal);
11620+
break;
11621+
}
11622+
case TEK_Complex: {
11623+
CodeGenFunction::ComplexPairTy PrivVal =
11624+
CGF.EmitLoadOfComplex(LVal, FoundE->getExprLoc());
11625+
CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
11626+
break;
11627+
}
11628+
case TEK_Aggregate:
11629+
llvm_unreachable(
11630+
"Aggregates are not supported in lastprivate conditional.");
11631+
}
11632+
// }
11633+
CGF.EmitBranch(ExitBB);
11634+
// There is no need to emit line number for unconditional branch.
11635+
// NOTE(review): the result of CreateEmpty is discarded immediately, and the
// branch has already been emitted on the previous line. If
// ApplyDebugLocation is a scoped guard, this statement has no lasting
// effect; confirm the intent.
(void)ApplyDebugLocation::CreateEmpty(CGF);
11636+
CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
11637+
};
11638+
11639+
if (CGM.getLangOpts().OpenMPSimd) {
11640+
// Do not emit as a critical region as no parallel region could be emitted.
11641+
RegionCodeGenTy ThenRCG(CodeGen);
11642+
ThenRCG(CGF);
11643+
} else {
11644+
emitCriticalRegion(CGF, UniqueDeclName, CodeGen, FoundE->getExprLoc());
11645+
}
11646+
}
11647+
11648+
/// Copies the last recorded value of a lastprivate conditional variable into
/// its private copy.
///
/// Does nothing when the OpenMP language version is below 50. Otherwise looks
/// up \p VD in the last entry of LastprivateConditionalStack, finds the module
/// global with the variable's unique name and, if that global exists, emits a
/// scalar load from it followed by a scalar store into \p PrivLVal.
///
/// \param CGF      Function codegen state used for emission.
/// \param PrivLVal LValue of the private copy that receives the value.
/// \param VD       The lastprivate conditional variable.
/// \param Loc      Source location used for the emitted load.
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
11649+
CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
11650+
SourceLocation Loc) {
11651+
if (CGF.getLangOpts().OpenMP < 50)
11652+
return;
11653+
// NOTE(review): back() is called without checking that the stack is
// non-empty, and the lookup result is only checked by assert; this assumes
// callers invoke the function only while an entry containing VD is pushed.
// Verify against the caller.
auto It = LastprivateConditionalStack.back().DeclToUniqeName.find(VD);
11654+
assert(It != LastprivateConditionalStack.back().DeclToUniqeName.end() &&
11655+
"Unknown lastprivate conditional variable.");
11656+
StringRef UniqueName = It->getSecond();
11657+
// The global is looked up by name only, never created here.
llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
11658+
// The variable was not updated in the region - exit.
11659+
if (!GV)
11660+
return;
11661+
LValue LPLVal = CGF.MakeAddrLValue(
11662+
GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
11663+
// NOTE(review): only a scalar load/store is emitted here, whereas
// checkAndEmitLastprivateConditional also stores complex values. Confirm
// complex-typed variables cannot reach this point.
llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
11664+
CGF.EmitStoreOfScalar(Res, PrivLVal);
11665+
}
11666+
1140411667
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
1140511668
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
1140611669
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {

0 commit comments

Comments
 (0)