summary | refs | log | tree | commit | diff
path: root/lib/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- lib/CodeGen/BackendUtil.cpp 37
-rw-r--r-- lib/CodeGen/CGDebugInfo.cpp 12
-rw-r--r-- lib/CodeGen/CGExpr.cpp 21
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.cpp 22
-rw-r--r-- lib/CodeGen/CGOpenMPRuntime.h 40
-rw-r--r-- lib/CodeGen/CGStmtOpenMP.cpp 436
-rw-r--r-- lib/CodeGen/CodeGenFunction.h 96
-rw-r--r-- lib/CodeGen/CodeGenModule.cpp 8
-rw-r--r-- lib/CodeGen/CodeGenPGO.cpp 2
-rw-r--r-- lib/CodeGen/CodeGenPGO.h 7
10 files changed, 523 insertions, 158 deletions
diff --git a/lib/CodeGen/BackendUtil.cpp b/lib/CodeGen/BackendUtil.cpp
index 20059d922f90b..85788b4272087 100644
--- a/lib/CodeGen/BackendUtil.cpp
+++ b/lib/CodeGen/BackendUtil.cpp
@@ -129,16 +129,20 @@ public:
// that we add to the PassManagerBuilder.
class PassManagerBuilderWrapper : public PassManagerBuilder {
public:
- PassManagerBuilderWrapper(const CodeGenOptions &CGOpts,
+ PassManagerBuilderWrapper(const Triple &TargetTriple,
+ const CodeGenOptions &CGOpts,
const LangOptions &LangOpts)
- : PassManagerBuilder(), CGOpts(CGOpts), LangOpts(LangOpts) {}
+ : PassManagerBuilder(), TargetTriple(TargetTriple), CGOpts(CGOpts),
+ LangOpts(LangOpts) {}
+ const Triple &getTargetTriple() const { return TargetTriple; }
const CodeGenOptions &getCGOpts() const { return CGOpts; }
const LangOptions &getLangOpts() const { return LangOpts; }
+
private:
+ const Triple &TargetTriple;
const CodeGenOptions &CGOpts;
const LangOptions &LangOpts;
};
-
}
static void addObjCARCAPElimPass(const PassManagerBuilder &Builder, PassManagerBase &PM) {
@@ -185,16 +189,35 @@ static void addSanitizerCoveragePass(const PassManagerBuilder &Builder,
PM.add(createSanitizerCoverageModulePass(Opts));
}
+// Check if ASan should use GC-friendly instrumentation for globals.
+// First of all, there is no point if -fdata-sections is off (except for MachO,
+// where this is not a factor). Also, on ELF this feature requires an assembler
+// extension that only works with -integrated-as at the moment.
+static bool asanUseGlobalsGC(const Triple &T, const CodeGenOptions &CGOpts) {
+ switch (T.getObjectFormat()) {
+ case Triple::MachO:
+ case Triple::COFF:
+ return true;
+ case Triple::ELF:
+ return CGOpts.DataSections && !CGOpts.DisableIntegratedAS;
+ default:
+ return false;
+ }
+}
+
static void addAddressSanitizerPasses(const PassManagerBuilder &Builder,
legacy::PassManagerBase &PM) {
const PassManagerBuilderWrapper &BuilderWrapper =
static_cast<const PassManagerBuilderWrapper&>(Builder);
+ const Triple &T = BuilderWrapper.getTargetTriple();
const CodeGenOptions &CGOpts = BuilderWrapper.getCGOpts();
bool Recover = CGOpts.SanitizeRecover.has(SanitizerKind::Address);
bool UseAfterScope = CGOpts.SanitizeAddressUseAfterScope;
+ bool UseGlobalsGC = asanUseGlobalsGC(T, CGOpts);
PM.add(createAddressSanitizerFunctionPass(/*CompileKernel*/ false, Recover,
UseAfterScope));
- PM.add(createAddressSanitizerModulePass(/*CompileKernel*/false, Recover));
+ PM.add(createAddressSanitizerModulePass(/*CompileKernel*/ false, Recover,
+ UseGlobalsGC));
}
static void addKernelAddressSanitizerPasses(const PassManagerBuilder &Builder,
@@ -407,6 +430,8 @@ static void initTargetOptions(llvm::TargetOptions &Options,
Options.EmulatedTLS = CodeGenOpts.EmulatedTLS;
Options.DebuggerTuning = CodeGenOpts.getDebuggerTuning();
+ if (CodeGenOpts.EnableSplitDwarf)
+ Options.MCOptions.SplitDwarfFile = CodeGenOpts.SplitDwarfFile;
Options.MCOptions.MCRelaxAll = CodeGenOpts.RelaxAll;
Options.MCOptions.MCSaveTempLabels = CodeGenOpts.SaveTempLabels;
Options.MCOptions.MCUseDwarfDirectory = !CodeGenOpts.NoDwarfDirectoryAsm;
@@ -434,8 +459,6 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
if (CodeGenOpts.DisableLLVMPasses)
return;
- PassManagerBuilderWrapper PMBuilder(CodeGenOpts, LangOpts);
-
// Figure out TargetLibraryInfo. This needs to be added to MPM and FPM
// manually (and not via PMBuilder), since some passes (eg. InstrProfiling)
// are inserted before PMBuilder ones - they'd get the default-constructed
@@ -444,6 +467,8 @@ void EmitAssemblyHelper::CreatePasses(legacy::PassManager &MPM,
std::unique_ptr<TargetLibraryInfoImpl> TLII(
createTLII(TargetTriple, CodeGenOpts));
+ PassManagerBuilderWrapper PMBuilder(TargetTriple, CodeGenOpts, LangOpts);
+
// At O0 and O1 we only run the always inliner which is more efficient. At
// higher optimization levels we run the normal inliner.
if (CodeGenOpts.OptimizationLevel <= 1) {
diff --git a/lib/CodeGen/CGDebugInfo.cpp b/lib/CodeGen/CGDebugInfo.cpp
index 2f6a2b95fb61b..dd32a44393c60 100644
--- a/lib/CodeGen/CGDebugInfo.cpp
+++ b/lib/CodeGen/CGDebugInfo.cpp
@@ -528,12 +528,14 @@ void CGDebugInfo::CreateCompileUnit() {
// Create new compile unit.
// FIXME - Eliminate TheCU.
TheCU = DBuilder.createCompileUnit(
- LangTag, DBuilder.createFile(remapDIPath(MainFileName),
- remapDIPath(getCurrentDirname()), CSKind,
- Checksum),
+ LangTag,
+ DBuilder.createFile(remapDIPath(MainFileName),
+ remapDIPath(getCurrentDirname()), CSKind, Checksum),
Producer, LO.Optimize, CGM.getCodeGenOpts().DwarfDebugFlags, RuntimeVers,
- CGM.getCodeGenOpts().SplitDwarfFile, EmissionKind, 0 /* DWOid */,
- CGM.getCodeGenOpts().SplitDwarfInlining,
+ CGM.getCodeGenOpts().EnableSplitDwarf
+ ? ""
+ : CGM.getCodeGenOpts().SplitDwarfFile,
+ EmissionKind, 0 /* DWOid */, CGM.getCodeGenOpts().SplitDwarfInlining,
CGM.getCodeGenOpts().DebugInfoForProfiling);
}
diff --git a/lib/CodeGen/CGExpr.cpp b/lib/CodeGen/CGExpr.cpp
index 719147a58e087..d0aacf65428fc 100644
--- a/lib/CodeGen/CGExpr.cpp
+++ b/lib/CodeGen/CGExpr.cpp
@@ -533,15 +533,6 @@ bool CodeGenFunction::sanitizePerformTypeCheck() const {
SanOpts.has(SanitizerKind::Vptr);
}
-/// Check if a runtime null check for \p Ptr can be omitted.
-static bool canOmitPointerNullCheck(llvm::Value *Ptr) {
- // Note: do not perform any constant-folding in this function. That is best
- // left to the IR builder.
-
- // Pointers to alloca'd memory are non-null.
- return isa<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases());
-}
-
void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
llvm::Value *Ptr, QualType Ty,
CharUnits Alignment,
@@ -560,11 +551,16 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
SmallVector<std::pair<llvm::Value *, SanitizerMask>, 3> Checks;
llvm::BasicBlock *Done = nullptr;
+ // Quickly determine whether we have a pointer to an alloca. It's possible
+ // to skip null checks, and some alignment checks, for these pointers. This
+ // can reduce compile-time significantly.
+ auto PtrToAlloca =
+ dyn_cast<llvm::AllocaInst>(Ptr->stripPointerCastsNoFollowAliases());
+
bool AllowNullPointers = TCK == TCK_DowncastPointer || TCK == TCK_Upcast ||
TCK == TCK_UpcastToVirtualBase;
if ((SanOpts.has(SanitizerKind::Null) || AllowNullPointers) &&
- !SkippedChecks.has(SanitizerKind::Null) &&
- !canOmitPointerNullCheck(Ptr)) {
+ !SkippedChecks.has(SanitizerKind::Null) && !PtrToAlloca) {
// The glvalue must not be an empty glvalue.
llvm::Value *IsNonNull = Builder.CreateIsNotNull(Ptr);
@@ -617,7 +613,8 @@ void CodeGenFunction::EmitTypeCheck(TypeCheckKind TCK, SourceLocation Loc,
AlignVal = getContext().getTypeAlignInChars(Ty).getQuantity();
// The glvalue must be suitably aligned.
- if (AlignVal > 1) {
+ if (AlignVal > 1 &&
+ (!PtrToAlloca || PtrToAlloca->getAlignment() < AlignVal)) {
llvm::Value *Align =
Builder.CreateAnd(Builder.CreatePtrToInt(Ptr, IntPtrTy),
llvm::ConstantInt::get(IntPtrTy, AlignVal - 1));
diff --git a/lib/CodeGen/CGOpenMPRuntime.cpp b/lib/CodeGen/CGOpenMPRuntime.cpp
index 874b6a69e513f..d1a706b8821ef 100644
--- a/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -2466,16 +2466,14 @@ static int addMonoNonMonoModifier(OpenMPSchedType Schedule,
return Schedule | Modifier;
}
-void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
- SourceLocation Loc,
- const OpenMPScheduleTy &ScheduleKind,
- unsigned IVSize, bool IVSigned,
- bool Ordered, llvm::Value *UB,
- llvm::Value *Chunk) {
+void CGOpenMPRuntime::emitForDispatchInit(
+ CodeGenFunction &CGF, SourceLocation Loc,
+ const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
+ bool Ordered, const DispatchRTInput &DispatchValues) {
if (!CGF.HaveInsertPoint())
return;
- OpenMPSchedType Schedule =
- getRuntimeSchedule(ScheduleKind.Schedule, Chunk != nullptr, Ordered);
+ OpenMPSchedType Schedule = getRuntimeSchedule(
+ ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
assert(Ordered ||
(Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
@@ -2486,14 +2484,14 @@ void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
// kmp_int[32|64] stride, kmp_int[32|64] chunk);
// If the Chunk was not specified in the clause - use default value 1.
- if (Chunk == nullptr)
- Chunk = CGF.Builder.getIntN(IVSize, 1);
+ llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
+ : CGF.Builder.getIntN(IVSize, 1);
llvm::Value *Args[] = {
emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
CGF.Builder.getInt32(addMonoNonMonoModifier(
Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
- CGF.Builder.getIntN(IVSize, 0), // Lower
- UB, // Upper
+ DispatchValues.LB, // Lower
+ DispatchValues.UB, // Upper
CGF.Builder.getIntN(IVSize, 1), // Stride
Chunk // Chunk
};
diff --git a/lib/CodeGen/CGOpenMPRuntime.h b/lib/CodeGen/CGOpenMPRuntime.h
index 7901a6b7a8fce..6f460f121791e 100644
--- a/lib/CodeGen/CGOpenMPRuntime.h
+++ b/lib/CodeGen/CGOpenMPRuntime.h
@@ -672,16 +672,50 @@ public:
///
virtual bool isDynamic(OpenMPScheduleClauseKind ScheduleKind) const;
+ /// struct with the values to be passed to the dispatch runtime function
+ struct DispatchRTInput {
+ /// Loop lower bound
+ llvm::Value *LB = nullptr;
+ /// Loop upper bound
+ llvm::Value *UB = nullptr;
+ /// Chunk size specified using 'schedule' clause (nullptr if chunk
+ /// was not specified)
+ llvm::Value *Chunk = nullptr;
+ DispatchRTInput() = default;
+ DispatchRTInput(llvm::Value *LB, llvm::Value *UB, llvm::Value *Chunk)
+ : LB(LB), UB(UB), Chunk(Chunk) {}
+ };
+
+ /// Call the appropriate runtime routine to initialize it before start
+ /// of loop.
+ ///
+ /// This is used for non static scheduled types and when the ordered
+ /// clause is present on the loop construct.
+ /// Depending on the loop schedule, it is necessary to call some runtime
+ /// routine before start of the OpenMP loop to get the loop upper / lower
+ /// bounds \a LB and \a UB and stride \a ST.
+ ///
+ /// \param CGF Reference to current CodeGenFunction.
+ /// \param Loc Clang source location.
+ /// \param ScheduleKind Schedule kind, specified by the 'schedule' clause.
+ /// \param IVSize Size of the iteration variable in bits.
+ /// \param IVSigned Sign of the iteration variable.
+ /// \param Ordered true if loop is ordered, false otherwise.
+ /// \param DispatchValues struct containing llvm values for lower bound, upper
+ /// bound, and chunk expression.
+ /// If the chunk is nullptr (the default), a chunk size of 1 is used.
+ ///
virtual void emitForDispatchInit(CodeGenFunction &CGF, SourceLocation Loc,
const OpenMPScheduleTy &ScheduleKind,
unsigned IVSize, bool IVSigned, bool Ordered,
- llvm::Value *UB,
- llvm::Value *Chunk = nullptr);
+ const DispatchRTInput &DispatchValues);
/// \brief Call the appropriate runtime routine to initialize it before start
/// of loop.
///
- /// Depending on the loop schedule, it is nesessary to call some runtime
+ /// This is used only in case of static schedule, when the user did not
+ /// specify an ordered clause on the loop construct.
+ /// Depending on the loop schedule, it is necessary to call some runtime
/// routine before start of the OpenMP loop to get the loop upper / lower
/// bounds \a LB and \a UB and stride \a ST.
///
diff --git a/lib/CodeGen/CGStmtOpenMP.cpp b/lib/CodeGen/CGStmtOpenMP.cpp
index 22269e42c7a00..f738dd0750faa 100644
--- a/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/lib/CodeGen/CGStmtOpenMP.cpp
@@ -87,7 +87,8 @@ public:
class OMPParallelScope final : public OMPLexicalScope {
bool EmitPreInitStmt(const OMPExecutableDirective &S) {
OpenMPDirectiveKind Kind = S.getDirectiveKind();
- return !isOpenMPTargetExecutionDirective(Kind) &&
+ return !(isOpenMPTargetExecutionDirective(Kind) ||
+ isOpenMPLoopBoundSharingDirective(Kind)) &&
isOpenMPParallelDirective(Kind);
}
@@ -1249,10 +1250,20 @@ static void emitPostUpdateForReductionClause(
CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
-static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
- const OMPExecutableDirective &S,
- OpenMPDirectiveKind InnermostKind,
- const RegionCodeGenTy &CodeGen) {
+namespace {
+/// Codegen lambda for appending distribute lower and upper bounds to outlined
+/// parallel function. This is necessary for combined constructs such as
+/// 'distribute parallel for'
+typedef llvm::function_ref<void(CodeGenFunction &,
+ const OMPExecutableDirective &,
+ llvm::SmallVectorImpl<llvm::Value *> &)>
+ CodeGenBoundParametersTy;
+} // anonymous namespace
+
+static void emitCommonOMPParallelDirective(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
+ const CodeGenBoundParametersTy &CodeGenBoundParameters) {
const CapturedStmt *CS = S.getCapturedStmt(OMPD_parallel);
auto OutlinedFn = CGF.CGM.getOpenMPRuntime().emitParallelOutlinedFunction(
S, *CS->getCapturedDecl()->param_begin(), InnermostKind, CodeGen);
@@ -1279,11 +1290,20 @@ static void emitCommonOMPParallelDirective(CodeGenFunction &CGF,
OMPParallelScope Scope(CGF, S);
llvm::SmallVector<llvm::Value *, 16> CapturedVars;
+ // Combining 'distribute' with 'for' requires sharing each 'distribute' chunk
+ // lower and upper bounds with the pragma 'for' chunking mechanism.
+ // The following lambda takes care of appending the lower and upper bound
+ // parameters when necessary
+ CodeGenBoundParameters(CGF, S, CapturedVars);
CGF.GenerateOpenMPCapturedVars(*CS, CapturedVars);
CGF.CGM.getOpenMPRuntime().emitParallelCall(CGF, S.getLocStart(), OutlinedFn,
CapturedVars, IfCond);
}
+static void emitEmptyBoundParameters(CodeGenFunction &,
+ const OMPExecutableDirective &,
+ llvm::SmallVectorImpl<llvm::Value *> &) {}
+
void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
// Emit parallel region as a standalone region.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
@@ -1304,7 +1324,8 @@ void CodeGenFunction::EmitOMPParallelDirective(const OMPParallelDirective &S) {
CGF.EmitStmt(cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_parallel, CodeGen,
+ emitEmptyBoundParameters);
emitPostUpdateForReductionClause(
*this, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
@@ -1649,6 +1670,13 @@ void CodeGenFunction::EmitOMPSimdFinal(
EmitBlock(DoneBB, /*IsFinished=*/true);
}
+static void emitOMPLoopBodyWithStopPoint(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ CodeGenFunction::JumpDest LoopExit) {
+ CGF.EmitOMPLoopBody(S, LoopExit);
+ CGF.EmitStopPoint(&S);
+};
+
void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPLoopScope PreInitScope(CGF, S);
@@ -1731,9 +1759,12 @@ void CodeGenFunction::EmitOMPSimdDirective(const OMPSimdDirective &S) {
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_simd, CodeGen);
}
-void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
- const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+void CodeGenFunction::EmitOMPOuterLoop(
+ bool DynamicOrOrdered, bool IsMonotonic, const OMPLoopDirective &S,
+ CodeGenFunction::OMPPrivateScope &LoopScope,
+ const CodeGenFunction::OMPLoopArguments &LoopArgs,
+ const CodeGenFunction::CodeGenLoopTy &CodeGenLoop,
+ const CodeGenFunction::CodeGenOrderedTy &CodeGenOrdered) {
auto &RT = CGM.getOpenMPRuntime();
const Expr *IVExpr = S.getIterationVariable();
@@ -1751,15 +1782,18 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
llvm::Value *BoolCondVal = nullptr;
if (!DynamicOrOrdered) {
- // UB = min(UB, GlobalUB)
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ // UB = min(UB, GlobalUB) or
+ // UB = min(UB, PrevUB) for combined loop sharing constructs (e.g.
+ // 'distribute parallel for')
+ EmitIgnoredExpr(LoopArgs.EUB);
// IV = LB
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(LoopArgs.Init);
// IV < UB
- BoolCondVal = EvaluateExprAsBool(S.getCond());
+ BoolCondVal = EvaluateExprAsBool(LoopArgs.Cond);
} else {
- BoolCondVal = RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, IL,
- LB, UB, ST);
+ BoolCondVal =
+ RT.emitForNext(*this, S.getLocStart(), IVSize, IVSigned, LoopArgs.IL,
+ LoopArgs.LB, LoopArgs.UB, LoopArgs.ST);
}
// If there are any cleanups between here and the loop-exit scope,
@@ -1779,7 +1813,7 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
// Emit "IV = LB" (in case of static schedule, we have already calculated new
// LB for loop condition and emitted it above).
if (DynamicOrOrdered)
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(LoopArgs.Init);
// Create a block for the increment.
auto Continue = getJumpDestInCurrentScope("omp.dispatch.inc");
@@ -1793,24 +1827,27 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
EmitOMPSimdInit(S, IsMonotonic);
SourceLocation Loc = S.getLocStart();
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(), S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
- },
- [Ordered, IVSize, IVSigned, Loc](CodeGenFunction &CGF) {
- if (Ordered) {
- CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(
- CGF, Loc, IVSize, IVSigned);
- }
- });
+
+ // when 'distribute' is not combined with a 'for':
+ // while (idx <= UB) { BODY; ++idx; }
+ // when 'distribute' is combined with a 'for'
+ // (e.g. 'distribute parallel for')
+ // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ EmitOMPInnerLoop(
+ S, LoopScope.requiresCleanups(), LoopArgs.Cond, LoopArgs.IncExpr,
+ [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+ CodeGenLoop(CGF, S, LoopExit);
+ },
+ [IVSize, IVSigned, Loc, &CodeGenOrdered](CodeGenFunction &CGF) {
+ CodeGenOrdered(CGF, Loc, IVSize, IVSigned);
+ });
EmitBlock(Continue.getBlock());
BreakContinueStack.pop_back();
if (!DynamicOrOrdered) {
// Emit "LB = LB + Stride", "UB = UB + Stride".
- EmitIgnoredExpr(S.getNextLowerBound());
- EmitIgnoredExpr(S.getNextUpperBound());
+ EmitIgnoredExpr(LoopArgs.NextLB);
+ EmitIgnoredExpr(LoopArgs.NextUB);
}
EmitBranch(CondBlock);
@@ -1829,7 +1866,8 @@ void CodeGenFunction::EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
void CodeGenFunction::EmitOMPForOuterLoop(
const OpenMPScheduleTy &ScheduleKind, bool IsMonotonic,
const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds) {
auto &RT = CGM.getOpenMPRuntime();
// Dynamic scheduling of the outer loop (dynamic, guided, auto, runtime).
@@ -1838,7 +1876,7 @@ void CodeGenFunction::EmitOMPForOuterLoop(
assert((Ordered ||
!RT.isStaticNonchunked(ScheduleKind.Schedule,
- /*Chunked=*/Chunk != nullptr)) &&
+ LoopArgs.Chunk != nullptr)) &&
"static non-chunked schedule does not need outer loop");
// Emit outer loop.
@@ -1896,22 +1934,46 @@ void CodeGenFunction::EmitOMPForOuterLoop(
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
if (DynamicOrOrdered) {
- llvm::Value *UBVal = EmitScalarExpr(S.getLastIteration());
+ auto DispatchBounds = CGDispatchBounds(*this, S, LoopArgs.LB, LoopArgs.UB);
+ llvm::Value *LBVal = DispatchBounds.first;
+ llvm::Value *UBVal = DispatchBounds.second;
+ CGOpenMPRuntime::DispatchRTInput DipatchRTInputValues = {LBVal, UBVal,
+ LoopArgs.Chunk};
RT.emitForDispatchInit(*this, S.getLocStart(), ScheduleKind, IVSize,
- IVSigned, Ordered, UBVal, Chunk);
+ IVSigned, Ordered, DipatchRTInputValues);
} else {
RT.emitForStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize, IVSigned,
- Ordered, IL, LB, UB, ST, Chunk);
+ Ordered, LoopArgs.IL, LoopArgs.LB, LoopArgs.UB,
+ LoopArgs.ST, LoopArgs.Chunk);
}
- EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, Ordered, LB, UB,
- ST, IL, Chunk);
+ auto &&CodeGenOrdered = [Ordered](CodeGenFunction &CGF, SourceLocation Loc,
+ const unsigned IVSize,
+ const bool IVSigned) {
+ if (Ordered) {
+ CGF.CGM.getOpenMPRuntime().emitForOrderedIterationEnd(CGF, Loc, IVSize,
+ IVSigned);
+ }
+ };
+
+ OMPLoopArguments OuterLoopArgs(LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
+ LoopArgs.IL, LoopArgs.Chunk, LoopArgs.EUB);
+ OuterLoopArgs.IncExpr = S.getInc();
+ OuterLoopArgs.Init = S.getInit();
+ OuterLoopArgs.Cond = S.getCond();
+ OuterLoopArgs.NextLB = S.getNextLowerBound();
+ OuterLoopArgs.NextUB = S.getNextUpperBound();
+ EmitOMPOuterLoop(DynamicOrOrdered, IsMonotonic, S, LoopScope, OuterLoopArgs,
+ emitOMPLoopBodyWithStopPoint, CodeGenOrdered);
}
+static void emitEmptyOrdered(CodeGenFunction &, SourceLocation Loc,
+ const unsigned IVSize, const bool IVSigned) {}
+
void CodeGenFunction::EmitOMPDistributeOuterLoop(
- OpenMPDistScheduleClauseKind ScheduleKind,
- const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk) {
+ OpenMPDistScheduleClauseKind ScheduleKind, const OMPLoopDirective &S,
+ OMPPrivateScope &LoopScope, const OMPLoopArguments &LoopArgs,
+ const CodeGenLoopTy &CodeGenLoopContent) {
auto &RT = CGM.getOpenMPRuntime();
@@ -1924,26 +1986,159 @@ void CodeGenFunction::EmitOMPDistributeOuterLoop(
const unsigned IVSize = getContext().getTypeSize(IVExpr->getType());
const bool IVSigned = IVExpr->getType()->hasSignedIntegerRepresentation();
- RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind,
- IVSize, IVSigned, /* Ordered = */ false,
- IL, LB, UB, ST, Chunk);
+ RT.emitDistributeStaticInit(*this, S.getLocStart(), ScheduleKind, IVSize,
+ IVSigned, /* Ordered = */ false, LoopArgs.IL,
+ LoopArgs.LB, LoopArgs.UB, LoopArgs.ST,
+ LoopArgs.Chunk);
- EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false,
- S, LoopScope, /* Ordered = */ false, LB, UB, ST, IL, Chunk);
+ // for combined 'distribute' and 'for' the increment expression of distribute
+ // is stored in DistInc. For 'distribute' alone, it is in Inc.
+ Expr *IncExpr;
+ if (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()))
+ IncExpr = S.getDistInc();
+ else
+ IncExpr = S.getInc();
+
+ // this routine is shared by 'omp distribute parallel for' and
+ // 'omp distribute': select the right EUB expression depending on the
+ // directive
+ OMPLoopArguments OuterLoopArgs;
+ OuterLoopArgs.LB = LoopArgs.LB;
+ OuterLoopArgs.UB = LoopArgs.UB;
+ OuterLoopArgs.ST = LoopArgs.ST;
+ OuterLoopArgs.IL = LoopArgs.IL;
+ OuterLoopArgs.Chunk = LoopArgs.Chunk;
+ OuterLoopArgs.EUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedEnsureUpperBound()
+ : S.getEnsureUpperBound();
+ OuterLoopArgs.IncExpr = IncExpr;
+ OuterLoopArgs.Init = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedInit()
+ : S.getInit();
+ OuterLoopArgs.Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedCond()
+ : S.getCond();
+ OuterLoopArgs.NextLB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedNextLowerBound()
+ : S.getNextLowerBound();
+ OuterLoopArgs.NextUB = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedNextUpperBound()
+ : S.getNextUpperBound();
+
+ EmitOMPOuterLoop(/* DynamicOrOrdered = */ false, /* IsMonotonic = */ false, S,
+ LoopScope, OuterLoopArgs, CodeGenLoopContent,
+ emitEmptyOrdered);
+}
+
+/// Emit a helper variable and return corresponding lvalue.
+static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
+ const DeclRefExpr *Helper) {
+ auto VDecl = cast<VarDecl>(Helper->getDecl());
+ CGF.EmitVarDecl(*VDecl);
+ return CGF.EmitLValue(Helper);
+}
+
+static std::pair<LValue, LValue>
+emitDistributeParallelForInnerBounds(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ LValue LB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
+
+ // When composing 'distribute' with 'for' (e.g. as in 'distribute
+ // parallel for') we need to use the 'distribute'
+ // chunk lower and upper bounds rather than the whole loop iteration
+ // space. These are parameters to the outlined function for 'parallel'
+ // and we copy the bounds of the previous schedule into the
+ // current ones.
+ LValue PrevLB = CGF.EmitLValue(LS.getPrevLowerBoundVariable());
+ LValue PrevUB = CGF.EmitLValue(LS.getPrevUpperBoundVariable());
+ llvm::Value *PrevLBVal = CGF.EmitLoadOfScalar(PrevLB, SourceLocation());
+ PrevLBVal = CGF.EmitScalarConversion(
+ PrevLBVal, LS.getPrevLowerBoundVariable()->getType(),
+ LS.getIterationVariable()->getType(), SourceLocation());
+ llvm::Value *PrevUBVal = CGF.EmitLoadOfScalar(PrevUB, SourceLocation());
+ PrevUBVal = CGF.EmitScalarConversion(
+ PrevUBVal, LS.getPrevUpperBoundVariable()->getType(),
+ LS.getIterationVariable()->getType(), SourceLocation());
+
+ CGF.EmitStoreOfScalar(PrevLBVal, LB);
+ CGF.EmitStoreOfScalar(PrevUBVal, UB);
+
+ return {LB, UB};
+}
+
+/// if the 'for' loop has a dispatch schedule (e.g. dynamic, guided) then
+/// we need to use the LB and UB expressions generated by the worksharing
+/// code generation support, whereas in non combined situations we would
+/// just emit 0 and the LastIteration expression
+/// This function is necessary due to the difference of the LB and UB
+/// types for the RT emission routines for 'for_static_init' and
+/// 'for_dispatch_init'
+static std::pair<llvm::Value *, llvm::Value *>
+emitDistributeParallelForDispatchBounds(CodeGenFunction &CGF,
+ const OMPExecutableDirective &S,
+ Address LB, Address UB) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const Expr *IVExpr = LS.getIterationVariable();
+ // when implementing a dynamic schedule for a 'for' combined with a
+ // 'distribute' (e.g. 'distribute parallel for'), the 'for' loop
+ // is not normalized as each team only executes its own assigned
+ // distribute chunk
+ QualType IteratorTy = IVExpr->getType();
+ llvm::Value *LBVal = CGF.EmitLoadOfScalar(LB, /*Volatile=*/false, IteratorTy,
+ SourceLocation());
+ llvm::Value *UBVal = CGF.EmitLoadOfScalar(UB, /*Volatile=*/false, IteratorTy,
+ SourceLocation());
+ return {LBVal, UBVal};
+};
+
+static void emitDistributeParallelForDistributeInnerBoundParams(
+ CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ llvm::SmallVectorImpl<llvm::Value *> &CapturedVars) {
+ const auto &Dir = cast<OMPLoopDirective>(S);
+ LValue LB =
+ CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedLowerBoundVariable()));
+ auto LBCast = CGF.Builder.CreateIntCast(
+ CGF.Builder.CreateLoad(LB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
+ CapturedVars.push_back(LBCast);
+ LValue UB =
+ CGF.EmitLValue(cast<DeclRefExpr>(Dir.getCombinedUpperBoundVariable()));
+
+ auto UBCast = CGF.Builder.CreateIntCast(
+ CGF.Builder.CreateLoad(UB.getAddress()), CGF.SizeTy, /*isSigned=*/false);
+ CapturedVars.push_back(UBCast);
+};
+
+static void
+emitInnerParallelForWhenCombined(CodeGenFunction &CGF,
+ const OMPLoopDirective &S,
+ CodeGenFunction::JumpDest LoopExit) {
+ auto &&CGInlinedWorksharingLoop = [&S](CodeGenFunction &CGF,
+ PrePostActionTy &) {
+ CGF.EmitOMPWorksharingLoop(S, S.getPrevEnsureUpperBound(),
+ emitDistributeParallelForInnerBounds,
+ emitDistributeParallelForDispatchBounds);
+ };
+
+ emitCommonOMPParallelDirective(
+ CGF, S, OMPD_for, CGInlinedWorksharingLoop,
+ emitDistributeParallelForDistributeInnerBoundParams);
}
void CodeGenFunction::EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S) {
+ auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
+ CGF.EmitOMPDistributeLoop(S, emitInnerParallelForWhenCombined,
+ S.getDistInc());
+ };
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
- CGM.getOpenMPRuntime().emitInlinedDirective(
- *this, OMPD_distribute_parallel_for,
- [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- OMPLoopScope PreInitScope(CGF, S);
- OMPCancelStackRAII CancelRegion(CGF, OMPD_distribute_parallel_for,
- /*HasCancel=*/false);
- CGF.EmitStmt(
- cast<CapturedStmt>(S.getAssociatedStmt())->getCapturedStmt());
- });
+ OMPCancelStackRAII CancelRegion(*this, OMPD_distribute_parallel_for,
+ /*HasCancel=*/false);
+ CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
+ /*HasCancel=*/false);
}
void CodeGenFunction::EmitOMPDistributeParallelForSimdDirective(
@@ -2081,14 +2276,6 @@ void CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDirective(
});
}
-/// \brief Emit a helper variable and return corresponding lvalue.
-static LValue EmitOMPHelperVar(CodeGenFunction &CGF,
- const DeclRefExpr *Helper) {
- auto VDecl = cast<VarDecl>(Helper->getDecl());
- CGF.EmitVarDecl(*VDecl);
- return CGF.EmitLValue(Helper);
-}
-
namespace {
struct ScheduleKindModifiersTy {
OpenMPScheduleClauseKind Kind;
@@ -2101,7 +2288,10 @@ namespace {
};
} // namespace
-bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
+bool CodeGenFunction::EmitOMPWorksharingLoop(
+ const OMPLoopDirective &S, Expr *EUB,
+ const CodeGenLoopBoundsTy &CodeGenLoopBounds,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -2151,10 +2341,10 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
emitAlignedClause(*this, S);
EmitOMPLinearClauseInit(S);
// Emit helper vars inits.
- LValue LB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+
+ std::pair<LValue, LValue> Bounds = CodeGenLoopBounds(*this, S);
+ LValue LB = Bounds.first;
+ LValue UB = Bounds.second;
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
@@ -2240,9 +2430,11 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_monotonic;
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
+ const OMPLoopArguments LoopArguments(LB.getAddress(), UB.getAddress(),
+ ST.getAddress(), IL.getAddress(),
+ Chunk, EUB);
EmitOMPForOuterLoop(ScheduleKind, IsMonotonic, S, LoopScope, Ordered,
- LB.getAddress(), UB.getAddress(), ST.getAddress(),
- IL.getAddress(), Chunk);
+ LoopArguments, CGDispatchBounds);
}
if (isOpenMPSimdDirective(S.getDirectiveKind())) {
EmitOMPSimdFinal(S,
@@ -2280,12 +2472,42 @@ bool CodeGenFunction::EmitOMPWorksharingLoop(const OMPLoopDirective &S) {
return HasLastprivateClause;
}
+/// Emit the lower- and upper-bound helper variables of loop directive \p S
+/// and return their lvalues as an (LB, UB) pair. Used as the loop-bounds
+/// callback for the 'for'-based worksharing directives.
+static std::pair<LValue, LValue>
+emitForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ LValue LB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getLowerBoundVariable()));
+ LValue UB =
+ EmitOMPHelperVar(CGF, cast<DeclRefExpr>(LS.getUpperBoundVariable()));
+ return {LB, UB};
+}
+
+/// When dealing with dispatch schedules (e.g. dynamic, guided) we do not
+/// consider the lower and upper bound expressions generated by the
+/// worksharing loop support (\p LB and \p UB are unused); instead we use
+/// constant 0 and the value of the last-iteration expression as the bounds.
+static std::pair<llvm::Value *, llvm::Value *>
+emitDispatchForLoopBounds(CodeGenFunction &CGF, const OMPExecutableDirective &S,
+ Address LB, Address UB) {
+ const OMPLoopDirective &LS = cast<OMPLoopDirective>(S);
+ const Expr *IVExpr = LS.getIterationVariable();
+ const unsigned IVSize = CGF.getContext().getTypeSize(IVExpr->getType());
+ llvm::Value *LBVal = CGF.Builder.getIntN(IVSize, 0);
+ llvm::Value *UBVal = CGF.EmitScalarExpr(LS.getLastIteration());
+ return {LBVal, UBVal};
+}
+
void CodeGenFunction::EmitOMPForDirective(const OMPForDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
OMPCancelStackRAII CancelRegion(CGF, OMPD_for, S.hasCancel());
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
{
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
@@ -2303,7 +2525,9 @@ void CodeGenFunction::EmitOMPForSimdDirective(const OMPForSimdDirective &S) {
bool HasLastprivates = false;
auto &&CodeGen = [&S, &HasLastprivates](CodeGenFunction &CGF,
PrePostActionTy &) {
- HasLastprivates = CGF.EmitOMPWorksharingLoop(S);
+ HasLastprivates = CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(),
+ emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
{
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
@@ -2554,9 +2778,11 @@ void CodeGenFunction::EmitOMPParallelForDirective(
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
OMPCancelStackRAII CancelRegion(CGF, OMPD_parallel_for, S.hasCancel());
- CGF.EmitOMPWorksharingLoop(S);
+ CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_for, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPParallelForSimdDirective(
@@ -2564,9 +2790,11 @@ void CodeGenFunction::EmitOMPParallelForSimdDirective(
// Emit directive as a combined directive that consists of two implicit
// directives: 'parallel' with 'for' directive.
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPWorksharingLoop(S);
+ CGF.EmitOMPWorksharingLoop(S, S.getEnsureUpperBound(), emitForLoopBounds,
+ emitDispatchForLoopBounds);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_simd, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPParallelSectionsDirective(
@@ -2576,7 +2804,8 @@ void CodeGenFunction::EmitOMPParallelSectionsDirective(
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
CGF.EmitSections(S);
};
- emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen);
+ emitCommonOMPParallelDirective(*this, S, OMPD_sections, CodeGen,
+ emitEmptyBoundParameters);
}
void CodeGenFunction::EmitOMPTaskBasedDirective(const OMPExecutableDirective &S,
@@ -2794,7 +3023,9 @@ void CodeGenFunction::EmitOMPFlushDirective(const OMPFlushDirective &S) {
}(), S.getLocStart());
}
-void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
+void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S,
+ const CodeGenLoopTy &CodeGenLoop,
+ Expr *IncExpr) {
// Emit the loop iteration variable.
auto IVExpr = cast<DeclRefExpr>(S.getIterationVariable());
auto IVDecl = cast<VarDecl>(IVExpr->getDecl());
@@ -2835,10 +3066,17 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
// Emit 'then' code.
{
// Emit helper vars inits.
- LValue LB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getLowerBoundVariable()));
- LValue UB =
- EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getUpperBoundVariable()));
+
+ LValue LB = EmitOMPHelperVar(
+ *this, cast<DeclRefExpr>(
+ (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedLowerBoundVariable()
+ : S.getLowerBoundVariable())));
+ LValue UB = EmitOMPHelperVar(
+ *this, cast<DeclRefExpr>(
+ (isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedUpperBoundVariable()
+ : S.getUpperBoundVariable())));
LValue ST =
EmitOMPHelperVar(*this, cast<DeclRefExpr>(S.getStrideVariable()));
LValue IL =
@@ -2890,15 +3128,25 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
auto LoopExit =
getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit"));
// UB = min(UB, GlobalUB);
- EmitIgnoredExpr(S.getEnsureUpperBound());
+ EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedEnsureUpperBound()
+ : S.getEnsureUpperBound());
// IV = LB;
- EmitIgnoredExpr(S.getInit());
+ EmitIgnoredExpr(isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedInit()
+ : S.getInit());
+
+ Expr *Cond = isOpenMPLoopBoundSharingDirective(S.getDirectiveKind())
+ ? S.getCombinedCond()
+ : S.getCond();
+
+ // for distribute alone, codegen
// while (idx <= UB) { BODY; ++idx; }
- EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), S.getCond(),
- S.getInc(),
- [&S, LoopExit](CodeGenFunction &CGF) {
- CGF.EmitOMPLoopBody(S, LoopExit);
- CGF.EmitStopPoint(&S);
+ // when combined with 'for' (e.g. as in 'distribute parallel for')
+ // while (idx <= UB) { <CodeGen rest of pragma>; idx += ST; }
+ EmitOMPInnerLoop(S, LoopScope.requiresCleanups(), Cond, IncExpr,
+ [&S, LoopExit, &CodeGenLoop](CodeGenFunction &CGF) {
+ CodeGenLoop(CGF, S, LoopExit);
},
[](CodeGenFunction &) {});
EmitBlock(LoopExit.getBlock());
@@ -2907,9 +3155,11 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
} else {
// Emit the outer loop, which requests its work chunk [LB..UB] from
// runtime and runs the inner loop to process it.
- EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope,
- LB.getAddress(), UB.getAddress(), ST.getAddress(),
- IL.getAddress(), Chunk);
+ const OMPLoopArguments LoopArguments = {
+ LB.getAddress(), UB.getAddress(), ST.getAddress(), IL.getAddress(),
+ Chunk};
+ EmitOMPDistributeOuterLoop(ScheduleKind, S, LoopScope, LoopArguments,
+ CodeGenLoop);
}
// Emit final copy of the lastprivate variables if IsLastIter != 0.
@@ -2931,7 +3181,8 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPDistributeDirective &S) {
void CodeGenFunction::EmitOMPDistributeDirective(
const OMPDistributeDirective &S) {
auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &) {
- CGF.EmitOMPDistributeLoop(S);
+
+ CGF.EmitOMPDistributeLoop(S, emitOMPLoopBodyWithStopPoint, S.getInc());
};
OMPLexicalScope Scope(*this, S, /*AsInlined=*/true);
CGM.getOpenMPRuntime().emitInlinedDirective(*this, OMPD_distribute, CodeGen,
@@ -3840,7 +4091,8 @@ static void emitTargetParallelRegion(CodeGenFunction &CGF,
CGF.EmitStmt(CS->getCapturedStmt());
CGF.EmitOMPReductionClauseFinal(S, /*ReductionKind=*/OMPD_parallel);
};
- emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen);
+ emitCommonOMPParallelDirective(CGF, S, OMPD_parallel, CodeGen,
+ emitEmptyBoundParameters);
emitPostUpdateForReductionClause(
CGF, S, [](CodeGenFunction &) -> llvm::Value * { return nullptr; });
}
diff --git a/lib/CodeGen/CodeGenFunction.h b/lib/CodeGen/CodeGenFunction.h
index fa72019eb08b2..1ded824ba5b02 100644
--- a/lib/CodeGen/CodeGenFunction.h
+++ b/lib/CodeGen/CodeGenFunction.h
@@ -175,6 +175,25 @@ public:
// because of jumps.
VarBypassDetector Bypasses;
+ // Callback types: loop-body codegen and support for the ordered clause.
+ typedef llvm::function_ref<void(CodeGenFunction &, const OMPLoopDirective &,
+ JumpDest)>
+ CodeGenLoopTy;
+ typedef llvm::function_ref<void(CodeGenFunction &, SourceLocation,
+ const unsigned, const bool)>
+ CodeGenOrderedTy;
+
+ // Callback type producing the (LB, UB) lvalue pair for worksharing loops.
+ typedef llvm::function_ref<std::pair<LValue, LValue>(
+ CodeGenFunction &, const OMPExecutableDirective &S)>
+ CodeGenLoopBoundsTy;
+
+ // Callback type producing the (LB, UB) values for dispatch-based loops.
+ typedef llvm::function_ref<std::pair<llvm::Value *, llvm::Value *>(
+ CodeGenFunction &, const OMPExecutableDirective &S, Address LB,
+ Address UB)>
+ CodeGenDispatchBoundsTy;
+
/// \brief CGBuilder insert helper. This function is called after an
/// instruction is created using Builder.
void InsertHelper(llvm::Instruction *I, const llvm::Twine &Name,
@@ -2756,7 +2775,6 @@ public:
void EmitOMPTaskLoopDirective(const OMPTaskLoopDirective &S);
void EmitOMPTaskLoopSimdDirective(const OMPTaskLoopSimdDirective &S);
void EmitOMPDistributeDirective(const OMPDistributeDirective &S);
- void EmitOMPDistributeLoop(const OMPDistributeDirective &S);
void EmitOMPDistributeParallelForDirective(
const OMPDistributeParallelForDirective &S);
void EmitOMPDistributeParallelForSimdDirective(
@@ -2813,32 +2831,78 @@ public:
void EmitOMPPrivateLoopCounters(const OMPLoopDirective &S,
OMPPrivateScope &LoopScope);
+ /// Helper for the OpenMP loop directives.
+ void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
+
+ /// \brief Emit code for the worksharing loop-based directive.
+ /// \return true, if this construct has any lastprivate clause, false -
+ /// otherwise.
+ bool EmitOMPWorksharingLoop(const OMPLoopDirective &S, Expr *EUB,
+ const CodeGenLoopBoundsTy &CodeGenLoopBounds,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds);
+
private:
/// Helpers for blocks
llvm::Value *EmitBlockLiteral(const CGBlockInfo &Info);
/// Helpers for the OpenMP loop directives.
- void EmitOMPLoopBody(const OMPLoopDirective &D, JumpDest LoopExit);
void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false);
void EmitOMPSimdFinal(
const OMPLoopDirective &D,
const llvm::function_ref<llvm::Value *(CodeGenFunction &)> &CondGen);
- /// \brief Emit code for the worksharing loop-based directive.
- /// \return true, if this construct has any lastprivate clause, false -
- /// otherwise.
- bool EmitOMPWorksharingLoop(const OMPLoopDirective &S);
- void EmitOMPOuterLoop(bool IsMonotonic, bool DynamicOrOrdered,
- const OMPLoopDirective &S, OMPPrivateScope &LoopScope, bool Ordered,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk);
+
+ void EmitOMPDistributeLoop(const OMPLoopDirective &S,
+ const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr);
+
+ /// Bundle of values passed to the OpenMP loop-related emission functions.
+ struct OMPLoopArguments {
+ /// Address of the loop lower bound.
+ Address LB = Address::invalid();
+ /// Address of the loop upper bound.
+ Address UB = Address::invalid();
+ /// Address of the loop stride.
+ Address ST = Address::invalid();
+ /// Address of the isLastIteration argument for runtime functions.
+ Address IL = Address::invalid();
+ /// Chunk value generated by sema (null by default).
+ llvm::Value *Chunk = nullptr;
+ /// EnsureUpperBound expression.
+ Expr *EUB = nullptr;
+ /// Increment expression.
+ Expr *IncExpr = nullptr;
+ /// Loop initialization expression.
+ Expr *Init = nullptr;
+ /// Loop exit condition.
+ Expr *Cond = nullptr;
+ /// Update of LB after a whole chunk has been executed.
+ Expr *NextLB = nullptr;
+ /// Update of UB after a whole chunk has been executed.
+ Expr *NextUB = nullptr;
+ OMPLoopArguments() = default;
+ OMPLoopArguments(Address LB, Address UB, Address ST, Address IL,
+ llvm::Value *Chunk = nullptr, Expr *EUB = nullptr,
+ Expr *IncExpr = nullptr, Expr *Init = nullptr,
+ Expr *Cond = nullptr, Expr *NextLB = nullptr,
+ Expr *NextUB = nullptr)
+ : LB(LB), UB(UB), ST(ST), IL(IL), Chunk(Chunk), EUB(EUB),
+ IncExpr(IncExpr), Init(Init), Cond(Cond), NextLB(NextLB),
+ NextUB(NextUB) {}
+ };
+ void EmitOMPOuterLoop(bool DynamicOrOrdered, bool IsMonotonic,
+ const OMPLoopDirective &S, OMPPrivateScope &LoopScope,
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenLoopTy &CodeGenLoop,
+ const CodeGenOrderedTy &CodeGenOrdered);
void EmitOMPForOuterLoop(const OpenMPScheduleTy &ScheduleKind,
bool IsMonotonic, const OMPLoopDirective &S,
- OMPPrivateScope &LoopScope, bool Ordered, Address LB,
- Address UB, Address ST, Address IL,
- llvm::Value *Chunk);
- void EmitOMPDistributeOuterLoop(
- OpenMPDistScheduleClauseKind ScheduleKind,
- const OMPDistributeDirective &S, OMPPrivateScope &LoopScope,
- Address LB, Address UB, Address ST, Address IL, llvm::Value *Chunk);
+ OMPPrivateScope &LoopScope, bool Ordered,
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenDispatchBoundsTy &CGDispatchBounds);
+ void EmitOMPDistributeOuterLoop(OpenMPDistScheduleClauseKind ScheduleKind,
+ const OMPLoopDirective &S,
+ OMPPrivateScope &LoopScope,
+ const OMPLoopArguments &LoopArgs,
+ const CodeGenLoopTy &CodeGenLoopContent);
/// \brief Emit code for sections directive.
void EmitSections(const OMPExecutableDirective &S);
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 19203973ff1b0..25d32f19d0e5e 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -565,12 +565,8 @@ void CodeGenModule::DecorateInstructionWithTBAA(llvm::Instruction *Inst,
void CodeGenModule::DecorateInstructionWithInvariantGroup(
llvm::Instruction *I, const CXXRecordDecl *RD) {
- llvm::Metadata *MD = CreateMetadataIdentifierForType(QualType(RD->getTypeForDecl(), 0));
- auto *MetaDataNode = dyn_cast<llvm::MDNode>(MD);
- // Check if we have to wrap MDString in MDNode.
- if (!MetaDataNode)
- MetaDataNode = llvm::MDNode::get(getLLVMContext(), MD);
- I->setMetadata(llvm::LLVMContext::MD_invariant_group, MetaDataNode);
+ I->setMetadata(llvm::LLVMContext::MD_invariant_group,
+ llvm::MDNode::get(getLLVMContext(), {}));
}
void CodeGenModule::Error(SourceLocation loc, StringRef message) {
diff --git a/lib/CodeGen/CodeGenPGO.cpp b/lib/CodeGen/CodeGenPGO.cpp
index 6acedc033a6ea..9e193531d0f60 100644
--- a/lib/CodeGen/CodeGenPGO.cpp
+++ b/lib/CodeGen/CodeGenPGO.cpp
@@ -666,7 +666,7 @@ void CodeGenPGO::mapRegionCounters(const Decl *D) {
}
bool CodeGenPGO::skipRegionMappingForDecl(const Decl *D) {
- if (SkipCoverageMapping)
+ if (!D->getBody())
return true;
// Don't map the functions in system headers.
diff --git a/lib/CodeGen/CodeGenPGO.h b/lib/CodeGen/CodeGenPGO.h
index 0026df570bce0..0759e65388b8f 100644
--- a/lib/CodeGen/CodeGenPGO.h
+++ b/lib/CodeGen/CodeGenPGO.h
@@ -40,14 +40,11 @@ private:
std::unique_ptr<llvm::InstrProfRecord> ProfRecord;
std::vector<uint64_t> RegionCounts;
uint64_t CurrentRegionCount;
- /// \brief A flag that is set to true when this function doesn't need
- /// to have coverage mapping data.
- bool SkipCoverageMapping;
public:
CodeGenPGO(CodeGenModule &CGM)
- : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0),
- FunctionHash(0), CurrentRegionCount(0), SkipCoverageMapping(false) {}
+ : CGM(CGM), NumValueSites({{0}}), NumRegionCounters(0), FunctionHash(0),
+ CurrentRegionCount(0) {}
/// Whether or not we have PGO region data for the current function. This is
/// false both when we have no data at all and when our data has been