aboutsummaryrefslogtreecommitdiff
path: root/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Transforms/IPO/OpenMPOpt.cpp')
-rw-r--r--llvm/lib/Transforms/IPO/OpenMPOpt.cpp255
1 files changed, 162 insertions, 93 deletions
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
index 2d765fb6ce6d..227ad8501f25 100644
--- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
+++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
@@ -49,7 +49,6 @@
#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
-#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <algorithm>
@@ -59,17 +58,16 @@ using namespace omp;
#define DEBUG_TYPE "openmp-opt"
static cl::opt<bool> DisableOpenMPOptimizations(
- "openmp-opt-disable", cl::ZeroOrMore,
- cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
- cl::init(false));
+ "openmp-opt-disable", cl::desc("Disable OpenMP specific optimizations."),
+ cl::Hidden, cl::init(false));
static cl::opt<bool> EnableParallelRegionMerging(
- "openmp-opt-enable-merging", cl::ZeroOrMore,
+ "openmp-opt-enable-merging",
cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
- DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
+ DisableInternalization("openmp-opt-disable-internalization",
cl::desc("Disable function internalization."),
cl::Hidden, cl::init(false));
@@ -85,42 +83,47 @@ static cl::opt<bool> HideMemoryTransferLatency(
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptDeglobalization(
- "openmp-opt-disable-deglobalization", cl::ZeroOrMore,
+ "openmp-opt-disable-deglobalization",
cl::desc("Disable OpenMP optimizations involving deglobalization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptSPMDization(
- "openmp-opt-disable-spmdization", cl::ZeroOrMore,
+ "openmp-opt-disable-spmdization",
cl::desc("Disable OpenMP optimizations involving SPMD-ization."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptFolding(
- "openmp-opt-disable-folding", cl::ZeroOrMore,
+ "openmp-opt-disable-folding",
cl::desc("Disable OpenMP optimizations involving folding."), cl::Hidden,
cl::init(false));
static cl::opt<bool> DisableOpenMPOptStateMachineRewrite(
- "openmp-opt-disable-state-machine-rewrite", cl::ZeroOrMore,
+ "openmp-opt-disable-state-machine-rewrite",
cl::desc("Disable OpenMP optimizations that replace the state machine."),
cl::Hidden, cl::init(false));
static cl::opt<bool> DisableOpenMPOptBarrierElimination(
- "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore,
+ "openmp-opt-disable-barrier-elimination",
cl::desc("Disable OpenMP optimizations that eliminate barriers."),
cl::Hidden, cl::init(false));
static cl::opt<bool> PrintModuleAfterOptimizations(
- "openmp-opt-print-module", cl::ZeroOrMore,
+ "openmp-opt-print-module-after",
cl::desc("Print the current module after OpenMP optimizations."),
cl::Hidden, cl::init(false));
+static cl::opt<bool> PrintModuleBeforeOptimizations(
+ "openmp-opt-print-module-before",
+ cl::desc("Print the current module before OpenMP optimizations."),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool> AlwaysInlineDeviceFunctions(
- "openmp-opt-inline-device", cl::ZeroOrMore,
+ "openmp-opt-inline-device",
cl::desc("Inline all applicible functions on the device."), cl::Hidden,
cl::init(false));
static cl::opt<bool>
- EnableVerboseRemarks("openmp-opt-verbose-remarks", cl::ZeroOrMore,
+ EnableVerboseRemarks("openmp-opt-verbose-remarks",
cl::desc("Enables more verbose remarks."), cl::Hidden,
cl::init(false));
@@ -129,6 +132,11 @@ static cl::opt<unsigned>
cl::desc("Maximal number of attributor iterations."),
cl::init(256));
+static cl::opt<unsigned>
+ SharedMemoryLimit("openmp-opt-shared-limit", cl::Hidden,
+ cl::desc("Maximum amount of shared memory to use."),
+ cl::init(std::numeric_limits<unsigned>::max()));
+
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPParallelRegionsDeleted,
@@ -493,11 +501,14 @@ struct OMPInformationCache : public InformationCache {
// Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_`
// functions, except if `optnone` is present.
- for (Function &F : M) {
- for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
- if (F.getName().startswith(Prefix) &&
- !F.hasFnAttribute(Attribute::OptimizeNone))
- F.removeFnAttr(Attribute::NoInline);
+ if (isOpenMPDevice(M)) {
+ for (Function &F : M) {
+ for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"})
+ if (F.hasFnAttribute(Attribute::NoInline) &&
+ F.getName().startswith(Prefix) &&
+ !F.hasFnAttribute(Attribute::OptimizeNone))
+ F.removeFnAttr(Attribute::NoInline);
+ }
}
// TODO: We should attach the attributes defined in OMPKinds.def.
@@ -591,7 +602,7 @@ struct KernelInfoState : AbstractState {
/// Abstract State interface
///{
- KernelInfoState() {}
+ KernelInfoState() = default;
KernelInfoState(bool BestState) {
if (!BestState)
indicatePessimisticFixpoint();
@@ -926,8 +937,7 @@ private:
SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
BasicBlock *StartBB = nullptr, *EndBB = nullptr;
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
@@ -966,8 +976,7 @@ private:
const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
ParentBB->getTerminator()->eraseFromParent();
- auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
- BasicBlock &ContinuationIP) {
+ auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
BasicBlock *CGStartBB = CodeGenIP.getBlock();
BasicBlock *CGEndBB =
SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
@@ -1107,10 +1116,8 @@ private:
// callbacks.
SmallVector<Value *, 8> Args;
for (auto *CI : MergableCIs) {
- Value *Callee =
- CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
- FunctionType *FT =
- cast<FunctionType>(Callee->getType()->getPointerElementType());
+ Value *Callee = CI->getArgOperand(CallbackCalleeOperand);
+ FunctionType *FT = OMPInfoCache.OMPBuilder.ParallelTask;
Args.clear();
Args.push_back(OutlinedFn->getArg(0));
Args.push_back(OutlinedFn->getArg(1));
@@ -1458,7 +1465,6 @@ private:
case Intrinsic::nvvm_barrier0_and:
case Intrinsic::nvvm_barrier0_or:
case Intrinsic::nvvm_barrier0_popc:
- case Intrinsic::amdgcn_s_barrier:
return true;
default:
break;
@@ -2120,6 +2126,8 @@ private:
OMPRTL___kmpc_barrier_simple_generic);
ExternalizationRAII ThreadId(OMPInfoCache,
OMPRTL___kmpc_get_hardware_thread_id_in_block);
+ ExternalizationRAII NumThreads(
+ OMPInfoCache, OMPRTL___kmpc_get_hardware_num_threads_in_block);
ExternalizationRAII WarpSize(OMPInfoCache, OMPRTL___kmpc_get_warp_size);
registerAAs(IsModulePass);
@@ -2407,8 +2415,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
auto CallCheck = [&](Instruction &I) {
Optional<Value *> ReplVal = getValueForCall(A, I, ICV);
- if (ReplVal.hasValue() &&
- ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
+ if (ReplVal && ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
HasChanged = ChangeStatus::CHANGED;
return true;
@@ -2468,7 +2475,8 @@ struct AAICVTrackerFunction : public AAICVTracker {
if (ICVTrackingAA.isAssumedTracked()) {
Optional<Value *> URV = ICVTrackingAA.getUniqueReplacementValue(ICV);
- if (!URV || (*URV && AA::isValidAtPosition(**URV, I, OMPInfoCache)))
+ if (!URV || (*URV && AA::isValidAtPosition(AA::ValueAndContext(**URV, I),
+ OMPInfoCache)))
return URV;
}
@@ -2509,13 +2517,13 @@ struct AAICVTrackerFunction : public AAICVTracker {
if (ValuesMap.count(CurrInst)) {
Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
// Unknown value, track new.
- if (!ReplVal.hasValue()) {
+ if (!ReplVal) {
ReplVal = NewReplVal;
break;
}
// If we found a new value, we can't know the icv value anymore.
- if (NewReplVal.hasValue())
+ if (NewReplVal)
if (ReplVal != NewReplVal)
return nullptr;
@@ -2523,11 +2531,11 @@ struct AAICVTrackerFunction : public AAICVTracker {
}
Optional<Value *> NewReplVal = getValueForCall(A, *CurrInst, ICV);
- if (!NewReplVal.hasValue())
+ if (!NewReplVal)
continue;
// Unknown value, track new.
- if (!ReplVal.hasValue()) {
+ if (!ReplVal) {
ReplVal = NewReplVal;
break;
}
@@ -2539,7 +2547,7 @@ struct AAICVTrackerFunction : public AAICVTracker {
}
// If we are in the same BB and we have a value, we are done.
- if (CurrBB == I->getParent() && ReplVal.hasValue())
+ if (CurrBB == I->getParent() && ReplVal)
return ReplVal;
// Go through all predecessors and add terminators for analysis.
@@ -2597,7 +2605,7 @@ struct AAICVTrackerFunctionReturned : AAICVTracker {
ICVTrackingAA.getReplacementValue(ICV, &I, A);
// If we found a second ICV value there is no unique returned value.
- if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
+ if (UniqueICVValue && UniqueICVValue != NewReplVal)
return false;
UniqueICVValue = NewReplVal;
@@ -2648,10 +2656,10 @@ struct AAICVTrackerCallSite : AAICVTracker {
}
ChangeStatus manifest(Attributor &A) override {
- if (!ReplVal.hasValue() || !ReplVal.getValue())
+ if (!ReplVal || !*ReplVal)
return ChangeStatus::UNCHANGED;
- A.changeValueAfterManifest(*getCtxI(), **ReplVal);
+ A.changeAfterManifest(IRPosition::inst(*getCtxI()), **ReplVal);
A.deleteAfterManifest(*getCtxI());
return ChangeStatus::CHANGED;
@@ -2789,7 +2797,7 @@ struct AAExecutionDomainFunction : public AAExecutionDomain {
SmallSetVector<const BasicBlock *, 16> SingleThreadedBBs;
/// Total number of basic blocks in this function.
- long unsigned NumBBs;
+ long unsigned NumBBs = 0;
};
ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
@@ -2952,12 +2960,23 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
}
void initialize(Attributor &A) override {
+ if (DisableOpenMPOptDeglobalization) {
+ indicatePessimisticFixpoint();
+ return;
+ }
+
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
+ Attributor::SimplifictionCallbackTy SCB =
+ [](const IRPosition &, const AbstractAttribute *,
+ bool &) -> Optional<Value *> { return nullptr; };
for (User *U : RFI.Declaration->users())
- if (CallBase *CB = dyn_cast<CallBase>(U))
+ if (CallBase *CB = dyn_cast<CallBase>(U)) {
MallocCalls.insert(CB);
+ A.registerSimplificationCallback(IRPosition::callsite_returned(*CB),
+ SCB);
+ }
findPotentialRemovedFreeCalls(A);
}
@@ -2999,6 +3018,14 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
auto *AllocSize = cast<ConstantInt>(CB->getArgOperand(0));
+ if (AllocSize->getZExtValue() + SharedMemoryUsed > SharedMemoryLimit) {
+ LLVM_DEBUG(dbgs() << TAG << "Cannot replace call " << *CB
+ << " with shared memory."
+ << " Shared memory usage is limited to "
+ << SharedMemoryLimit << " bytes\n");
+ continue;
+ }
+
LLVM_DEBUG(dbgs() << TAG << "Replace globalization call " << *CB
<< " with " << AllocSize->getZExtValue()
<< " bytes of shared memory\n");
@@ -3029,11 +3056,12 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
"HeapToShared on allocation without alignment attribute");
SharedMem->setAlignment(MaybeAlign(Alignment));
- A.changeValueAfterManifest(*CB, *NewBuffer);
+ A.changeAfterManifest(IRPosition::callsite_returned(*CB), *NewBuffer);
A.deleteAfterManifest(*CB);
A.deleteAfterManifest(*FreeCalls.front());
- NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
+ SharedMemoryUsed += AllocSize->getZExtValue();
+ NumBytesMovedToSharedMemory = SharedMemoryUsed;
Changed = ChangeStatus::CHANGED;
}
@@ -3069,6 +3097,8 @@ struct AAHeapToSharedFunction : public AAHeapToShared {
SmallSetVector<CallBase *, 4> MallocCalls;
/// Collection of potentially removed free calls in a function.
SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
+ /// The total amount of shared memory that has been used for HeapToShared.
+ unsigned SharedMemoryUsed = 0;
};
struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
@@ -3137,12 +3167,6 @@ struct AAKernelInfoFunction : AAKernelInfo {
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
Function *Fn = getAnchorScope();
- if (!OMPInfoCache.Kernels.count(Fn))
- return;
-
- // Add itself to the reaching kernel and set IsKernelEntry.
- ReachingKernelEntries.insert(Fn);
- IsKernelEntry = true;
OMPInformationCache::RuntimeFunctionInfo &InitRFI =
OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
@@ -3176,10 +3200,12 @@ struct AAKernelInfoFunction : AAKernelInfo {
Fn);
// Ignore kernels without initializers such as global constructors.
- if (!KernelInitCB || !KernelDeinitCB) {
- indicateOptimisticFixpoint();
+ if (!KernelInitCB || !KernelDeinitCB)
return;
- }
+
+ // Add itself to the reaching kernel and set IsKernelEntry.
+ ReachingKernelEntries.insert(Fn);
+ IsKernelEntry = true;
// For kernels we might need to initialize/finalize the IsSPMD state and
// we need to register a simplification callback so that the Attributor
@@ -3345,8 +3371,17 @@ struct AAKernelInfoFunction : AAKernelInfo {
return false;
}
- // Check if the kernel is already in SPMD mode, if so, return success.
+ // Get the actual kernel, could be the caller of the anchor scope if we have
+ // a debug wrapper.
Function *Kernel = getAnchorScope();
+ if (Kernel->hasLocalLinkage()) {
+ assert(Kernel->hasOneUse() && "Unexpected use of debug kernel wrapper.");
+ auto *CB = cast<CallBase>(Kernel->user_back());
+ Kernel = CB->getCaller();
+ }
+ assert(OMPInfoCache.Kernels.count(Kernel) && "Expected kernel function!");
+
+ // Check if the kernel is already in SPMD mode, if so, return success.
GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
(Kernel->getName() + "_exec_mode").str());
assert(ExecMode && "Kernel without exec mode?");
@@ -3711,9 +3746,9 @@ struct AAKernelInfoFunction : AAKernelInfo {
// __kmpc_get_hardware_num_threads_in_block();
// WarpSize = __kmpc_get_warp_size();
// BlockSize = BlockHwSize - WarpSize;
- // if (InitCB >= BlockSize) return;
- // IsWorkerCheckBB: bool IsWorker = InitCB >= 0;
+ // IsWorkerCheckBB: bool IsWorker = InitCB != -1;
// if (IsWorker) {
+ // if (InitCB >= BlockSize) return;
// SMBeginBB: __kmpc_barrier_simple_generic(...);
// void *WorkFn;
// bool Active = __kmpc_kernel_parallel(&WorkFn);
@@ -3770,6 +3805,13 @@ struct AAKernelInfoFunction : AAKernelInfo {
ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
InitBB->getTerminator()->eraseFromParent();
+ Instruction *IsWorker =
+ ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
+ ConstantInt::get(KernelInitCB->getType(), -1),
+ "thread.is_worker", InitBB);
+ IsWorker->setDebugLoc(DLoc);
+ BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB);
+
Module &M = *Kernel->getParent();
auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
FunctionCallee BlockHwSizeFn =
@@ -3779,29 +3821,22 @@ struct AAKernelInfoFunction : AAKernelInfo {
OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
M, OMPRTL___kmpc_get_warp_size);
CallInst *BlockHwSize =
- CallInst::Create(BlockHwSizeFn, "block.hw_size", InitBB);
+ CallInst::Create(BlockHwSizeFn, "block.hw_size", IsWorkerCheckBB);
OMPInfoCache.setCallingConvention(BlockHwSizeFn, BlockHwSize);
BlockHwSize->setDebugLoc(DLoc);
- CallInst *WarpSize = CallInst::Create(WarpSizeFn, "warp.size", InitBB);
+ CallInst *WarpSize =
+ CallInst::Create(WarpSizeFn, "warp.size", IsWorkerCheckBB);
OMPInfoCache.setCallingConvention(WarpSizeFn, WarpSize);
WarpSize->setDebugLoc(DLoc);
- Instruction *BlockSize =
- BinaryOperator::CreateSub(BlockHwSize, WarpSize, "block.size", InitBB);
+ Instruction *BlockSize = BinaryOperator::CreateSub(
+ BlockHwSize, WarpSize, "block.size", IsWorkerCheckBB);
BlockSize->setDebugLoc(DLoc);
- Instruction *IsMainOrWorker =
- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB,
- BlockSize, "thread.is_main_or_worker", InitBB);
+ Instruction *IsMainOrWorker = ICmpInst::Create(
+ ICmpInst::ICmp, llvm::CmpInst::ICMP_SLT, KernelInitCB, BlockSize,
+ "thread.is_main_or_worker", IsWorkerCheckBB);
IsMainOrWorker->setDebugLoc(DLoc);
- BranchInst::Create(IsWorkerCheckBB, StateMachineFinishedBB, IsMainOrWorker,
- InitBB);
-
- Instruction *IsWorker =
- ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
- ConstantInt::get(KernelInitCB->getType(), -1),
- "thread.is_worker", IsWorkerCheckBB);
- IsWorker->setDebugLoc(DLoc);
- BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker,
- IsWorkerCheckBB);
+ BranchInst::Create(StateMachineBeginBB, StateMachineFinishedBB,
+ IsMainOrWorker, IsWorkerCheckBB);
// Create local storage for the work function pointer.
const DataLayout &DL = M.getDataLayout();
@@ -4241,10 +4276,10 @@ struct AAKernelInfoCallSite : AAKernelInfo {
unsigned ScheduleTypeVal =
ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
switch (OMPScheduleType(ScheduleTypeVal)) {
- case OMPScheduleType::Static:
- case OMPScheduleType::StaticChunked:
- case OMPScheduleType::Distribute:
- case OMPScheduleType::DistributeChunked:
+ case OMPScheduleType::UnorderedStatic:
+ case OMPScheduleType::UnorderedStaticChunked:
+ case OMPScheduleType::OrderedDistribute:
+ case OMPScheduleType::OrderedDistributeChunked:
break;
default:
SPMDCompatibilityTracker.indicatePessimisticFixpoint();
@@ -4390,7 +4425,7 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
std::string Str("simplified value: ");
- if (!SimplifiedValue.hasValue())
+ if (!SimplifiedValue)
return Str + std::string("none");
if (!SimplifiedValue.getValue())
@@ -4420,8 +4455,8 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
IRPosition::callsite_returned(CB),
[&](const IRPosition &IRP, const AbstractAttribute *AA,
bool &UsedAssumedInformation) -> Optional<Value *> {
- assert((isValidState() || (SimplifiedValue.hasValue() &&
- SimplifiedValue.getValue() == nullptr)) &&
+ assert((isValidState() ||
+ (SimplifiedValue && SimplifiedValue.getValue() == nullptr)) &&
"Unexpected invalid state!");
if (!isAtFixpoint()) {
@@ -4461,9 +4496,9 @@ struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
ChangeStatus manifest(Attributor &A) override {
ChangeStatus Changed = ChangeStatus::UNCHANGED;
- if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
+ if (SimplifiedValue && *SimplifiedValue) {
Instruction &I = *getCtxI();
- A.changeValueAfterManifest(I, **SimplifiedValue);
+ A.changeAfterManifest(IRPosition::inst(I), **SimplifiedValue);
A.deleteAfterManifest(I);
CallBase *CB = dyn_cast<CallBase>(&I);
@@ -4549,7 +4584,7 @@ private:
// We have empty reaching kernels, therefore we cannot tell if the
// associated call site can be folded. At this moment, SimplifiedValue
// must be none.
- assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none");
+ assert(!SimplifiedValue && "SimplifiedValue should be none");
}
return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
@@ -4592,7 +4627,7 @@ private:
return indicatePessimisticFixpoint();
if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
- assert(!SimplifiedValue.hasValue() &&
+ assert(!SimplifiedValue &&
"SimplifiedValue should keep none at this point");
return ChangeStatus::UNCHANGED;
}
@@ -4700,18 +4735,23 @@ void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
void OpenMPOpt::registerAAs(bool IsModulePass) {
if (SCC.empty())
-
return;
+
if (IsModulePass) {
// Ensure we create the AAKernelInfo AAs first and without triggering an
// update. This will make sure we register all value simplification
// callbacks before any other AA has the chance to create an AAValueSimplify
// or similar.
- for (Function *Kernel : OMPInfoCache.Kernels)
+ auto CreateKernelInfoCB = [&](Use &, Function &Kernel) {
A.getOrCreateAAFor<AAKernelInfo>(
- IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
+ IRPosition::function(Kernel), /* QueryingAA */ nullptr,
DepClassTy::NONE, /* ForceUpdate */ false,
/* UpdateAfterInit */ false);
+ return false;
+ };
+ OMPInformationCache::RuntimeFunctionInfo &InitRFI =
+ OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
+ InitRFI.foreachUse(SCC, CreateKernelInfoCB);
registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
@@ -4899,6 +4939,9 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
KernelSet Kernels = getDeviceKernels(M);
+ if (PrintModuleBeforeOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt Module Pass:\n" << M);
+
auto IsCalled = [&](Function &F) {
if (Kernels.contains(&F))
return true;
@@ -4958,8 +5001,15 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
@@ -5001,6 +5051,9 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
Module &M = *C.begin()->getFunction().getParent();
+ if (PrintModuleBeforeOptimizations)
+ LLVM_DEBUG(dbgs() << TAG << "Module before OpenMPOpt CGSCC Pass:\n" << M);
+
KernelSet Kernels = getDeviceKernels(M);
FunctionAnalysisManager &FAM =
@@ -5022,8 +5075,16 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.IsModulePass = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
@@ -5093,8 +5154,16 @@ struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
unsigned MaxFixpointIterations =
(isOpenMPDevice(M)) ? SetFixpointIterations : 32;
- Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
- MaxFixpointIterations, OREGetter, DEBUG_TYPE);
+
+ AttributorConfig AC(CGUpdater);
+ AC.DefaultInitializeLiveInternals = false;
+ AC.IsModulePass = false;
+ AC.RewriteSignatures = false;
+ AC.MaxFixpointIterations = MaxFixpointIterations;
+ AC.OREGetter = OREGetter;
+ AC.PassName = DEBUG_TYPE;
+
+ Attributor A(Functions, InfoCache, AC);
OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Result = OMPOpt.run(false);