diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:53:39 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2023-02-11 12:53:39 +0000 |
| commit | 11edbfca22fe6e8280caeb77832f4dfbb68ed274 (patch) | |
| tree | a3e238dbe004c4ea0f061135a0c1d4430e50c28e /llvm/lib/Transforms | |
| parent | e3b557809604d036af6e00c60f012c2025b59a5e (diff) | |
Diffstat (limited to 'llvm/lib/Transforms')
| -rw-r--r-- | llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 32 | ||||
| -rw-r--r-- | llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 79 |
2 files changed, 89 insertions, 22 deletions
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 001ef55ba472..42158e4e05dd 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -1043,12 +1043,14 @@ struct AAPointerInfoImpl const auto &NoSyncAA = A.getAAFor<AANoSync>( QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); const auto *ExecDomainAA = A.lookupAAFor<AAExecutionDomain>( - IRPosition::function(Scope), &QueryingAA, DepClassTy::OPTIONAL); + IRPosition::function(Scope), &QueryingAA, DepClassTy::NONE); bool AllInSameNoSyncFn = NoSyncAA.isAssumedNoSync(); bool InstIsExecutedByInitialThreadOnly = ExecDomainAA && ExecDomainAA->isExecutedByInitialThreadOnly(I); bool InstIsExecutedInAlignedRegion = ExecDomainAA && ExecDomainAA->isExecutedInAlignedRegion(A, I); + if (InstIsExecutedInAlignedRegion || InstIsExecutedByInitialThreadOnly) + A.recordDependence(*ExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); InformationCache &InfoCache = A.getInfoCache(); bool IsThreadLocalObj = @@ -1063,14 +1065,24 @@ struct AAPointerInfoImpl auto CanIgnoreThreadingForInst = [&](const Instruction &I) -> bool { if (IsThreadLocalObj || AllInSameNoSyncFn) return true; - if (!ExecDomainAA) + const auto *FnExecDomainAA = + I.getFunction() == &Scope + ? ExecDomainAA + : A.lookupAAFor<AAExecutionDomain>( + IRPosition::function(*I.getFunction()), &QueryingAA, + DepClassTy::NONE); + if (!FnExecDomainAA) return false; if (InstIsExecutedInAlignedRegion || - ExecDomainAA->isExecutedInAlignedRegion(A, I)) + FnExecDomainAA->isExecutedInAlignedRegion(A, I)) { + A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); return true; + } if (InstIsExecutedByInitialThreadOnly && - ExecDomainAA->isExecutedByInitialThreadOnly(I)) + FnExecDomainAA->isExecutedByInitialThreadOnly(I)) { + A.recordDependence(*FnExecDomainAA, QueryingAA, DepClassTy::OPTIONAL); return true; + } return false; }; @@ -4161,12 +4173,14 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { return true; if (auto *LI = dyn_cast<LoadInst>(V)) { if (llvm::all_of(LI->uses(), [&](const Use &U) { - return InfoCache.isOnlyUsedByAssume( - cast<Instruction>(*U.getUser())) || - A.isAssumedDead(U, this, nullptr, UsedAssumedInformation); + auto &UserI = cast<Instruction>(*U.getUser()); + if (InfoCache.isOnlyUsedByAssume(UserI)) { + if (AssumeOnlyInst) + AssumeOnlyInst->insert(&UserI); + return true; + } + return A.isAssumedDead(U, this, nullptr, UsedAssumedInformation); })) { - if (AssumeOnlyInst) - AssumeOnlyInst->insert(LI); return true; } } diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index bee154dab10f..eb499a1aa912 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -188,9 +188,9 @@ struct AAICVTracker; struct OMPInformationCache : public InformationCache { OMPInformationCache(Module &M, AnalysisGetter &AG, BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC, - KernelSet &Kernels) + KernelSet &Kernels, bool OpenMPPostLink) : InformationCache(M, AG, Allocator, CGSCC), OMPBuilder(M), - Kernels(Kernels) { + Kernels(Kernels), OpenMPPostLink(OpenMPPostLink) { OMPBuilder.initialize(); initializeRuntimeFunctions(M); @@ -448,6 +448,24 @@ struct OMPInformationCache : public InformationCache { CI->setCallingConv(Fn->getCallingConv()); } + // Helper function to determine if it's legal to create a call to the runtime + // functions. + bool runtimeFnsAvailable(ArrayRef<RuntimeFunction> Fns) { + // We can always emit calls if we haven't yet linked in the runtime. + if (!OpenMPPostLink) + return true; + + // Once the runtime has been already been linked in we cannot emit calls to + // any undefined functions. + for (RuntimeFunction Fn : Fns) { + RuntimeFunctionInfo &RFI = RFIs[Fn]; + + if (RFI.Declaration && RFI.Declaration->isDeclaration()) + return false; + } + return true; + } + /// Helper to initialize all runtime function information for those defined /// in OpenMPKinds.def. void initializeRuntimeFunctions(Module &M) { @@ -523,6 +541,9 @@ struct OMPInformationCache : public InformationCache { /// Collection of known OpenMP runtime functions.. DenseSet<const Function *> RTLFunctions; + + /// Indicates if we have already linked in the OpenMP device library. + bool OpenMPPostLink = false; }; template <typename Ty, bool InsertInvalidates = true> @@ -1412,7 +1433,10 @@ private: Changed |= WasSplit; return WasSplit; }; - RFI.foreachUse(SCC, SplitMemTransfers); + if (OMPInfoCache.runtimeFnsAvailable( + {OMPRTL___tgt_target_data_begin_mapper_issue, + OMPRTL___tgt_target_data_begin_mapper_wait})) + RFI.foreachUse(SCC, SplitMemTransfers); return Changed; } @@ -2656,7 +2680,9 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { bool isExecutedInAlignedRegion(Attributor &A, const Instruction &I) const override { - if (!isValidState() || isa<CallBase>(I)) + assert(I.getFunction() == getAnchorScope() && + "Instruction is out of scope!"); + if (!isValidState()) return false; const Instruction *CurI; @@ -2667,14 +2693,18 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { auto *CB = dyn_cast<CallBase>(CurI); if (!CB) continue; + if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) { + break; + } const auto &It = CEDMap.find(CB); if (It == CEDMap.end()) continue; - if (!It->getSecond().IsReachedFromAlignedBarrierOnly) + if (!It->getSecond().IsReachingAlignedBarrierOnly) return false; + break; } while ((CurI = CurI->getNextNonDebugInstruction())); - if (!CurI && !BEDMap.lookup(I.getParent()).IsReachedFromAlignedBarrierOnly) + if (!CurI && !BEDMap.lookup(I.getParent()).IsReachingAlignedBarrierOnly) return false; // Check backward until a call or the block beginning is reached. @@ -2683,12 +2713,16 @@ struct AAExecutionDomainFunction : public AAExecutionDomain { auto *CB = dyn_cast<CallBase>(CurI); if (!CB) continue; + if (CB != &I && AlignedBarriers.contains(const_cast<CallBase *>(CB))) { + break; + } const auto &It = CEDMap.find(CB); if (It == CEDMap.end()) continue; if (!AA::isNoSyncInst(A, *CB, *this)) { - if (It->getSecond().IsReachedFromAlignedBarrierOnly) + if (It->getSecond().IsReachedFromAlignedBarrierOnly) { break; + } return false; } @@ -2984,7 +3018,8 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { if (EDAA.getState().isValidState()) { const auto &CalleeED = EDAA.getFunctionExecutionDomain(); ED.IsReachedFromAlignedBarrierOnly = - CalleeED.IsReachedFromAlignedBarrierOnly; + CallED.IsReachedFromAlignedBarrierOnly = + CalleeED.IsReachedFromAlignedBarrierOnly; AlignedBarrierLastInBlock = ED.IsReachedFromAlignedBarrierOnly; if (IsNoSync || !CalleeED.IsReachedFromAlignedBarrierOnly) ED.EncounteredNonLocalSideEffect |= @@ -2999,8 +3034,9 @@ ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { continue; } } - ED.IsReachedFromAlignedBarrierOnly = - IsNoSync && ED.IsReachedFromAlignedBarrierOnly; + if (!IsNoSync) + ED.IsReachedFromAlignedBarrierOnly = + CallED.IsReachedFromAlignedBarrierOnly = false; AlignedBarrierLastInBlock &= ED.IsReachedFromAlignedBarrierOnly; ED.EncounteredNonLocalSideEffect |= !CB->doesNotAccessMemory(); if (!IsNoSync) @@ -3914,6 +3950,12 @@ struct AAKernelInfoFunction : AAKernelInfo { bool changeToSPMDMode(Attributor &A, ChangeStatus &Changed) { auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); + // We cannot change to SPMD mode if the runtime functions aren't availible. + if (!OMPInfoCache.runtimeFnsAvailable( + {OMPRTL___kmpc_get_hardware_thread_id_in_block, + OMPRTL___kmpc_barrier_simple_spmd})) + return false; + if (!SPMDCompatibilityTracker.isAssumed()) { for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) { if (!NonCompatibleI) @@ -4021,6 +4063,13 @@ struct AAKernelInfoFunction : AAKernelInfo { if (!ReachedKnownParallelRegions.isValidState()) return ChangeStatus::UNCHANGED; + auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); + if (!OMPInfoCache.runtimeFnsAvailable( + {OMPRTL___kmpc_get_hardware_num_threads_in_block, + OMPRTL___kmpc_get_warp_size, OMPRTL___kmpc_barrier_simple_generic, + OMPRTL___kmpc_kernel_parallel, OMPRTL___kmpc_kernel_end_parallel})) + return ChangeStatus::UNCHANGED; + const int InitModeArgNo = 1; const int InitUseStateMachineArgNo = 2; @@ -4167,7 +4216,6 @@ struct AAKernelInfoFunction : AAKernelInfo { BranchInst::Create(IsWorkerCheckBB, UserCodeEntryBB, IsWorker, InitBB); Module &M = *Kernel->getParent(); - auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); FunctionCallee BlockHwSizeFn = OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( M, OMPRTL___kmpc_get_hardware_num_threads_in_block); @@ -5343,7 +5391,10 @@ PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { BumpPtrAllocator Allocator; CallGraphUpdater CGUpdater; - OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels); + bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || + LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink; + OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ nullptr, Kernels, + PostLink); unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; @@ -5417,9 +5468,11 @@ PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, CallGraphUpdater CGUpdater; CGUpdater.initialize(CG, C, AM, UR); + bool PostLink = LTOPhase == ThinOrFullLTOPhase::FullLTOPostLink || + LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink; SetVector<Function *> Functions(SCC.begin(), SCC.end()); OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ &Functions, Kernels); + /*CGSCC*/ &Functions, Kernels, PostLink); unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? SetFixpointIterations : 32; |
