diff options
Diffstat (limited to 'llvm/lib/Transforms/IPO/OpenMPOpt.cpp')
| -rw-r--r-- | llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 233 |
1 files changed, 232 insertions, 1 deletions
diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 68f33410c602..2d765fb6ce6d 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -26,19 +26,25 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/CallGraphSCCPass.h" +#include "llvm/Analysis/MemoryLocation.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Frontend/OpenMP/OMPConstants.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/Assumptions.h" +#include "llvm/IR/Constants.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/IntrinsicsAMDGPU.h" #include "llvm/IR/IntrinsicsNVPTX.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/Attributor.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -98,6 +104,11 @@ static cl::opt<bool> DisableOpenMPOptStateMachineRewrite( cl::desc("Disable OpenMP optimizations that replace the state machine."), cl::Hidden, cl::init(false)); +static cl::opt<bool> DisableOpenMPOptBarrierElimination( + "openmp-opt-disable-barrier-elimination", cl::ZeroOrMore, + cl::desc("Disable OpenMP optimizations that eliminate barriers."), + cl::Hidden, cl::init(false)); + static cl::opt<bool> PrintModuleAfterOptimizations( "openmp-opt-print-module", cl::ZeroOrMore, cl::desc("Print the current module after OpenMP optimizations."), @@ -147,6 +158,7 @@ STATISTIC(NumOpenMPParallelRegionsMerged, "Number of OpenMP parallel regions merged"); STATISTIC(NumBytesMovedToSharedMemory, "Amount of memory pushed to shared memory"); +STATISTIC(NumBarriersEliminated, "Number of 
redundant barriers eliminated"); #if !defined(NDEBUG) static constexpr auto TAG = "[" DEBUG_TYPE "]"; @@ -458,7 +470,6 @@ struct OMPInformationCache : public InformationCache { RTLFunctions.insert(F); \ if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ RuntimeFunctionIDMap[F] = _Enum; \ - F->removeFnAttr(Attribute::NoInline); \ auto &RFI = RFIs[_Enum]; \ RFI.Kind = _Enum; \ RFI.Name = _Name; \ @@ -480,6 +491,15 @@ struct OMPInformationCache : public InformationCache { } #include "llvm/Frontend/OpenMP/OMPKinds.def" + // Remove the `noinline` attribute from `__kmpc`, `_OMP::` and `omp_` + // functions, except if `optnone` is present. + for (Function &F : M) { + for (StringRef Prefix : {"__kmpc", "_ZN4_OMP", "omp_"}) + if (F.getName().startswith(Prefix) && + !F.hasFnAttribute(Attribute::OptimizeNone)) + F.removeFnAttr(Attribute::NoInline); + } + // TODO: We should attach the attributes defined in OMPKinds.def. } @@ -787,6 +807,8 @@ struct OpenMPOpt { if (remarksEnabled()) analysisGlobalization(); + + Changed |= eliminateBarriers(); } else { if (PrintICVValues) printICVs(); @@ -809,6 +831,8 @@ struct OpenMPOpt { Changed = true; } } + + Changed |= eliminateBarriers(); } return Changed; @@ -1378,6 +1402,213 @@ private: return Changed; } + /// Eliminates redundant, aligned barriers in OpenMP offloaded kernels. + /// TODO: Make this an AA and expand it to work across blocks and functions. 
+ bool eliminateBarriers() { + bool Changed = false; + + if (DisableOpenMPOptBarrierElimination) + return /*Changed=*/false; + + if (OMPInfoCache.Kernels.empty()) + return /*Changed=*/false; + + enum ImplicitBarrierType { IBT_ENTRY, IBT_EXIT }; + + class BarrierInfo { + Instruction *I; + enum ImplicitBarrierType Type; + + public: + BarrierInfo(enum ImplicitBarrierType Type) : I(nullptr), Type(Type) {} + BarrierInfo(Instruction &I) : I(&I) {} + + bool isImplicit() { return !I; } + + bool isImplicitEntry() { return isImplicit() && Type == IBT_ENTRY; } + + bool isImplicitExit() { return isImplicit() && Type == IBT_EXIT; } + + Instruction *getInstruction() { return I; } + }; + + for (Function *Kernel : OMPInfoCache.Kernels) { + for (BasicBlock &BB : *Kernel) { + SmallVector<BarrierInfo, 8> BarriersInBlock; + SmallPtrSet<Instruction *, 8> BarriersToBeDeleted; + + // Add the kernel entry implicit barrier. + if (&Kernel->getEntryBlock() == &BB) + BarriersInBlock.push_back(IBT_ENTRY); + + // Find implicit and explicit aligned barriers in the same basic block. + for (Instruction &I : BB) { + if (isa<ReturnInst>(I)) { + // Add the implicit barrier when exiting the kernel. + BarriersInBlock.push_back(IBT_EXIT); + continue; + } + CallBase *CB = dyn_cast<CallBase>(&I); + if (!CB) + continue; + + auto IsAlignBarrierCB = [&](CallBase &CB) { + switch (CB.getIntrinsicID()) { + case Intrinsic::nvvm_barrier0: + case Intrinsic::nvvm_barrier0_and: + case Intrinsic::nvvm_barrier0_or: + case Intrinsic::nvvm_barrier0_popc: + case Intrinsic::amdgcn_s_barrier: + return true; + default: + break; + } + return hasAssumption(CB, + KnownAssumptionString("ompx_aligned_barrier")); + }; + + if (IsAlignBarrierCB(*CB)) { + // Add an explicit aligned barrier. 
+ BarriersInBlock.push_back(I); + } + } + + if (BarriersInBlock.size() <= 1) + continue; + + // A barrier in a barrier pair is removeable if all instructions + // between the barriers in the pair are side-effect free modulo the + // barrier operation. + auto IsBarrierRemoveable = [&Kernel](BarrierInfo *StartBI, + BarrierInfo *EndBI) { + assert( + !StartBI->isImplicitExit() && + "Expected start barrier to be other than a kernel exit barrier"); + assert( + !EndBI->isImplicitEntry() && + "Expected end barrier to be other than a kernel entry barrier"); + // If StartBI's instruction is null then this is the implicit + // kernel entry barrier, so iterate from the first instruction in the + // entry block. + Instruction *I = (StartBI->isImplicitEntry()) + ? &Kernel->getEntryBlock().front() + : StartBI->getInstruction()->getNextNode(); + assert(I && "Expected non-null start instruction"); + Instruction *E = (EndBI->isImplicitExit()) + ? I->getParent()->getTerminator() + : EndBI->getInstruction(); + assert(E && "Expected non-null end instruction"); + + for (; I != E; I = I->getNextNode()) { + if (!I->mayHaveSideEffects() && !I->mayReadFromMemory()) + continue; + + auto IsPotentiallyAffectedByBarrier = + [](Optional<MemoryLocation> Loc) { + const Value *Obj = (Loc && Loc->Ptr) + ? 
getUnderlyingObject(Loc->Ptr) + : nullptr; + if (!Obj) { + LLVM_DEBUG( + dbgs() + << "Access to unknown location requires barriers\n"); + return true; + } + if (isa<UndefValue>(Obj)) + return false; + if (isa<AllocaInst>(Obj)) + return false; + if (auto *GV = dyn_cast<GlobalVariable>(Obj)) { + if (GV->isConstant()) + return false; + if (GV->isThreadLocal()) + return false; + if (GV->getAddressSpace() == (int)AddressSpace::Local) + return false; + if (GV->getAddressSpace() == (int)AddressSpace::Constant) + return false; + } + LLVM_DEBUG(dbgs() << "Access to '" << *Obj + << "' requires barriers\n"); + return true; + }; + + if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) { + Optional<MemoryLocation> Loc = MemoryLocation::getForDest(MI); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I)) { + Optional<MemoryLocation> Loc = + MemoryLocation::getForSource(MTI); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + } + continue; + } + + if (auto *LI = dyn_cast<LoadInst>(I)) + if (LI->hasMetadata(LLVMContext::MD_invariant_load)) + continue; + + Optional<MemoryLocation> Loc = MemoryLocation::getOrNone(I); + if (IsPotentiallyAffectedByBarrier(Loc)) + return false; + } + + return true; + }; + + // Iterate barrier pairs and remove an explicit barrier if analysis + // deems it removeable. + for (auto *It = BarriersInBlock.begin(), + *End = BarriersInBlock.end() - 1; + It != End; ++It) { + + BarrierInfo *StartBI = It; + BarrierInfo *EndBI = (It + 1); + + // Cannot remove when both are implicit barriers, continue. + if (StartBI->isImplicit() && EndBI->isImplicit()) + continue; + + if (!IsBarrierRemoveable(StartBI, EndBI)) + continue; + + assert(!(StartBI->isImplicit() && EndBI->isImplicit()) && + "Expected at least one explicit barrier to remove."); + + // Remove an explicit barrier, check first, then second. 
+ if (!StartBI->isImplicit()) { + LLVM_DEBUG(dbgs() << "Remove start barrier " + << *StartBI->getInstruction() << "\n"); + BarriersToBeDeleted.insert(StartBI->getInstruction()); + } else { + LLVM_DEBUG(dbgs() << "Remove end barrier " + << *EndBI->getInstruction() << "\n"); + BarriersToBeDeleted.insert(EndBI->getInstruction()); + } + } + + if (BarriersToBeDeleted.empty()) + continue; + + Changed = true; + for (Instruction *I : BarriersToBeDeleted) { + ++NumBarriersEliminated; + auto Remark = [&](OptimizationRemark OR) { + return OR << "Redundant barrier eliminated."; + }; + + if (EnableVerboseRemarks) + emitRemark<OptimizationRemark>(I, "OMP190", Remark); + I->eraseFromParent(); + } + } + } + + return Changed; + } + void analysisGlobalization() { auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; |
