diff options
Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp')
-rw-r--r-- | contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp | 114 |
1 files changed, 85 insertions, 29 deletions
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp index 011f55efce1d..5fd78eccf732 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp @@ -10,7 +10,6 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/BlockFrequencyInfo.h" @@ -29,6 +28,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/ProfDataUtils.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" #include "llvm/Support/ScaledNumber.h" @@ -180,7 +180,7 @@ private: // consisting of instructions exclusively computed for producing the operands // of the source instruction. void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, - bool ForSinking = false); + Instruction *SI, bool ForSinking = false); // Returns true if the condition of the select is highly predictable. bool isSelectHighlyPredictable(const SelectInst *SI); @@ -199,7 +199,7 @@ private: SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups); // Returns the latency cost of a given instruction. - Optional<uint64_t> computeInstCost(const Instruction *I); + std::optional<uint64_t> computeInstCost(const Instruction *I); // Returns the misprediction cost of a given select when converted to branch. Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost); @@ -242,6 +242,10 @@ bool SelectOptimize::runOnFunction(Function &F) { return false; TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + + if (!TTI->enableSelectOptimize()) + return false; + DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); BPI.reset(new BranchProbabilityInfo(F, *LI)); @@ -375,13 +379,13 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { // false operands. if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) { std::stack<Instruction *> TrueSlice; - getExclBackwardsSlice(TI, TrueSlice, true); + getExclBackwardsSlice(TI, TrueSlice, SI, true); maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); TrueSlices.push_back(TrueSlice); } if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) { std::stack<Instruction *> FalseSlice; - getExclBackwardsSlice(FI, FalseSlice, true); + getExclBackwardsSlice(FI, FalseSlice, SI, true); maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); FalseSlices.push_back(FalseSlice); } @@ -514,12 +518,27 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { } } +static bool isSpecialSelect(SelectInst *SI) { + using namespace llvm::PatternMatch; + + // If the select is a logical-and/logical-or then it is better treated as a + // and/or by the backend. + if (match(SI, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), + m_LogicalOr(m_Value(), m_Value())))) + return true; + + return false; +} + void SelectOptimize::collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups) { BasicBlock::iterator BBIt = BB.begin(); while (BBIt != BB.end()) { Instruction *I = &*BBIt++; if (SelectInst *SI = dyn_cast<SelectInst>(I)) { + if (isSpecialSelect(SI)) + continue; + SelectGroup SIGroup; SIGroup.push_back(SI); while (BBIt != BB.end()) { @@ -554,6 +573,12 @@ void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups, } } +static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, + DiagnosticInfoOptimizationBase &Rem) { + LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n"); + ORE->emit(Rem); +} + void SelectOptimize::findProfitableSIGroupsInnerLoops( const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { NumSelectOptAnalyzed += SIGroups.size(); @@ -588,7 +613,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops( OR << "Profitable to convert to branch (loop analysis). BranchCost=" << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() << ". "; - ORE->emit(OR); + EmitAndPrintRemark(ORE, OR); ++NumSelectConvertedLoop; ProfSIGroups.push_back(ASI); } else { @@ -596,7 +621,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops( ORmiss << "Select is more profitable (loop analysis). BranchCost=" << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() << ". "; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); } } } @@ -604,6 +629,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops( bool SelectOptimize::isConvertToBranchProfitableBase( const SmallVector<SelectInst *, 2> &ASI) { SelectInst *SI = ASI.front(); + LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n"); OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI); OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI); @@ -611,7 +637,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase( if (PSI->isColdBlock(SI->getParent(), BFI.get())) { ++NumSelectColdBB; ORmiss << "Not converted to branch because of cold basic block. "; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); return false; } @@ -619,7 +645,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase( if (SI->getMetadata(LLVMContext::MD_unpredictable)) { ++NumSelectUnPred; ORmiss << "Not converted to branch because of unpredictable branch. "; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); return false; } @@ -628,7 +654,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase( if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { ++NumSelectConvertedHighPred; OR << "Converted to branch because of highly predictable branch. "; - ORE->emit(OR); + EmitAndPrintRemark(ORE, OR); return true; } @@ -637,12 +663,12 @@ bool SelectOptimize::isConvertToBranchProfitableBase( if (hasExpensiveColdOperand(ASI)) { ++NumSelectConvertedExpColdOperand; OR << "Converted to branch because of expensive cold operand."; - ORE->emit(OR); + EmitAndPrintRemark(ORE, OR); return true; } ORmiss << "Not profitable to convert to branch (base heuristic)."; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); return false; } @@ -655,7 +681,7 @@ bool SelectOptimize::hasExpensiveColdOperand( const SmallVector<SelectInst *, 2> &ASI) { bool ColdOperand = false; uint64_t TrueWeight, FalseWeight, TotalWeight; - if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) { uint64_t MinWeight = std::min(TrueWeight, FalseWeight); TotalWeight = TrueWeight + FalseWeight; // Is there a path with frequency <ColdOperandThreshold% (default:20%) ? @@ -664,7 +690,7 @@ bool SelectOptimize::hasExpensiveColdOperand( OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front()); ORmiss << "Profile data available but missing branch-weights metadata for " "select instruction. "; - ORE->emit(ORmiss); + EmitAndPrintRemark(ORE, ORmiss); } if (!ColdOperand) return false; @@ -682,7 +708,7 @@ bool SelectOptimize::hasExpensiveColdOperand( } if (ColdI) { std::stack<Instruction *> ColdSlice; - getExclBackwardsSlice(ColdI, ColdSlice); + getExclBackwardsSlice(ColdI, ColdSlice, SI); InstructionCost SliceCost = 0; while (!ColdSlice.empty()) { SliceCost += TTI->getInstructionCost(ColdSlice.top(), @@ -703,6 +729,22 @@ bool SelectOptimize::hasExpensiveColdOperand( return false; } +// Check if it is safe to move LoadI next to the SI. +// Conservatively assume it is safe only if there is no instruction +// modifying memory in-between the load and the select instruction. +static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) { + // Assume loads from different basic blocks are unsafe to move. + if (LoadI->getParent() != SI->getParent()) + return false; + auto It = LoadI->getIterator(); + while (&*It != SI) { + if (It->mayWriteToMemory()) + return false; + It++; + } + return true; +} + // For a given source instruction, collect its backwards dependence slice // consisting of instructions exclusively computed for the purpose of producing // the operands of the source instruction. As an approximation @@ -711,7 +753,7 @@ bool SelectOptimize::hasExpensiveColdOperand( // form an one-use chain that leads to the source instruction. void SelectOptimize::getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, - bool ForSinking) { + Instruction *SI, bool ForSinking) { SmallPtrSet<Instruction *, 2> Visited; std::queue<Instruction *> Worklist; Worklist.push(I); @@ -733,6 +775,13 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I, isa<SelectInst>(II) || isa<PHINode>(II))) continue; + // Avoid sinking loads in order not to skip state-modifying instructions, + // that may alias with the loaded address. + // Only allow sinking of loads within the same basic block that are + // conservatively proven to be safe. + if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI)) + continue; + // Avoid considering instructions with less frequency than the source // instruction (i.e., avoid colder code regions of the dependence slice). if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) @@ -750,7 +799,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I, bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) { uint64_t TrueWeight, FalseWeight; - if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { uint64_t Max = std::max(TrueWeight, FalseWeight); uint64_t Sum = TrueWeight + FalseWeight; if (Sum != 0) { @@ -777,7 +826,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, LoopCost[1].NonPredCost >= LoopCost[1].PredCost) { ORmissL << "No select conversion in the loop due to no reduction of loop's " "critical path. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } @@ -794,7 +843,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, "loop's critical path. Gain=" << Gain[1].toString() << ", RelativeGain=" << RelativeGain.toString() << "%. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } @@ -810,7 +859,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, ORmissL << "No select conversion in the loop due to small gradient gain. " "GradientGain=" << GradientGain.toString() << "%. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } } @@ -818,7 +867,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, else if (Gain[1] < Gain[0]) { ORmissL << "No select conversion in the loop due to negative gradient gain. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } @@ -834,6 +883,8 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L, bool SelectOptimize::computeLoopCosts( const Loop *L, const SelectGroups &SIGroups, DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) { + LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop " + << L->getHeader()->getName() << "\n"); const auto &SIset = getSIset(SIGroups); // Compute instruction and loop-critical-path costs across two iterations for // both predicated and non-predicated version. @@ -867,11 +918,11 @@ bool SelectOptimize::computeLoopCosts( ORmissL << "Invalid instruction cost preventing analysis and " "optimization of the inner-most loop containing this " "instruction. "; - ORE->emit(ORmissL); + EmitAndPrintRemark(ORE, ORmissL); return false; } - IPredCost += Scaled64::get(ILatency.value()); - INonPredCost += Scaled64::get(ILatency.value()); + IPredCost += Scaled64::get(*ILatency); + INonPredCost += Scaled64::get(*ILatency); // For a select that can be converted to branch, // compute its cost as a branch (non-predicated cost). @@ -880,7 +931,7 @@ bool SelectOptimize::computeLoopCosts( // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate if (SIset.contains(&I)) { - auto SI = dyn_cast<SelectInst>(&I); + auto SI = cast<SelectInst>(&I); Scaled64 TrueOpCost = Scaled64::getZero(), FalseOpCost = Scaled64::getZero(); @@ -901,12 +952,17 @@ bool SelectOptimize::computeLoopCosts( INonPredCost = PredictedPathCost + MispredictCost; } + LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/" + << INonPredCost << " for " << I << "\n"); InstCostMap[&I] = {IPredCost, INonPredCost}; MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost); MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost); } } + LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1 + << " MaxCost = " << MaxCost.PredCost << " " + << MaxCost.NonPredCost << "\n"); } return true; } @@ -920,12 +976,12 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) { return SIset; } -Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) { +std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) { InstructionCost ICost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); if (auto OC = ICost.getValue()) - return Optional<uint64_t>(*OC); - return Optional<uint64_t>(None); + return std::optional<uint64_t>(*OC); + return std::nullopt; } ScaledNumber<uint64_t> @@ -959,7 +1015,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, const SelectInst *SI) { Scaled64 PredPathCost; uint64_t TrueWeight, FalseWeight; - if (SI->extractProfMetadata(TrueWeight, FalseWeight)) { + if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) { uint64_t SumWeight = TrueWeight + FalseWeight; if (SumWeight != 0) { PredPathCost = TrueCost * Scaled64::get(TrueWeight) + |