Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp | 114
1 file changed, 85 insertions(+), 29 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
index 011f55efce1d..5fd78eccf732 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp
@@ -10,7 +10,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -29,6 +28,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
+#include "llvm/IR/ProfDataUtils.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/ScaledNumber.h"
@@ -180,7 +180,7 @@ private:
// consisting of instructions exclusively computed for producing the operands
// of the source instruction.
void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
- bool ForSinking = false);
+ Instruction *SI, bool ForSinking = false);
// Returns true if the condition of the select is highly predictable.
bool isSelectHighlyPredictable(const SelectInst *SI);
@@ -199,7 +199,7 @@ private:
SmallPtrSet<const Instruction *, 2> getSIset(const SelectGroups &SIGroups);
// Returns the latency cost of a given instruction.
- Optional<uint64_t> computeInstCost(const Instruction *I);
+ std::optional<uint64_t> computeInstCost(const Instruction *I);
// Returns the misprediction cost of a given select when converted to branch.
Scaled64 getMispredictionCost(const SelectInst *SI, const Scaled64 CondCost);
@@ -242,6 +242,10 @@ bool SelectOptimize::runOnFunction(Function &F) {
return false;
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ if (!TTI->enableSelectOptimize())
+ return false;
+
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
@@ -375,13 +379,13 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
// false operands.
if (auto *TI = dyn_cast<Instruction>(SI->getTrueValue())) {
std::stack<Instruction *> TrueSlice;
- getExclBackwardsSlice(TI, TrueSlice, true);
+ getExclBackwardsSlice(TI, TrueSlice, SI, true);
maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
TrueSlices.push_back(TrueSlice);
}
if (auto *FI = dyn_cast<Instruction>(SI->getFalseValue())) {
std::stack<Instruction *> FalseSlice;
- getExclBackwardsSlice(FI, FalseSlice, true);
+ getExclBackwardsSlice(FI, FalseSlice, SI, true);
maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
FalseSlices.push_back(FalseSlice);
}
@@ -514,12 +518,27 @@ void SelectOptimize::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
}
}
+static bool isSpecialSelect(SelectInst *SI) {
+ using namespace llvm::PatternMatch;
+
+ // If the select is a logical-and/logical-or then it is better treated as a
+ // and/or by the backend.
+ if (match(SI, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
+ m_LogicalOr(m_Value(), m_Value()))))
+ return true;
+
+ return false;
+}
+
void SelectOptimize::collectSelectGroups(BasicBlock &BB,
SelectGroups &SIGroups) {
BasicBlock::iterator BBIt = BB.begin();
while (BBIt != BB.end()) {
Instruction *I = &*BBIt++;
if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
+ if (isSpecialSelect(SI))
+ continue;
+
SelectGroup SIGroup;
SIGroup.push_back(SI);
while (BBIt != BB.end()) {
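For context on the isSpecialSelect() helper added above: a minimal, self-contained sketch (not part of the patch) of the select shapes it filters out. In LLVM IR these are the logical forms `%and = select i1 %a, i1 %b, i1 false` and `%or = select i1 %a, i1 true, i1 %b`, which, per the new comment, are better treated as and/or by the backend, so converting them to branches here would be counter-productive. The helper name below is hypothetical; the matchers are the same ones the patch uses.

#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"

// Hypothetical standalone helper mirroring the patch's isSpecialSelect():
// m_LogicalAnd matches `select i1 %a, i1 %b, i1 false` (and plain `and`),
// m_LogicalOr matches `select i1 %a, i1 true, i1 %b` (and plain `or`).
static bool looksLikeLogicalAndOr(llvm::SelectInst *SI) {
  using namespace llvm::PatternMatch;
  return match(SI, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()),
                               m_LogicalOr(m_Value(), m_Value())));
}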
@@ -554,6 +573,12 @@ void SelectOptimize::findProfitableSIGroupsBase(SelectGroups &SIGroups,
}
}
+static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
+ DiagnosticInfoOptimizationBase &Rem) {
+ LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
+ ORE->emit(Rem);
+}
+
void SelectOptimize::findProfitableSIGroupsInnerLoops(
const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
NumSelectOptAnalyzed += SIGroups.size();
@@ -588,7 +613,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
OR << "Profitable to convert to branch (loop analysis). BranchCost="
<< BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
<< ". ";
- ORE->emit(OR);
+ EmitAndPrintRemark(ORE, OR);
++NumSelectConvertedLoop;
ProfSIGroups.push_back(ASI);
} else {
@@ -596,7 +621,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
ORmiss << "Select is more profitable (loop analysis). BranchCost="
<< BranchCost.toString()
<< ", SelectCost=" << SelectCost.toString() << ". ";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
}
}
}
@@ -604,6 +629,7 @@ void SelectOptimize::findProfitableSIGroupsInnerLoops(
bool SelectOptimize::isConvertToBranchProfitableBase(
const SmallVector<SelectInst *, 2> &ASI) {
SelectInst *SI = ASI.front();
+ LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI << "\n");
OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI);
OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI);
@@ -611,7 +637,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
if (PSI->isColdBlock(SI->getParent(), BFI.get())) {
++NumSelectColdBB;
ORmiss << "Not converted to branch because of cold basic block. ";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
return false;
}
@@ -619,7 +645,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
if (SI->getMetadata(LLVMContext::MD_unpredictable)) {
++NumSelectUnPred;
ORmiss << "Not converted to branch because of unpredictable branch. ";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
return false;
}
@@ -628,7 +654,7 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
++NumSelectConvertedHighPred;
OR << "Converted to branch because of highly predictable branch. ";
- ORE->emit(OR);
+ EmitAndPrintRemark(ORE, OR);
return true;
}
@@ -637,12 +663,12 @@ bool SelectOptimize::isConvertToBranchProfitableBase(
if (hasExpensiveColdOperand(ASI)) {
++NumSelectConvertedExpColdOperand;
OR << "Converted to branch because of expensive cold operand.";
- ORE->emit(OR);
+ EmitAndPrintRemark(ORE, OR);
return true;
}
ORmiss << "Not profitable to convert to branch (base heuristic).";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
return false;
}
@@ -655,7 +681,7 @@ bool SelectOptimize::hasExpensiveColdOperand(
const SmallVector<SelectInst *, 2> &ASI) {
bool ColdOperand = false;
uint64_t TrueWeight, FalseWeight, TotalWeight;
- if (ASI.front()->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*ASI.front(), TrueWeight, FalseWeight)) {
uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
TotalWeight = TrueWeight + FalseWeight;
// Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
@@ -664,7 +690,7 @@ bool SelectOptimize::hasExpensiveColdOperand(
OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", ASI.front());
ORmiss << "Profile data available but missing branch-weights metadata for "
"select instruction. ";
- ORE->emit(ORmiss);
+ EmitAndPrintRemark(ORE, ORmiss);
}
if (!ColdOperand)
return false;
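As an aside on the API change above: the removed Instruction::extractProfMetadata() calls are replaced by the free function extractBranchWeights() from llvm/IR/ProfDataUtils.h (hence the new include at the top of the patch), which reads the select's `!prof !{!"branch_weights", i32 T, i32 F}` metadata. Below is a hedged sketch of how the extracted weights can feed the cold-path test described in the comment; the helper name and the exact threshold expression are illustrative, not copied from the pass.

#include <algorithm>
#include <cstdint>
#include "llvm/IR/Instructions.h"
#include "llvm/IR/ProfDataUtils.h"

// Illustrative helper (hypothetical name): does one arm of the select run
// on fewer than ColdPercent% of executions, according to branch weights?
static bool hasColdArm(const llvm::SelectInst &SI, uint64_t ColdPercent = 20) {
  uint64_t TrueWeight, FalseWeight;
  if (!llvm::extractBranchWeights(SI, TrueWeight, FalseWeight))
    return false; // no branch-weights metadata on this select
  uint64_t Total = TrueWeight + FalseWeight;
  if (Total == 0)
    return false;
  // "Is there a path with frequency < ColdOperandThreshold% ?"
  return std::min(TrueWeight, FalseWeight) * 100 < ColdPercent * Total;
}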
@@ -682,7 +708,7 @@ bool SelectOptimize::hasExpensiveColdOperand(
}
if (ColdI) {
std::stack<Instruction *> ColdSlice;
- getExclBackwardsSlice(ColdI, ColdSlice);
+ getExclBackwardsSlice(ColdI, ColdSlice, SI);
InstructionCost SliceCost = 0;
while (!ColdSlice.empty()) {
SliceCost += TTI->getInstructionCost(ColdSlice.top(),
@@ -703,6 +729,22 @@ bool SelectOptimize::hasExpensiveColdOperand(
return false;
}
+// Check if it is safe to move LoadI next to the SI.
+// Conservatively assume it is safe only if there is no instruction
+// modifying memory in-between the load and the select instruction.
+static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
+ // Assume loads from different basic blocks are unsafe to move.
+ if (LoadI->getParent() != SI->getParent())
+ return false;
+ auto It = LoadI->getIterator();
+ while (&*It != SI) {
+ if (It->mayWriteToMemory())
+ return false;
+ It++;
+ }
+ return true;
+}
+
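To make the hazard that isSafeToSinkLoad() guards against concrete, here is a hedged, self-contained restatement of the same forward walk (hypothetical function name), with the problem case spelled out in the comment; like the patch, it simply rejects any cross-block motion.

#include "llvm/IR/Instruction.h"

// If %p and %q may alias, sinking the load below the store changes the value
// the select observes:
//   %v   = load i32, ptr %p
//   store i32 0, ptr %q        ; mayWriteToMemory() is true here -> bail out
//   %sel = select i1 %c, i32 %v, i32 %d
static bool noWritesBetween(llvm::Instruction *LoadI, llvm::Instruction *SI) {
  if (LoadI->getParent() != SI->getParent())
    return false; // be conservative across basic blocks
  for (auto It = LoadI->getIterator(); &*It != SI; ++It)
    if (It->mayWriteToMemory())
      return false;
  return true;
}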
// For a given source instruction, collect its backwards dependence slice
// consisting of instructions exclusively computed for the purpose of producing
// the operands of the source instruction. As an approximation
@@ -711,7 +753,7 @@ bool SelectOptimize::hasExpensiveColdOperand(
// form an one-use chain that leads to the source instruction.
void SelectOptimize::getExclBackwardsSlice(Instruction *I,
std::stack<Instruction *> &Slice,
- bool ForSinking) {
+ Instruction *SI, bool ForSinking) {
SmallPtrSet<Instruction *, 2> Visited;
std::queue<Instruction *> Worklist;
Worklist.push(I);
@@ -733,6 +775,13 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
isa<SelectInst>(II) || isa<PHINode>(II)))
continue;
+ // Avoid sinking loads in order not to skip state-modifying instructions,
+ // that may alias with the loaded address.
+ // Only allow sinking of loads within the same basic block that are
+ // conservatively proven to be safe.
+ if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
+ continue;
+
// Avoid considering instructions with less frequency than the source
// instruction (i.e., avoid colder code regions of the dependence slice).
if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
@@ -750,7 +799,7 @@ void SelectOptimize::getExclBackwardsSlice(Instruction *I,
bool SelectOptimize::isSelectHighlyPredictable(const SelectInst *SI) {
uint64_t TrueWeight, FalseWeight;
- if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
uint64_t Max = std::max(TrueWeight, FalseWeight);
uint64_t Sum = TrueWeight + FalseWeight;
if (Sum != 0) {
@@ -777,7 +826,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
ORmissL << "No select conversion in the loop due to no reduction of loop's "
"critical path. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
@@ -794,7 +843,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
"loop's critical path. Gain="
<< Gain[1].toString()
<< ", RelativeGain=" << RelativeGain.toString() << "%. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
@@ -810,7 +859,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
ORmissL << "No select conversion in the loop due to small gradient gain. "
"GradientGain="
<< GradientGain.toString() << "%. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
}
@@ -818,7 +867,7 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
else if (Gain[1] < Gain[0]) {
ORmissL
<< "No select conversion in the loop due to negative gradient gain. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
@@ -834,6 +883,8 @@ bool SelectOptimize::checkLoopHeuristics(const Loop *L,
bool SelectOptimize::computeLoopCosts(
const Loop *L, const SelectGroups &SIGroups,
DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
+ LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
+ << L->getHeader()->getName() << "\n");
const auto &SIset = getSIset(SIGroups);
// Compute instruction and loop-critical-path costs across two iterations for
// both predicated and non-predicated version.
@@ -867,11 +918,11 @@ bool SelectOptimize::computeLoopCosts(
ORmissL << "Invalid instruction cost preventing analysis and "
"optimization of the inner-most loop containing this "
"instruction. ";
- ORE->emit(ORmissL);
+ EmitAndPrintRemark(ORE, ORmissL);
return false;
}
- IPredCost += Scaled64::get(ILatency.value());
- INonPredCost += Scaled64::get(ILatency.value());
+ IPredCost += Scaled64::get(*ILatency);
+ INonPredCost += Scaled64::get(*ILatency);
// For a select that can be converted to branch,
// compute its cost as a branch (non-predicated cost).
@@ -880,7 +931,7 @@ bool SelectOptimize::computeLoopCosts(
// PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
// MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
if (SIset.contains(&I)) {
- auto SI = dyn_cast<SelectInst>(&I);
+ auto SI = cast<SelectInst>(&I);
Scaled64 TrueOpCost = Scaled64::getZero(),
FalseOpCost = Scaled64::getZero();
@@ -901,12 +952,17 @@ bool SelectOptimize::computeLoopCosts(
INonPredCost = PredictedPathCost + MispredictCost;
}
+ LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
+ << INonPredCost << " for " << I << "\n");
InstCostMap[&I] = {IPredCost, INonPredCost};
MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
}
}
+ LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
+ << " MaxCost = " << MaxCost.PredCost << " "
+ << MaxCost.NonPredCost << "\n");
}
return true;
}
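A rough, hedged sketch of the per-instruction bookkeeping behind the Latency / IPredCost / INonPredCost debug output added above, simplified to plain doubles and a single cost instead of the pass's predicated/non-predicated pair (names and the latency callback are illustrative): each instruction's latency is stacked on top of the most expensive of its already-costed in-loop operands, and the running maximum approximates the critical path.

#include <algorithm>
#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instruction.h"
#include "llvm/Support/Casting.h"

// Simplified critical-path accumulation over one block of a loop body.
// getLatency stands in for TTI->getInstructionCost(I, TCK_Latency).
static double blockCriticalPath(llvm::BasicBlock &BB,
                                double (*getLatency)(const llvm::Instruction &)) {
  llvm::DenseMap<const llvm::Instruction *, double> CostMap;
  double MaxCost = 0;
  for (llvm::Instruction &I : BB) {
    double Cost = 0;
    // Start from the most expensive operand already costed in this block.
    for (const llvm::Use &U : I.operands())
      if (auto *Op = llvm::dyn_cast<llvm::Instruction>(U.get()))
        if (auto It = CostMap.find(Op); It != CostMap.end())
          Cost = std::max(Cost, It->second);
    Cost += getLatency(I); // then add this instruction's own latency
    CostMap[&I] = Cost;
    MaxCost = std::max(MaxCost, Cost);
  }
  return MaxCost;
}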
@@ -920,12 +976,12 @@ SelectOptimize::getSIset(const SelectGroups &SIGroups) {
return SIset;
}
-Optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
+std::optional<uint64_t> SelectOptimize::computeInstCost(const Instruction *I) {
InstructionCost ICost =
TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
if (auto OC = ICost.getValue())
- return Optional<uint64_t>(*OC);
- return Optional<uint64_t>(None);
+ return std::optional<uint64_t>(*OC);
+ return std::nullopt;
}
ScaledNumber<uint64_t>
@@ -959,7 +1015,7 @@ SelectOptimize::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
const SelectInst *SI) {
Scaled64 PredPathCost;
uint64_t TrueWeight, FalseWeight;
- if (SI->extractProfMetadata(TrueWeight, FalseWeight)) {
+ if (extractBranchWeights(*SI, TrueWeight, FalseWeight)) {
uint64_t SumWeight = TrueWeight + FalseWeight;
if (SumWeight != 0) {
PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
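Finally, for the cost formulas referenced in the comments above (PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb, MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate), a hedged sketch using plain doubles in place of ScaledNumber<uint64_t>; the 50/50 fallback for selects without weights is an assumption of this sketch, not necessarily the pass's actual fallback.

#include <algorithm>
#include <cstdint>

// Weighted average of the two operand costs, using profile branch weights
// as probabilities (see extractBranchWeights above).
static double predictedPathCost(double TrueCost, double FalseCost,
                                uint64_t TrueWeight, uint64_t FalseWeight) {
  uint64_t Sum = TrueWeight + FalseWeight;
  if (Sum == 0)
    return 0.5 * (TrueCost + FalseCost); // assumption: treat as 50/50
  double TrueProb = static_cast<double>(TrueWeight) / Sum;
  // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
  return TrueCost * TrueProb + FalseCost * (1.0 - TrueProb);
}

// MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
static double mispredictCost(double MispredictPenalty, double CondCost,
                             double MispredictRate) {
  return std::max(MispredictPenalty, CondCost) * MispredictRate;
}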