diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp')
| -rw-r--r-- | contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp | 2104 |
1 files changed, 0 insertions, 2104 deletions
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp deleted file mode 100644 index 3f4f9bc7145d..000000000000 --- a/contrib/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp +++ /dev/null @@ -1,2104 +0,0 @@ -//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This pass merges conditional blocks of code and reduces the number of -// conditional branches in the hot paths based on profiles. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/Analysis/BlockFrequencyInfo.h" -#include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionIterator.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/CFG.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/Support/BranchProbability.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Transforms/Utils.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/ValueMapper.h" - -#include <set> -#include <sstream> - -using namespace llvm; - -#define DEBUG_TYPE "chr" - -#define CHR_DEBUG(X) LLVM_DEBUG(X) - -static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, - cl::desc("Apply CHR for all functions")); - -static cl::opt<double> CHRBiasThreshold( - "chr-bias-threshold", cl::init(0.99), cl::Hidden, - cl::desc("CHR considers a branch bias greater than this ratio as biased")); - -static cl::opt<unsigned> CHRMergeThreshold( - "chr-merge-threshold", cl::init(2), cl::Hidden, - cl::desc("CHR merges a group of N branches/selects where N >= this value")); - -static cl::opt<std::string> CHRModuleList( - "chr-module-list", cl::init(""), cl::Hidden, - cl::desc("Specify file to retrieve the list of modules to apply CHR to")); - -static cl::opt<std::string> CHRFunctionList( - "chr-function-list", cl::init(""), cl::Hidden, - cl::desc("Specify file to retrieve the list of functions to apply CHR to")); - -static StringSet<> CHRModules; -static StringSet<> CHRFunctions; - -static void parseCHRFilterFiles() { - if (!CHRModuleList.empty()) { - auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); - if (!FileOrErr) { - errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; - std::exit(1); - } - StringRef Buf = FileOrErr->get()->getBuffer(); - SmallVector<StringRef, 0> Lines; - Buf.split(Lines, '\n'); - for (StringRef Line : Lines) { - Line = Line.trim(); - if (!Line.empty()) - CHRModules.insert(Line); - } - } - if (!CHRFunctionList.empty()) { - auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); - if (!FileOrErr) { - errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; - std::exit(1); - } - StringRef Buf = FileOrErr->get()->getBuffer(); - SmallVector<StringRef, 0> Lines; - Buf.split(Lines, '\n'); - for (StringRef Line : Lines) { - Line = Line.trim(); - if (!Line.empty()) - CHRFunctions.insert(Line); - } - } -} - -namespace { -class ControlHeightReductionLegacyPass : public FunctionPass { -public: - static char ID; - - ControlHeightReductionLegacyPass() : FunctionPass(ID) { - initializeControlHeightReductionLegacyPassPass( - *PassRegistry::getPassRegistry()); - parseCHRFilterFiles(); - } - - bool runOnFunction(Function &F) override; - void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequired<BlockFrequencyInfoWrapperPass>(); - AU.addRequired<DominatorTreeWrapperPass>(); - AU.addRequired<ProfileSummaryInfoWrapperPass>(); - AU.addRequired<RegionInfoPass>(); - AU.addPreserved<GlobalsAAWrapperPass>(); - } -}; -} // end anonymous namespace - -char ControlHeightReductionLegacyPass::ID = 0; - -INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, - "chr", - "Reduce control height in the hot paths", - false, false) -INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) -INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, - "chr", - "Reduce control height in the hot paths", - false, false) - -FunctionPass *llvm::createControlHeightReductionLegacyPass() { - return new ControlHeightReductionLegacyPass(); -} - -namespace { - -struct CHRStats { - CHRStats() : NumBranches(0), NumBranchesDelta(0), - WeightedNumBranchesDelta(0) {} - void print(raw_ostream &OS) const { - OS << "CHRStats: NumBranches " << NumBranches - << " NumBranchesDelta " << NumBranchesDelta - << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; - } - uint64_t NumBranches; // The original number of conditional branches / - // selects - uint64_t NumBranchesDelta; // The decrease of the number of conditional - // branches / selects in the hot paths due to CHR. - uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile - // count at the scope entry. -}; - -// RegInfo - some properties of a Region. -struct RegInfo { - RegInfo() : R(nullptr), HasBranch(false) {} - RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} - Region *R; - bool HasBranch; - SmallVector<SelectInst *, 8> Selects; -}; - -typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; - -// CHRScope - a sequence of regions to CHR together. It corresponds to a -// sequence of conditional blocks. It can have subscopes which correspond to -// nested conditional blocks. Nested CHRScopes form a tree. -class CHRScope { - public: - CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { - assert(RI.R && "Null RegionIn"); - RegInfos.push_back(RI); - } - - Region *getParentRegion() { - assert(RegInfos.size() > 0 && "Empty CHRScope"); - Region *Parent = RegInfos[0].R->getParent(); - assert(Parent && "Unexpected to call this on the top-level region"); - return Parent; - } - - BasicBlock *getEntryBlock() { - assert(RegInfos.size() > 0 && "Empty CHRScope"); - return RegInfos.front().R->getEntry(); - } - - BasicBlock *getExitBlock() { - assert(RegInfos.size() > 0 && "Empty CHRScope"); - return RegInfos.back().R->getExit(); - } - - bool appendable(CHRScope *Next) { - // The next scope is appendable only if this scope is directly connected to - // it (which implies it post-dominates this scope) and this scope dominates - // it (no edge to the next scope outside this scope). - BasicBlock *NextEntry = Next->getEntryBlock(); - if (getExitBlock() != NextEntry) - // Not directly connected. - return false; - Region *LastRegion = RegInfos.back().R; - for (BasicBlock *Pred : predecessors(NextEntry)) - if (!LastRegion->contains(Pred)) - // There's an edge going into the entry of the next scope from outside - // of this scope. - return false; - return true; - } - - void append(CHRScope *Next) { - assert(RegInfos.size() > 0 && "Empty CHRScope"); - assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); - assert(getParentRegion() == Next->getParentRegion() && - "Must be siblings"); - assert(getExitBlock() == Next->getEntryBlock() && - "Must be adjacent"); - for (RegInfo &RI : Next->RegInfos) - RegInfos.push_back(RI); - for (CHRScope *Sub : Next->Subs) - Subs.push_back(Sub); - } - - void addSub(CHRScope *SubIn) { -#ifndef NDEBUG - bool IsChild = false; - for (RegInfo &RI : RegInfos) - if (RI.R == SubIn->getParentRegion()) { - IsChild = true; - break; - } - assert(IsChild && "Must be a child"); -#endif - Subs.push_back(SubIn); - } - - // Split this scope at the boundary region into two, which will belong to the - // tail and returns the tail. - CHRScope *split(Region *Boundary) { - assert(Boundary && "Boundary null"); - assert(RegInfos.begin()->R != Boundary && - "Can't be split at beginning"); - auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), - [&Boundary](const RegInfo& RI) { - return Boundary == RI.R; - }); - if (BoundaryIt == RegInfos.end()) - return nullptr; - SmallVector<RegInfo, 8> TailRegInfos; - SmallVector<CHRScope *, 8> TailSubs; - TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); - RegInfos.resize(BoundaryIt - RegInfos.begin()); - DenseSet<Region *> TailRegionSet; - for (RegInfo &RI : TailRegInfos) - TailRegionSet.insert(RI.R); - for (auto It = Subs.begin(); It != Subs.end(); ) { - CHRScope *Sub = *It; - assert(Sub && "null Sub"); - Region *Parent = Sub->getParentRegion(); - if (TailRegionSet.count(Parent)) { - TailSubs.push_back(Sub); - It = Subs.erase(It); - } else { - assert(std::find_if(RegInfos.begin(), RegInfos.end(), - [&Parent](const RegInfo& RI) { - return Parent == RI.R; - }) != RegInfos.end() && - "Must be in head"); - ++It; - } - } - assert(HoistStopMap.empty() && "MapHoistStops must be empty"); - return new CHRScope(TailRegInfos, TailSubs); - } - - bool contains(Instruction *I) const { - BasicBlock *Parent = I->getParent(); - for (const RegInfo &RI : RegInfos) - if (RI.R->contains(Parent)) - return true; - return false; - } - - void print(raw_ostream &OS) const; - - SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope - SmallVector<CHRScope *, 8> Subs; // Subscopes. - - // The instruction at which to insert the CHR conditional branch (and hoist - // the dependent condition values). - Instruction *BranchInsertPoint; - - // True-biased and false-biased regions (conditional blocks), - // respectively. Used only for the outermost scope and includes regions in - // subscopes. The rest are unbiased. - DenseSet<Region *> TrueBiasedRegions; - DenseSet<Region *> FalseBiasedRegions; - // Among the biased regions, the regions that get CHRed. - SmallVector<RegInfo, 8> CHRRegions; - - // True-biased and false-biased selects, respectively. Used only for the - // outermost scope and includes ones in subscopes. - DenseSet<SelectInst *> TrueBiasedSelects; - DenseSet<SelectInst *> FalseBiasedSelects; - - // Map from one of the above regions to the instructions to stop - // hoisting instructions at through use-def chains. - HoistStopMapTy HoistStopMap; - - private: - CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, - SmallVector<CHRScope *, 8> &SubsIn) - : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} -}; - -class CHR { - public: - CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, - ProfileSummaryInfo &PSIin, RegionInfo &RIin, - OptimizationRemarkEmitter &OREin) - : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} - - ~CHR() { - for (CHRScope *Scope : Scopes) { - delete Scope; - } - } - - bool run(); - - private: - // See the comments in CHR::run() for the high level flow of the algorithm and - // what the following functions do. - - void findScopes(SmallVectorImpl<CHRScope *> &Output) { - Region *R = RI.getTopLevelRegion(); - CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); - if (Scope) { - Output.push_back(Scope); - } - } - CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, - SmallVectorImpl<CHRScope *> &Scopes); - CHRScope *findScope(Region *R); - void checkScopeHoistable(CHRScope *Scope); - - void splitScopes(SmallVectorImpl<CHRScope *> &Input, - SmallVectorImpl<CHRScope *> &Output); - SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, - CHRScope *Outer, - DenseSet<Value *> *OuterConditionValues, - Instruction *OuterInsertPoint, - SmallVectorImpl<CHRScope *> &Output, - DenseSet<Instruction *> &Unhoistables); - - void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); - void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); - - void filterScopes(SmallVectorImpl<CHRScope *> &Input, - SmallVectorImpl<CHRScope *> &Output); - - void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, - SmallVectorImpl<CHRScope *> &Output); - void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); - - void sortScopes(SmallVectorImpl<CHRScope *> &Input, - SmallVectorImpl<CHRScope *> &Output); - - void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); - void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); - void cloneScopeBlocks(CHRScope *Scope, - BasicBlock *PreEntryBlock, - BasicBlock *ExitBlock, - Region *LastRegion, - ValueToValueMapTy &VMap); - BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, - BasicBlock *EntryBlock, - BasicBlock *NewEntryBlock, - ValueToValueMapTy &VMap); - void fixupBranchesAndSelects(CHRScope *Scope, - BasicBlock *PreEntryBlock, - BranchInst *MergedBR, - uint64_t ProfileCount); - void fixupBranch(Region *R, - CHRScope *Scope, - IRBuilder<> &IRB, - Value *&MergedCondition, BranchProbability &CHRBranchBias); - void fixupSelect(SelectInst* SI, - CHRScope *Scope, - IRBuilder<> &IRB, - Value *&MergedCondition, BranchProbability &CHRBranchBias); - void addToMergedCondition(bool IsTrueBiased, Value *Cond, - Instruction *BranchOrSelect, - CHRScope *Scope, - IRBuilder<> &IRB, - Value *&MergedCondition); - - Function &F; - BlockFrequencyInfo &BFI; - DominatorTree &DT; - ProfileSummaryInfo &PSI; - RegionInfo &RI; - OptimizationRemarkEmitter &ORE; - CHRStats Stats; - - // All the true-biased regions in the function - DenseSet<Region *> TrueBiasedRegionsGlobal; - // All the false-biased regions in the function - DenseSet<Region *> FalseBiasedRegionsGlobal; - // All the true-biased selects in the function - DenseSet<SelectInst *> TrueBiasedSelectsGlobal; - // All the false-biased selects in the function - DenseSet<SelectInst *> FalseBiasedSelectsGlobal; - // A map from biased regions to their branch bias - DenseMap<Region *, BranchProbability> BranchBiasMap; - // A map from biased selects to their branch bias - DenseMap<SelectInst *, BranchProbability> SelectBiasMap; - // All the scopes. - DenseSet<CHRScope *> Scopes; -}; - -} // end anonymous namespace - -static inline -raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, - const CHRStats &Stats) { - Stats.print(OS); - return OS; -} - -static inline -raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { - Scope.print(OS); - return OS; -} - -static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { - if (ForceCHR) - return true; - - if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { - if (CHRModules.count(F.getParent()->getName())) - return true; - return CHRFunctions.count(F.getName()); - } - - assert(PSI.hasProfileSummary() && "Empty PSI?"); - return PSI.isFunctionEntryHot(&F); -} - -static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, - CHRStats *Stats) { - StringRef FuncName = F.getName(); - StringRef ModuleName = F.getParent()->getName(); - (void)(FuncName); // Unused in release build. - (void)(ModuleName); // Unused in release build. - CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " - << FuncName); - if (Stats) - CHR_DEBUG(dbgs() << " " << *Stats); - CHR_DEBUG(dbgs() << "\n"); - CHR_DEBUG(F.dump()); -} - -void CHRScope::print(raw_ostream &OS) const { - assert(RegInfos.size() > 0 && "Empty CHRScope"); - OS << "CHRScope["; - OS << RegInfos.size() << ", Regions["; - for (const RegInfo &RI : RegInfos) { - OS << RI.R->getNameStr(); - if (RI.HasBranch) - OS << " B"; - if (RI.Selects.size() > 0) - OS << " S" << RI.Selects.size(); - OS << ", "; - } - if (RegInfos[0].R->getParent()) { - OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); - } else { - // top level region - OS << "]"; - } - OS << ", Subs["; - for (CHRScope *Sub : Subs) { - OS << *Sub << ", "; - } - OS << "]]"; -} - -// Return true if the given instruction type can be hoisted by CHR. -static bool isHoistableInstructionType(Instruction *I) { - return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || - isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || - isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || - isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || - isa<InsertValueInst>(I); -} - -// Return true if the given instruction can be hoisted by CHR. -static bool isHoistable(Instruction *I, DominatorTree &DT) { - if (!isHoistableInstructionType(I)) - return false; - return isSafeToSpeculativelyExecute(I, nullptr, &DT); -} - -// Recursively traverse the use-def chains of the given value and return a set -// of the unhoistable base values defined within the scope (excluding the -// first-region entry block) or the (hoistable or unhoistable) base values that -// are defined outside (including the first-region entry block) of the -// scope. The returned set doesn't include constants. -static std::set<Value *> getBaseValues(Value *V, - DominatorTree &DT) { - std::set<Value *> Result; - if (auto *I = dyn_cast<Instruction>(V)) { - // We don't stop at a block that's not in the Scope because we would miss some - // instructions that are based on the same base values if we stop there. - if (!isHoistable(I, DT)) { - Result.insert(I); - return Result; - } - // I is hoistable above the Scope. - for (Value *Op : I->operands()) { - std::set<Value *> OpResult = getBaseValues(Op, DT); - Result.insert(OpResult.begin(), OpResult.end()); - } - return Result; - } - if (isa<Argument>(V)) { - Result.insert(V); - return Result; - } - // We don't include others like constants because those won't lead to any - // chance of folding of conditions (eg two bit checks merged into one check) - // after CHR. - return Result; // empty -} - -// Return true if V is already hoisted or can be hoisted (along with its -// operands) above the insert point. When it returns true and HoistStops is -// non-null, the instructions to stop hoisting at through the use-def chains are -// inserted into HoistStops. -static bool -checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, - DenseSet<Instruction *> &Unhoistables, - DenseSet<Instruction *> *HoistStops, - DenseMap<Instruction *, bool> &Visited) { - assert(InsertPoint && "Null InsertPoint"); - if (auto *I = dyn_cast<Instruction>(V)) { - if (Visited.count(I)) { - return Visited[I]; - } - assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); - assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); - if (Unhoistables.count(I)) { - // Don't hoist if they are not to be hoisted. - Visited[I] = false; - return false; - } - if (DT.dominates(I, InsertPoint)) { - // We are already above the insert point. Stop here. - if (HoistStops) - HoistStops->insert(I); - Visited[I] = true; - return true; - } - // We aren't not above the insert point, check if we can hoist it above the - // insert point. - if (isHoistable(I, DT)) { - // Check operands first. - DenseSet<Instruction *> OpsHoistStops; - bool AllOpsHoisted = true; - for (Value *Op : I->operands()) { - if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, - Visited)) { - AllOpsHoisted = false; - break; - } - } - if (AllOpsHoisted) { - CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); - if (HoistStops) - HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); - Visited[I] = true; - return true; - } - } - Visited[I] = false; - return false; - } - // Non-instructions are considered hoistable. - return true; -} - -// Returns true and sets the true probability and false probability of an -// MD_prof metadata if it's well-formed. -static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, - BranchProbability &FalseProb) { - if (!MD) return false; - MDString *MDName = cast<MDString>(MD->getOperand(0)); - if (MDName->getString() != "branch_weights" || - MD->getNumOperands() != 3) - return false; - ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); - ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); - if (!TrueWeight || !FalseWeight) - return false; - uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); - uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); - uint64_t SumWt = TrueWt + FalseWt; - - assert(SumWt >= TrueWt && SumWt >= FalseWt && - "Overflow calculating branch probabilities."); - - TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); - FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); - return true; -} - -static BranchProbability getCHRBiasThreshold() { - return BranchProbability::getBranchProbability( - static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); -} - -// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= -// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= -// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return -// false. -template <typename K, typename S, typename M> -static bool checkBias(K *Key, BranchProbability TrueProb, - BranchProbability FalseProb, S &TrueSet, S &FalseSet, - M &BiasMap) { - BranchProbability Threshold = getCHRBiasThreshold(); - if (TrueProb >= Threshold) { - TrueSet.insert(Key); - BiasMap[Key] = TrueProb; - return true; - } else if (FalseProb >= Threshold) { - FalseSet.insert(Key); - BiasMap[Key] = FalseProb; - return true; - } - return false; -} - -// Returns true and insert a region into the right biased set and the map if the -// branch of the region is biased. -static bool checkBiasedBranch(BranchInst *BI, Region *R, - DenseSet<Region *> &TrueBiasedRegionsGlobal, - DenseSet<Region *> &FalseBiasedRegionsGlobal, - DenseMap<Region *, BranchProbability> &BranchBiasMap) { - if (!BI->isConditional()) - return false; - BranchProbability ThenProb, ElseProb; - if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), - ThenProb, ElseProb)) - return false; - BasicBlock *IfThen = BI->getSuccessor(0); - BasicBlock *IfElse = BI->getSuccessor(1); - assert((IfThen == R->getExit() || IfElse == R->getExit()) && - IfThen != IfElse && - "Invariant from findScopes"); - if (IfThen == R->getExit()) { - // Swap them so that IfThen/ThenProb means going into the conditional code - // and IfElse/ElseProb means skipping it. - std::swap(IfThen, IfElse); - std::swap(ThenProb, ElseProb); - } - CHR_DEBUG(dbgs() << "BI " << *BI << " "); - CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); - CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); - return checkBias(R, ThenProb, ElseProb, - TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, - BranchBiasMap); -} - -// Returns true and insert a select into the right biased set and the map if the -// select is biased. -static bool checkBiasedSelect( - SelectInst *SI, Region *R, - DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, - DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, - DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { - BranchProbability TrueProb, FalseProb; - if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), - TrueProb, FalseProb)) - return false; - CHR_DEBUG(dbgs() << "SI " << *SI << " "); - CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); - CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); - return checkBias(SI, TrueProb, FalseProb, - TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, - SelectBiasMap); -} - -// Returns the instruction at which to hoist the dependent condition values and -// insert the CHR branch for a region. This is the terminator branch in the -// entry block or the first select in the entry block, if any. -static Instruction* getBranchInsertPoint(RegInfo &RI) { - Region *R = RI.R; - BasicBlock *EntryBB = R->getEntry(); - // The hoist point is by default the terminator of the entry block, which is - // the same as the branch instruction if RI.HasBranch is true. - Instruction *HoistPoint = EntryBB->getTerminator(); - for (SelectInst *SI : RI.Selects) { - if (SI->getParent() == EntryBB) { - // Pick the first select in Selects in the entry block. Note Selects is - // sorted in the instruction order within a block (asserted below). - HoistPoint = SI; - break; - } - } - assert(HoistPoint && "Null HoistPoint"); -#ifndef NDEBUG - // Check that HoistPoint is the first one in Selects in the entry block, - // if any. - DenseSet<Instruction *> EntryBlockSelectSet; - for (SelectInst *SI : RI.Selects) { - if (SI->getParent() == EntryBB) { - EntryBlockSelectSet.insert(SI); - } - } - for (Instruction &I : *EntryBB) { - if (EntryBlockSelectSet.count(&I) > 0) { - assert(&I == HoistPoint && - "HoistPoint must be the first one in Selects"); - break; - } - } -#endif - return HoistPoint; -} - -// Find a CHR scope in the given region. -CHRScope * CHR::findScope(Region *R) { - CHRScope *Result = nullptr; - BasicBlock *Entry = R->getEntry(); - BasicBlock *Exit = R->getExit(); // null if top level. - assert(Entry && "Entry must not be null"); - assert((Exit == nullptr) == (R->isTopLevelRegion()) && - "Only top level region has a null exit"); - if (Entry) - CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); - else - CHR_DEBUG(dbgs() << "Entry null\n"); - if (Exit) - CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); - else - CHR_DEBUG(dbgs() << "Exit null\n"); - // Exclude cases where Entry is part of a subregion (hence it doesn't belong - // to this region). - bool EntryInSubregion = RI.getRegionFor(Entry) != R; - if (EntryInSubregion) - return nullptr; - // Exclude loops - for (BasicBlock *Pred : predecessors(Entry)) - if (R->contains(Pred)) - return nullptr; - if (Exit) { - // Try to find an if-then block (check if R is an if-then). - // if (cond) { - // ... - // } - auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); - if (BI) - CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); - else - CHR_DEBUG(dbgs() << "BI null\n"); - if (BI && BI->isConditional()) { - BasicBlock *S0 = BI->getSuccessor(0); - BasicBlock *S1 = BI->getSuccessor(1); - CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); - CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); - if (S0 != S1 && (S0 == Exit || S1 == Exit)) { - RegInfo RI(R); - RI.HasBranch = checkBiasedBranch( - BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, - BranchBiasMap); - Result = new CHRScope(RI); - Scopes.insert(Result); - CHR_DEBUG(dbgs() << "Found a region with a branch\n"); - ++Stats.NumBranches; - if (!RI.HasBranch) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) - << "Branch not biased"; - }); - } - } - } - } - { - // Try to look for selects in the direct child blocks (as opposed to in - // subregions) of R. - // ... - // if (..) { // Some subregion - // ... - // } - // if (..) { // Some subregion - // ... - // } - // ... - // a = cond ? b : c; - // ... - SmallVector<SelectInst *, 8> Selects; - for (RegionNode *E : R->elements()) { - if (E->isSubRegion()) - continue; - // This returns the basic block of E if E is a direct child of R (not a - // subregion.) - BasicBlock *BB = E->getEntry(); - // Need to push in the order to make it easier to find the first Select - // later. - for (Instruction &I : *BB) { - if (auto *SI = dyn_cast<SelectInst>(&I)) { - Selects.push_back(SI); - ++Stats.NumBranches; - } - } - } - if (Selects.size() > 0) { - auto AddSelects = [&](RegInfo &RI) { - for (auto *SI : Selects) - if (checkBiasedSelect(SI, RI.R, - TrueBiasedSelectsGlobal, - FalseBiasedSelectsGlobal, - SelectBiasMap)) - RI.Selects.push_back(SI); - else - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) - << "Select not biased"; - }); - }; - if (!Result) { - CHR_DEBUG(dbgs() << "Found a select-only region\n"); - RegInfo RI(R); - AddSelects(RI); - Result = new CHRScope(RI); - Scopes.insert(Result); - } else { - CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); - AddSelects(Result->RegInfos[0]); - } - } - } - - if (Result) { - checkScopeHoistable(Result); - } - return Result; -} - -// Check that any of the branch and the selects in the region could be -// hoisted above the the CHR branch insert point (the most dominating of -// them, either the branch (at the end of the first block) or the first -// select in the first block). If the branch can't be hoisted, drop the -// selects in the first blocks. -// -// For example, for the following scope/region with selects, we want to insert -// the merged branch right before the first select in the first/entry block by -// hoisting c1, c2, c3, and c4. -// -// // Branch insert point here. -// a = c1 ? b : c; // Select 1 -// d = c2 ? e : f; // Select 2 -// if (c3) { // Branch -// ... -// c4 = foo() // A call. -// g = c4 ? h : i; // Select 3 -// } -// -// But suppose we can't hoist c4 because it's dependent on the preceding -// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop -// Select 2. If we can't hoist c3, we drop Selects 1 & 2. -void CHR::checkScopeHoistable(CHRScope *Scope) { - RegInfo &RI = Scope->RegInfos[0]; - Region *R = RI.R; - BasicBlock *EntryBB = R->getEntry(); - auto *Branch = RI.HasBranch ? - cast<BranchInst>(EntryBB->getTerminator()) : nullptr; - SmallVector<SelectInst *, 8> &Selects = RI.Selects; - if (RI.HasBranch || !Selects.empty()) { - Instruction *InsertPoint = getBranchInsertPoint(RI); - CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); - // Avoid a data dependence from a select or a branch to a(nother) - // select. Note no instruction can't data-depend on a branch (a branch - // instruction doesn't produce a value). - DenseSet<Instruction *> Unhoistables; - // Initialize Unhoistables with the selects. - for (SelectInst *SI : Selects) { - Unhoistables.insert(SI); - } - // Remove Selects that can't be hoisted. - for (auto it = Selects.begin(); it != Selects.end(); ) { - SelectInst *SI = *it; - if (SI == InsertPoint) { - ++it; - continue; - } - DenseMap<Instruction *, bool> Visited; - bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, - DT, Unhoistables, nullptr, Visited); - if (!IsHoistable) { - CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "DropUnhoistableSelect", SI) - << "Dropped unhoistable select"; - }); - it = Selects.erase(it); - // Since we are dropping the select here, we also drop it from - // Unhoistables. - Unhoistables.erase(SI); - } else - ++it; - } - // Update InsertPoint after potentially removing selects. - InsertPoint = getBranchInsertPoint(RI); - CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); - if (RI.HasBranch && InsertPoint != Branch) { - DenseMap<Instruction *, bool> Visited; - bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, - DT, Unhoistables, nullptr, Visited); - if (!IsHoistable) { - // If the branch isn't hoistable, drop the selects in the entry - // block, preferring the branch, which makes the branch the hoist - // point. - assert(InsertPoint != Branch && "Branch must not be the hoist point"); - CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); - CHR_DEBUG( - for (SelectInst *SI : Selects) { - dbgs() << "SI " << *SI << "\n"; - }); - for (SelectInst *SI : Selects) { - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "DropSelectUnhoistableBranch", SI) - << "Dropped select due to unhoistable branch"; - }); - } - Selects.erase(std::remove_if(Selects.begin(), Selects.end(), - [EntryBB](SelectInst *SI) { - return SI->getParent() == EntryBB; - }), Selects.end()); - Unhoistables.clear(); - InsertPoint = Branch; - } - } - CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); -#ifndef NDEBUG - if (RI.HasBranch) { - assert(!DT.dominates(Branch, InsertPoint) && - "Branch can't be already above the hoist point"); - DenseMap<Instruction *, bool> Visited; - assert(checkHoistValue(Branch->getCondition(), InsertPoint, - DT, Unhoistables, nullptr, Visited) && - "checkHoistValue for branch"); - } - for (auto *SI : Selects) { - assert(!DT.dominates(SI, InsertPoint) && - "SI can't be already above the hoist point"); - DenseMap<Instruction *, bool> Visited; - assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, - Unhoistables, nullptr, Visited) && - "checkHoistValue for selects"); - } - CHR_DEBUG(dbgs() << "Result\n"); - if (RI.HasBranch) { - CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); - } - for (auto *SI : Selects) { - CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); - } -#endif - } -} - -// Traverse the region tree, find all nested scopes and merge them if possible. -CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, - SmallVectorImpl<CHRScope *> &Scopes) { - CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); - CHRScope *Result = findScope(R); - // Visit subscopes. - CHRScope *ConsecutiveSubscope = nullptr; - SmallVector<CHRScope *, 8> Subscopes; - for (auto It = R->begin(); It != R->end(); ++It) { - const std::unique_ptr<Region> &SubR = *It; - auto NextIt = std::next(It); - Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; - CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() - << "\n"); - CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); - if (SubCHRScope) { - CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); - } else { - CHR_DEBUG(dbgs() << "Subregion Scope null\n"); - } - if (SubCHRScope) { - if (!ConsecutiveSubscope) - ConsecutiveSubscope = SubCHRScope; - else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { - Subscopes.push_back(ConsecutiveSubscope); - ConsecutiveSubscope = SubCHRScope; - } else - ConsecutiveSubscope->append(SubCHRScope); - } else { - if (ConsecutiveSubscope) { - Subscopes.push_back(ConsecutiveSubscope); - } - ConsecutiveSubscope = nullptr; - } - } - if (ConsecutiveSubscope) { - Subscopes.push_back(ConsecutiveSubscope); - } - for (CHRScope *Sub : Subscopes) { - if (Result) { - // Combine it with the parent. - Result->addSub(Sub); - } else { - // Push Subscopes as they won't be combined with the parent. - Scopes.push_back(Sub); - } - } - return Result; -} - -static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { - DenseSet<Value *> ConditionValues; - if (RI.HasBranch) { - auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); - ConditionValues.insert(BI->getCondition()); - } - for (SelectInst *SI : RI.Selects) { - ConditionValues.insert(SI->getCondition()); - } - return ConditionValues; -} - - -// Determine whether to split a scope depending on the sets of the branch -// condition values of the previous region and the current region. We split -// (return true) it if 1) the condition values of the inner/lower scope can't be -// hoisted up to the outer/upper scope, or 2) the two sets of the condition -// values have an empty intersection (because the combined branch conditions -// won't probably lead to a simpler combined condition). -static bool shouldSplit(Instruction *InsertPoint, - DenseSet<Value *> &PrevConditionValues, - DenseSet<Value *> &ConditionValues, - DominatorTree &DT, - DenseSet<Instruction *> &Unhoistables) { - CHR_DEBUG( - dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; - for (Value *V : PrevConditionValues) { - dbgs() << *V << ", "; - } - dbgs() << " ConditionValues "; - for (Value *V : ConditionValues) { - dbgs() << *V << ", "; - } - dbgs() << "\n"); - assert(InsertPoint && "Null InsertPoint"); - // If any of Bases isn't hoistable to the hoist point, split. - for (Value *V : ConditionValues) { - DenseMap<Instruction *, bool> Visited; - if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { - CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); - return true; // Not hoistable, split. - } - } - // If PrevConditionValues or ConditionValues is empty, don't split to avoid - // unnecessary splits at scopes with no branch/selects. If - // PrevConditionValues and ConditionValues don't intersect at all, split. - if (!PrevConditionValues.empty() && !ConditionValues.empty()) { - // Use std::set as DenseSet doesn't work with set_intersection. - std::set<Value *> PrevBases, Bases; - for (Value *V : PrevConditionValues) { - std::set<Value *> BaseValues = getBaseValues(V, DT); - PrevBases.insert(BaseValues.begin(), BaseValues.end()); - } - for (Value *V : ConditionValues) { - std::set<Value *> BaseValues = getBaseValues(V, DT); - Bases.insert(BaseValues.begin(), BaseValues.end()); - } - CHR_DEBUG( - dbgs() << "PrevBases "; - for (Value *V : PrevBases) { - dbgs() << *V << ", "; - } - dbgs() << " Bases "; - for (Value *V : Bases) { - dbgs() << *V << ", "; - } - dbgs() << "\n"); - std::set<Value *> Intersection; - std::set_intersection(PrevBases.begin(), PrevBases.end(), - Bases.begin(), Bases.end(), - std::inserter(Intersection, Intersection.begin())); - if (Intersection.empty()) { - // Empty intersection, split. - CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); - return true; - } - } - CHR_DEBUG(dbgs() << "No split\n"); - return false; // Don't split. -} - -static void getSelectsInScope(CHRScope *Scope, - DenseSet<Instruction *> &Output) { - for (RegInfo &RI : Scope->RegInfos) - for (SelectInst *SI : RI.Selects) - Output.insert(SI); - for (CHRScope *Sub : Scope->Subs) - getSelectsInScope(Sub, Output); -} - -void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, - SmallVectorImpl<CHRScope *> &Output) { - for (CHRScope *Scope : Input) { - assert(!Scope->BranchInsertPoint && - "BranchInsertPoint must not be set"); - DenseSet<Instruction *> Unhoistables; - getSelectsInScope(Scope, Unhoistables); - splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); - } -#ifndef NDEBUG - for (CHRScope *Scope : Output) { - assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); - } -#endif -} - -SmallVector<CHRScope *, 8> CHR::splitScope( - CHRScope *Scope, - CHRScope *Outer, - DenseSet<Value *> *OuterConditionValues, - Instruction *OuterInsertPoint, - SmallVectorImpl<CHRScope *> &Output, - DenseSet<Instruction *> &Unhoistables) { - if (Outer) { - assert(OuterConditionValues && "Null OuterConditionValues"); - assert(OuterInsertPoint && "Null OuterInsertPoint"); - } - bool PrevSplitFromOuter = true; - DenseSet<Value *> PrevConditionValues; - Instruction *PrevInsertPoint = nullptr; - SmallVector<CHRScope *, 8> Splits; - SmallVector<bool, 8> SplitsSplitFromOuter; - SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; - SmallVector<Instruction *, 8> SplitsInsertPoints; - SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy - for (RegInfo &RI : RegInfos) { - Instruction *InsertPoint = getBranchInsertPoint(RI); - DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); - CHR_DEBUG( - dbgs() << "ConditionValues "; - for (Value *V : ConditionValues) { - dbgs() << *V << ", "; - } - dbgs() << "\n"); - if (RI.R == RegInfos[0].R) { - // First iteration. Check to see if we should split from the outer. - if (Outer) { - CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); - CHR_DEBUG(dbgs() << "Should split from outer at " - << RI.R->getNameStr() << "\n"); - if (shouldSplit(OuterInsertPoint, *OuterConditionValues, - ConditionValues, DT, Unhoistables)) { - PrevConditionValues = ConditionValues; - PrevInsertPoint = InsertPoint; - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "SplitScopeFromOuter", - RI.R->getEntry()->getTerminator()) - << "Split scope from outer due to unhoistable branch/select " - << "and/or lack of common condition values"; - }); - } else { - // Not splitting from the outer. Use the outer bases and insert - // point. Union the bases. - PrevSplitFromOuter = false; - PrevConditionValues = *OuterConditionValues; - PrevConditionValues.insert(ConditionValues.begin(), - ConditionValues.end()); - PrevInsertPoint = OuterInsertPoint; - } - } else { - CHR_DEBUG(dbgs() << "Outer null\n"); - PrevConditionValues = ConditionValues; - PrevInsertPoint = InsertPoint; - } - } else { - CHR_DEBUG(dbgs() << "Should split from prev at " - << RI.R->getNameStr() << "\n"); - if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, - DT, Unhoistables)) { - CHRScope *Tail = Scope->split(RI.R); - Scopes.insert(Tail); - Splits.push_back(Scope); - SplitsSplitFromOuter.push_back(PrevSplitFromOuter); - SplitsConditionValues.push_back(PrevConditionValues); - SplitsInsertPoints.push_back(PrevInsertPoint); - Scope = Tail; - PrevConditionValues = ConditionValues; - PrevInsertPoint = InsertPoint; - PrevSplitFromOuter = true; - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, - "SplitScopeFromPrev", - RI.R->getEntry()->getTerminator()) - << "Split scope from previous due to unhoistable branch/select " - << "and/or lack of common condition values"; - }); - } else { - // Not splitting. Union the bases. Keep the hoist point. - PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); - } - } - } - Splits.push_back(Scope); - SplitsSplitFromOuter.push_back(PrevSplitFromOuter); - SplitsConditionValues.push_back(PrevConditionValues); - assert(PrevInsertPoint && "Null PrevInsertPoint"); - SplitsInsertPoints.push_back(PrevInsertPoint); - assert(Splits.size() == SplitsConditionValues.size() && - Splits.size() == SplitsSplitFromOuter.size() && - Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); - for (size_t I = 0; I < Splits.size(); ++I) { - CHRScope *Split = Splits[I]; - DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; - Instruction *SplitInsertPoint = SplitsInsertPoints[I]; - SmallVector<CHRScope *, 8> NewSubs; - DenseSet<Instruction *> SplitUnhoistables; - getSelectsInScope(Split, SplitUnhoistables); - for (CHRScope *Sub : Split->Subs) { - SmallVector<CHRScope *, 8> SubSplits = splitScope( - Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, - SplitUnhoistables); - NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); - } - Split->Subs = NewSubs; - } - SmallVector<CHRScope *, 8> Result; - for (size_t I = 0; I < Splits.size(); ++I) { - CHRScope *Split = Splits[I]; - if (SplitsSplitFromOuter[I]) { - // Split from the outer. - Output.push_back(Split); - Split->BranchInsertPoint = SplitsInsertPoints[I]; - CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] - << "\n"); - } else { - // Connected to the outer. - Result.push_back(Split); - } - } - if (!Outer) - assert(Result.empty() && - "If no outer (top-level), must return no nested ones"); - return Result; -} - -void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { - for (CHRScope *Scope : Scopes) { - assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); - classifyBiasedScopes(Scope, Scope); - CHR_DEBUG( - dbgs() << "classifyBiasedScopes " << *Scope << "\n"; - dbgs() << "TrueBiasedRegions "; - for (Region *R : Scope->TrueBiasedRegions) { - dbgs() << R->getNameStr() << ", "; - } - dbgs() << "\n"; - dbgs() << "FalseBiasedRegions "; - for (Region *R : Scope->FalseBiasedRegions) { - dbgs() << R->getNameStr() << ", "; - } - dbgs() << "\n"; - dbgs() << "TrueBiasedSelects "; - for (SelectInst *SI : Scope->TrueBiasedSelects) { - dbgs() << *SI << ", "; - } - dbgs() << "\n"; - dbgs() << "FalseBiasedSelects "; - for (SelectInst *SI : Scope->FalseBiasedSelects) { - dbgs() << *SI << ", "; - } - dbgs() << "\n";); - } -} - -void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { - for (RegInfo &RI : Scope->RegInfos) { - if (RI.HasBranch) { - Region *R = RI.R; - if (TrueBiasedRegionsGlobal.count(R) > 0) - OutermostScope->TrueBiasedRegions.insert(R); - else if (FalseBiasedRegionsGlobal.count(R) > 0) - OutermostScope->FalseBiasedRegions.insert(R); - else - llvm_unreachable("Must be biased"); - } - for (SelectInst *SI : RI.Selects) { - if (TrueBiasedSelectsGlobal.count(SI) > 0) - OutermostScope->TrueBiasedSelects.insert(SI); - else if (FalseBiasedSelectsGlobal.count(SI) > 0) - OutermostScope->FalseBiasedSelects.insert(SI); - else - llvm_unreachable("Must be biased"); - } - } - for (CHRScope *Sub : Scope->Subs) { - classifyBiasedScopes(Sub, OutermostScope); - } -} - -static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { - unsigned NumBiased = Scope->TrueBiasedRegions.size() + - Scope->FalseBiasedRegions.size() + - Scope->TrueBiasedSelects.size() + - Scope->FalseBiasedSelects.size(); - return NumBiased >= CHRMergeThreshold; -} - -void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, - SmallVectorImpl<CHRScope *> &Output) { - for (CHRScope *Scope : Input) { - // Filter out the ones with only one region and no subs. - if (!hasAtLeastTwoBiasedBranches(Scope)) { - CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " - << Scope->TrueBiasedRegions.size() - << " falsy-regions " << Scope->FalseBiasedRegions.size() - << " true-selects " << Scope->TrueBiasedSelects.size() - << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); - ORE.emit([&]() { - return OptimizationRemarkMissed( - DEBUG_TYPE, - "DropScopeWithOneBranchOrSelect", - Scope->RegInfos[0].R->getEntry()->getTerminator()) - << "Drop scope with < " - << ore::NV("CHRMergeThreshold", CHRMergeThreshold) - << " biased branch(es) or select(s)"; - }); - continue; - } - Output.push_back(Scope); - } -} - -void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, - SmallVectorImpl<CHRScope *> &Output) { - for (CHRScope *Scope : Input) { - assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && - "Empty"); - setCHRRegions(Scope, Scope); - Output.push_back(Scope); - CHR_DEBUG( - dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; - for (auto pair : Scope->HoistStopMap) { - Region *R = pair.first; - dbgs() << "Region " << R->getNameStr() << "\n"; - for (Instruction *I : pair.second) { - dbgs() << "HoistStop " << *I << "\n"; - } - } - dbgs() << "CHRRegions" << "\n"; - for (RegInfo &RI : Scope->CHRRegions) { - dbgs() << RI.R->getNameStr() << "\n"; - }); - } -} - -void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { - DenseSet<Instruction *> Unhoistables; - // Put the biased selects in Unhoistables because they should stay where they - // are and constant-folded after CHR (in case one biased select or a branch - // can depend on another biased select.) - for (RegInfo &RI : Scope->RegInfos) { - for (SelectInst *SI : RI.Selects) { - Unhoistables.insert(SI); - } - } - Instruction *InsertPoint = OutermostScope->BranchInsertPoint; - for (RegInfo &RI : Scope->RegInfos) { - Region *R = RI.R; - DenseSet<Instruction *> HoistStops; - bool IsHoisted = false; - if (RI.HasBranch) { - assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || - OutermostScope->FalseBiasedRegions.count(R) > 0) && - "Must be truthy or falsy"); - auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); - // Note checkHoistValue fills in HoistStops. - DenseMap<Instruction *, bool> Visited; - bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, - Unhoistables, &HoistStops, Visited); - assert(IsHoistable && "Must be hoistable"); - (void)(IsHoistable); // Unused in release build - IsHoisted = true; - } - for (SelectInst *SI : RI.Selects) { - assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || - OutermostScope->FalseBiasedSelects.count(SI) > 0) && - "Must be true or false biased"); - // Note checkHoistValue fills in HoistStops. - DenseMap<Instruction *, bool> Visited; - bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, - Unhoistables, &HoistStops, Visited); - assert(IsHoistable && "Must be hoistable"); - (void)(IsHoistable); // Unused in release build - IsHoisted = true; - } - if (IsHoisted) { - OutermostScope->CHRRegions.push_back(RI); - OutermostScope->HoistStopMap[R] = HoistStops; - } - } - for (CHRScope *Sub : Scope->Subs) - setCHRRegions(Sub, OutermostScope); -} - -bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { - return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); -} - -void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, - SmallVectorImpl<CHRScope *> &Output) { - Output.resize(Input.size()); - llvm::copy(Input, Output.begin()); - llvm::stable_sort(Output, CHRScopeSorter); -} - -// Return true if V is already hoisted or was hoisted (along with its operands) -// to the insert point. -static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, - HoistStopMapTy &HoistStopMap, - DenseSet<Instruction *> &HoistedSet, - DenseSet<PHINode *> &TrivialPHIs, - DominatorTree &DT) { - auto IT = HoistStopMap.find(R); - assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); - DenseSet<Instruction *> &HoistStops = IT->second; - if (auto *I = dyn_cast<Instruction>(V)) { - if (I == HoistPoint) - return; - if (HoistStops.count(I)) - return; - if (auto *PN = dyn_cast<PHINode>(I)) - if (TrivialPHIs.count(PN)) - // The trivial phi inserted by the previous CHR scope could replace a - // non-phi in HoistStops. Note that since this phi is at the exit of a - // previous CHR scope, which dominates this scope, it's safe to stop - // hoisting there. - return; - if (HoistedSet.count(I)) - // Already hoisted, return. - return; - assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); - assert(DT.getNode(I->getParent()) && "DT must contain I's block"); - assert(DT.getNode(HoistPoint->getParent()) && - "DT must contain HoistPoint block"); - if (DT.dominates(I, HoistPoint)) - // We are already above the hoist point. Stop here. This may be necessary - // when multiple scopes would independently hoist the same - // instruction. Since an outer (dominating) scope would hoist it to its - // entry before an inner (dominated) scope would to its entry, the inner - // scope may see the instruction already hoisted, in which case it - // potentially wrong for the inner scope to hoist it and could cause bad - // IR (non-dominating def), but safe to skip hoisting it instead because - // it's already in a block that dominates the inner scope. - return; - for (Value *Op : I->operands()) { - hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); - } - I->moveBefore(HoistPoint); - HoistedSet.insert(I); - CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); - } -} - -// Hoist the dependent condition values of the branches and the selects in the -// scope to the insert point. -static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, - DenseSet<PHINode *> &TrivialPHIs, - DominatorTree &DT) { - DenseSet<Instruction *> HoistedSet; - for (const RegInfo &RI : Scope->CHRRegions) { - Region *R = RI.R; - bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); - bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); - if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { - auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); - hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, - HoistedSet, TrivialPHIs, DT); - } - for (SelectInst *SI : RI.Selects) { - bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); - bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); - if (!(IsTrueBiased || IsFalseBiased)) - continue; - hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, - HoistedSet, TrivialPHIs, DT); - } - } -} - -// Negate the predicate if an ICmp if it's used only by branches or selects by -// swapping the operands of the branches or the selects. Returns true if success. -static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, - Instruction *ExcludedUser, - CHRScope *Scope) { - for (User *U : ICmp->users()) { - if (U == ExcludedUser) - continue; - if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) - continue; - if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) - continue; - return false; - } - for (User *U : ICmp->users()) { - if (U == ExcludedUser) - continue; - if (auto *BI = dyn_cast<BranchInst>(U)) { - assert(BI->isConditional() && "Must be conditional"); - BI->swapSuccessors(); - // Don't need to swap this in terms of - // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based - // mean whehter the branch is likely go into the if-then rather than - // successor0/successor1 and because we can tell which edge is the then or - // the else one by comparing the destination to the region exit block. - continue; - } - if (auto *SI = dyn_cast<SelectInst>(U)) { - // Swap operands - Value *TrueValue = SI->getTrueValue(); - Value *FalseValue = SI->getFalseValue(); - SI->setTrueValue(FalseValue); - SI->setFalseValue(TrueValue); - SI->swapProfMetadata(); - if (Scope->TrueBiasedSelects.count(SI)) { - assert(Scope->FalseBiasedSelects.count(SI) == 0 && - "Must not be already in"); - Scope->FalseBiasedSelects.insert(SI); - } else if (Scope->FalseBiasedSelects.count(SI)) { - assert(Scope->TrueBiasedSelects.count(SI) == 0 && - "Must not be already in"); - Scope->TrueBiasedSelects.insert(SI); - } - continue; - } - llvm_unreachable("Must be a branch or a select"); - } - ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); - return true; -} - -// A helper for transformScopes. Insert a trivial phi at the scope exit block -// for a value that's defined in the scope but used outside it (meaning it's -// alive at the exit block). -static void insertTrivialPHIs(CHRScope *Scope, - BasicBlock *EntryBlock, BasicBlock *ExitBlock, - DenseSet<PHINode *> &TrivialPHIs) { - DenseSet<BasicBlock *> BlocksInScopeSet; - SmallVector<BasicBlock *, 8> BlocksInScopeVec; - for (RegInfo &RI : Scope->RegInfos) { - for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the - // sub-Scopes. - BlocksInScopeSet.insert(BB); - BlocksInScopeVec.push_back(BB); - } - } - CHR_DEBUG( - dbgs() << "Inserting redudant phis\n"; - for (BasicBlock *BB : BlocksInScopeVec) { - dbgs() << "BlockInScope " << BB->getName() << "\n"; - }); - for (BasicBlock *BB : BlocksInScopeVec) { - for (Instruction &I : *BB) { - SmallVector<Instruction *, 8> Users; - for (User *U : I.users()) { - if (auto *UI = dyn_cast<Instruction>(U)) { - if (BlocksInScopeSet.count(UI->getParent()) == 0 && - // Unless there's already a phi for I at the exit block. - !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { - CHR_DEBUG(dbgs() << "V " << I << "\n"); - CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); - Users.push_back(UI); - } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { - // There's a loop backedge from a block that's dominated by this - // scope to the entry block. - CHR_DEBUG(dbgs() << "V " << I << "\n"); - CHR_DEBUG(dbgs() - << "Used at entry block (for a back edge) by a phi user " - << *UI << "\n"); - Users.push_back(UI); - } - } - } - if (Users.size() > 0) { - // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at - // ExitBlock. Replace I with the new phi in UI unless UI is another - // phi at ExitBlock. - unsigned PredCount = std::distance(pred_begin(ExitBlock), - pred_end(ExitBlock)); - PHINode *PN = PHINode::Create(I.getType(), PredCount, "", - &ExitBlock->front()); - for (BasicBlock *Pred : predecessors(ExitBlock)) { - PN->addIncoming(&I, Pred); - } - TrivialPHIs.insert(PN); - CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); - for (Instruction *UI : Users) { - for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { - if (UI->getOperand(J) == &I) { - UI->setOperand(J, PN); - } - } - CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); - } - } - } - } -} - -// Assert that all the CHR regions of the scope have a biased branch or select. -static void LLVM_ATTRIBUTE_UNUSED -assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { -#ifndef NDEBUG - auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { - if (Scope->TrueBiasedRegions.count(RI.R) || - Scope->FalseBiasedRegions.count(RI.R)) - return true; - for (SelectInst *SI : RI.Selects) - if (Scope->TrueBiasedSelects.count(SI) || - Scope->FalseBiasedSelects.count(SI)) - return true; - return false; - }; - for (RegInfo &RI : Scope->CHRRegions) { - assert(HasBiasedBranchOrSelect(RI, Scope) && - "Must have biased branch or select"); - } -#endif -} - -// Assert that all the condition values of the biased branches and selects have -// been hoisted to the pre-entry block or outside of the scope. -static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( - CHRScope *Scope, BasicBlock *PreEntryBlock) { - CHR_DEBUG(dbgs() << "Biased regions condition values \n"); - for (RegInfo &RI : Scope->CHRRegions) { - Region *R = RI.R; - bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); - bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); - if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { - auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); - Value *V = BI->getCondition(); - CHR_DEBUG(dbgs() << *V << "\n"); - if (auto *I = dyn_cast<Instruction>(V)) { - (void)(I); // Unused in release build. - assert((I->getParent() == PreEntryBlock || - !Scope->contains(I)) && - "Must have been hoisted to PreEntryBlock or outside the scope"); - } - } - for (SelectInst *SI : RI.Selects) { - bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); - bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); - if (!(IsTrueBiased || IsFalseBiased)) - continue; - Value *V = SI->getCondition(); - CHR_DEBUG(dbgs() << *V << "\n"); - if (auto *I = dyn_cast<Instruction>(V)) { - (void)(I); // Unused in release build. - assert((I->getParent() == PreEntryBlock || - !Scope->contains(I)) && - "Must have been hoisted to PreEntryBlock or outside the scope"); - } - } - } -} - -void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { - CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); - - assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); - Region *FirstRegion = Scope->RegInfos[0].R; - BasicBlock *EntryBlock = FirstRegion->getEntry(); - Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; - BasicBlock *ExitBlock = LastRegion->getExit(); - Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); - - if (ExitBlock) { - // Insert a trivial phi at the exit block (where the CHR hot path and the - // cold path merges) for a value that's defined in the scope but used - // outside it (meaning it's alive at the exit block). We will add the - // incoming values for the CHR cold paths to it below. Without this, we'd - // miss updating phi's for such values unless there happens to already be a - // phi for that value there. - insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); - } - - // Split the entry block of the first region. The new block becomes the new - // entry block of the first region. The old entry block becomes the block to - // insert the CHR branch into. Note DT gets updated. Since DT gets updated - // through the split, we update the entry of the first region after the split, - // and Region only points to the entry and the exit blocks, rather than - // keeping everything in a list or set, the blocks membership and the - // entry/exit blocks of the region are still valid after the split. - CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() - << " at " << *Scope->BranchInsertPoint << "\n"); - BasicBlock *NewEntryBlock = - SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); - assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && - "NewEntryBlock's only pred must be EntryBlock"); - FirstRegion->replaceEntryRecursive(NewEntryBlock); - BasicBlock *PreEntryBlock = EntryBlock; - - ValueToValueMapTy VMap; - // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a - // hot path (originals) and a cold path (clones) and update the PHIs at the - // exit block. - cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); - - // Replace the old (placeholder) branch with the new (merged) conditional - // branch. - BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, - NewEntryBlock, VMap); - -#ifndef NDEBUG - assertCHRRegionsHaveBiasedBranchOrSelect(Scope); -#endif - - // Hoist the conditional values of the branches/selects. - hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); - -#ifndef NDEBUG - assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); -#endif - - // Create the combined branch condition and constant-fold the branches/selects - // in the hot path. - fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, - ProfileCount ? ProfileCount.getValue() : 0); -} - -// A helper for transformScopes. Clone the blocks in the scope (excluding the -// PreEntryBlock) to split into a hot path and a cold path and update the PHIs -// at the exit block. -void CHR::cloneScopeBlocks(CHRScope *Scope, - BasicBlock *PreEntryBlock, - BasicBlock *ExitBlock, - Region *LastRegion, - ValueToValueMapTy &VMap) { - // Clone all the blocks. The original blocks will be the hot-path - // CHR-optimized code and the cloned blocks will be the original unoptimized - // code. This is so that the block pointers from the - // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code - // which CHR should apply to. - SmallVector<BasicBlock*, 8> NewBlocks; - for (RegInfo &RI : Scope->RegInfos) - for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the - // sub-Scopes. - assert(BB != PreEntryBlock && "Don't copy the preetntry block"); - BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); - NewBlocks.push_back(NewBB); - VMap[BB] = NewBB; - } - - // Place the cloned blocks right after the original blocks (right before the - // exit block of.) - if (ExitBlock) - F.getBasicBlockList().splice(ExitBlock->getIterator(), - F.getBasicBlockList(), - NewBlocks[0]->getIterator(), F.end()); - - // Update the cloned blocks/instructions to refer to themselves. - for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) - for (Instruction &I : *NewBlocks[i]) - RemapInstruction(&I, VMap, - RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); - - // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for - // the top-level region but we don't need to add PHIs. The trivial PHIs - // inserted above will be updated here. - if (ExitBlock) - for (PHINode &PN : ExitBlock->phis()) - for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; - ++I) { - BasicBlock *Pred = PN.getIncomingBlock(I); - if (LastRegion->contains(Pred)) { - Value *V = PN.getIncomingValue(I); - auto It = VMap.find(V); - if (It != VMap.end()) V = It->second; - assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); - PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); - } - } -} - -// A helper for transformScope. Replace the old (placeholder) branch with the -// new (merged) conditional branch. -BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, - BasicBlock *EntryBlock, - BasicBlock *NewEntryBlock, - ValueToValueMapTy &VMap) { - BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); - assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && - "SplitBlock did not work correctly!"); - assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && - "NewEntryBlock's only pred must be EntryBlock"); - assert(VMap.find(NewEntryBlock) != VMap.end() && - "NewEntryBlock must have been copied"); - OldBR->dropAllReferences(); - OldBR->eraseFromParent(); - // The true predicate is a placeholder. It will be replaced later in - // fixupBranchesAndSelects(). - BranchInst *NewBR = BranchInst::Create(NewEntryBlock, - cast<BasicBlock>(VMap[NewEntryBlock]), - ConstantInt::getTrue(F.getContext())); - PreEntryBlock->getInstList().push_back(NewBR); - assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && - "NewEntryBlock's only pred must be EntryBlock"); - return NewBR; -} - -// A helper for transformScopes. Create the combined branch condition and -// constant-fold the branches/selects in the hot path. -void CHR::fixupBranchesAndSelects(CHRScope *Scope, - BasicBlock *PreEntryBlock, - BranchInst *MergedBR, - uint64_t ProfileCount) { - Value *MergedCondition = ConstantInt::getTrue(F.getContext()); - BranchProbability CHRBranchBias(1, 1); - uint64_t NumCHRedBranches = 0; - IRBuilder<> IRB(PreEntryBlock->getTerminator()); - for (RegInfo &RI : Scope->CHRRegions) { - Region *R = RI.R; - if (RI.HasBranch) { - fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); - ++NumCHRedBranches; - } - for (SelectInst *SI : RI.Selects) { - fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); - ++NumCHRedBranches; - } - } - Stats.NumBranchesDelta += NumCHRedBranches - 1; - Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; - ORE.emit([&]() { - return OptimizationRemark(DEBUG_TYPE, - "CHR", - // Refer to the hot (original) path - MergedBR->getSuccessor(0)->getTerminator()) - << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) - << " branches or selects"; - }); - MergedBR->setCondition(MergedCondition); - SmallVector<uint32_t, 2> Weights; - Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); - Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); - MDBuilder MDB(F.getContext()); - MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); - CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] - << "\n"); -} - -// A helper for fixupBranchesAndSelects. Add to the combined branch condition -// and constant-fold a branch in the hot path. -void CHR::fixupBranch(Region *R, CHRScope *Scope, - IRBuilder<> &IRB, - Value *&MergedCondition, - BranchProbability &CHRBranchBias) { - bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); - assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && - "Must be truthy or falsy"); - auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); - assert(BranchBiasMap.find(R) != BranchBiasMap.end() && - "Must be in the bias map"); - BranchProbability Bias = BranchBiasMap[R]; - assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); - // Take the min. - if (CHRBranchBias > Bias) - CHRBranchBias = Bias; - BasicBlock *IfThen = BI->getSuccessor(1); - BasicBlock *IfElse = BI->getSuccessor(0); - BasicBlock *RegionExitBlock = R->getExit(); - assert(RegionExitBlock && "Null ExitBlock"); - assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && - IfThen != IfElse && "Invariant from findScopes"); - if (IfThen == RegionExitBlock) { - // Swap them so that IfThen means going into it and IfElse means skipping - // it. - std::swap(IfThen, IfElse); - } - CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() - << " IfElse " << IfElse->getName() << "\n"); - Value *Cond = BI->getCondition(); - BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; - bool ConditionTrue = HotTarget == BI->getSuccessor(0); - addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, - MergedCondition); - // Constant-fold the branch at ClonedEntryBlock. - assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && - "The successor shouldn't change"); - Value *NewCondition = ConditionTrue ? - ConstantInt::getTrue(F.getContext()) : - ConstantInt::getFalse(F.getContext()); - BI->setCondition(NewCondition); -} - -// A helper for fixupBranchesAndSelects. Add to the combined branch condition -// and constant-fold a select in the hot path. -void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, - IRBuilder<> &IRB, - Value *&MergedCondition, - BranchProbability &CHRBranchBias) { - bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); - assert((IsTrueBiased || - Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); - assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && - "Must be in the bias map"); - BranchProbability Bias = SelectBiasMap[SI]; - assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); - // Take the min. - if (CHRBranchBias > Bias) - CHRBranchBias = Bias; - Value *Cond = SI->getCondition(); - addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, - MergedCondition); - Value *NewCondition = IsTrueBiased ? - ConstantInt::getTrue(F.getContext()) : - ConstantInt::getFalse(F.getContext()); - SI->setCondition(NewCondition); -} - -// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged -// condition. -void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, - Instruction *BranchOrSelect, - CHRScope *Scope, - IRBuilder<> &IRB, - Value *&MergedCondition) { - if (IsTrueBiased) { - MergedCondition = IRB.CreateAnd(MergedCondition, Cond); - } else { - // If Cond is an icmp and all users of V except for BranchOrSelect is a - // branch, negate the icmp predicate and swap the branch targets and avoid - // inserting an Xor to negate Cond. - bool Done = false; - if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) - if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { - MergedCondition = IRB.CreateAnd(MergedCondition, Cond); - Done = true; - } - if (!Done) { - Value *Negate = IRB.CreateXor( - ConstantInt::getTrue(F.getContext()), Cond); - MergedCondition = IRB.CreateAnd(MergedCondition, Negate); - } - } -} - -void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { - unsigned I = 0; - DenseSet<PHINode *> TrivialPHIs; - for (CHRScope *Scope : CHRScopes) { - transformScopes(Scope, TrivialPHIs); - CHR_DEBUG( - std::ostringstream oss; - oss << " after transformScopes " << I++; - dumpIR(F, oss.str().c_str(), nullptr)); - (void)I; - } -} - -static void LLVM_ATTRIBUTE_UNUSED -dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { - dbgs() << Label << " " << Scopes.size() << "\n"; - for (CHRScope *Scope : Scopes) { - dbgs() << *Scope << "\n"; - } -} - -bool CHR::run() { - if (!shouldApply(F, PSI)) - return false; - - CHR_DEBUG(dumpIR(F, "before", nullptr)); - - bool Changed = false; - { - CHR_DEBUG( - dbgs() << "RegionInfo:\n"; - RI.print(dbgs())); - - // Recursively traverse the region tree and find regions that have biased - // branches and/or selects and create scopes. - SmallVector<CHRScope *, 8> AllScopes; - findScopes(AllScopes); - CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); - - // Split the scopes if 1) the conditiona values of the biased - // branches/selects of the inner/lower scope can't be hoisted up to the - // outermost/uppermost scope entry, or 2) the condition values of the biased - // branches/selects in a scope (including subscopes) don't share at least - // one common value. - SmallVector<CHRScope *, 8> SplitScopes; - splitScopes(AllScopes, SplitScopes); - CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); - - // After splitting, set the biased regions and selects of a scope (a tree - // root) that include those of the subscopes. - classifyBiasedScopes(SplitScopes); - CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); - - // Filter out the scopes that has only one biased region or select (CHR - // isn't useful in such a case). - SmallVector<CHRScope *, 8> FilteredScopes; - filterScopes(SplitScopes, FilteredScopes); - CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); - - // Set the regions to be CHR'ed and their hoist stops for each scope. - SmallVector<CHRScope *, 8> SetScopes; - setCHRRegions(FilteredScopes, SetScopes); - CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); - - // Sort CHRScopes by the depth so that outer CHRScopes comes before inner - // ones. We need to apply CHR from outer to inner so that we apply CHR only - // to the hot path, rather than both hot and cold paths. - SmallVector<CHRScope *, 8> SortedScopes; - sortScopes(SetScopes, SortedScopes); - CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); - - CHR_DEBUG( - dbgs() << "RegionInfo:\n"; - RI.print(dbgs())); - - // Apply the CHR transformation. - if (!SortedScopes.empty()) { - transformScopes(SortedScopes); - Changed = true; - } - } - - if (Changed) { - CHR_DEBUG(dumpIR(F, "after", &Stats)); - ORE.emit([&]() { - return OptimizationRemark(DEBUG_TYPE, "Stats", &F) - << ore::NV("Function", &F) << " " - << "Reduced the number of branches in hot paths by " - << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) - << " (static) and " - << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) - << " (weighted by PGO count)"; - }); - } - - return Changed; -} - -bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { - BlockFrequencyInfo &BFI = - getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); - DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); - ProfileSummaryInfo &PSI = - getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); - RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); - std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = - llvm::make_unique<OptimizationRemarkEmitter>(&F); - return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); -} - -namespace llvm { - -ControlHeightReductionPass::ControlHeightReductionPass() { - parseCHRFilterFiles(); -} - -PreservedAnalyses ControlHeightReductionPass::run( - Function &F, - FunctionAnalysisManager &FAM) { - auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); - auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); - auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); - auto &MAM = MAMProxy.getManager(); - auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); - auto &RI = FAM.getResult<RegionInfoAnalysis>(F); - auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); - bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); - if (!Changed) - return PreservedAnalyses::all(); - auto PA = PreservedAnalyses(); - PA.preserve<GlobalsAA>(); - return PA; -} - -} // namespace llvm |
