Diffstat (limited to 'llvm/lib/Transforms/Scalar/GuardWidening.cpp')
-rw-r--r--  llvm/lib/Transforms/Scalar/GuardWidening.cpp  946
1 file changed, 946 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
new file mode 100644
index 0000000000000..2697d78095681
--- /dev/null
+++ b/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -0,0 +1,946 @@
//===- GuardWidening.cpp - ---- Guard widening ----------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the guard widening pass.  The semantics of the
// @llvm.experimental.guard intrinsic let LLVM transform it so that it fails
// more often than it did before the transform.  This optimization is called
// "widening" and can be used to hoist and common runtime checks in situations
// like these:
//
//   %cmp0 = 7 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   %cmp1 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp1) [ "deopt"(...) ]
//   ...
//
// =>
//
//   %cmp0 = 9 u< Length
//   call @llvm.experimental.guard(i1 %cmp0) [ "deopt"(...) ]
//   call @unknown_side_effects()
//   ...
//
// If %cmp0 is false, @llvm.experimental.guard will "deoptimize" back to a
// generic implementation of the same function, which will have the correct
// semantics from that point onward.  It is always _legal_ to deoptimize (so
// replacing %cmp0 with false is "correct"), though it may not always be
// profitable to do so.
//
// NB! This pass is a work in progress.  It hasn't been tuned to be "production
// ready" yet.  It is known to have quadratic running time and will not scale
// to large numbers of guards.
//
//===----------------------------------------------------------------------===//
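// Guards can also be expressed as explicit branches on a widenable condition
// (see the WidenBranchGuards option below).  A sketch of that form, assuming
// the @llvm.experimental.widenable.condition intrinsic:
//
//   %wc = call i1 @llvm.experimental.widenable.condition()
//   %guard.cond = and i1 %cmp0, %wc
//   br i1 %guard.cond, label %guarded, label %deopt
//
// Widening such a branch strengthens %cmp0 in place and re-ANDs the widenable
// condition back in (see GuardWideningImpl::widenGuard below).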
#include "llvm/Transforms/Scalar/GuardWidening.h"
#include <functional>
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/GuardUtils.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

#define DEBUG_TYPE "guard-widening"

STATISTIC(GuardsEliminated, "Number of eliminated guards");
STATISTIC(CondBranchEliminated, "Number of eliminated conditional branches");

static cl::opt<bool> WidenFrequentBranches(
    "guard-widening-widen-frequent-branches", cl::Hidden,
    cl::desc("Widen conditions of explicit branches into dominating guards "
             "if their taken frequency exceeds the threshold set by the "
             "guard-widening-frequent-branch-threshold option"),
    cl::init(false));

static cl::opt<unsigned> FrequentBranchThreshold(
    "guard-widening-frequent-branch-threshold", cl::Hidden,
    cl::desc("When WidenFrequentBranches is set to true, this option is used "
             "to determine which branches are frequently taken. If a branch "
             "is taken more often than "
             "((FrequentBranchThreshold - 1) / FrequentBranchThreshold) of "
             "the time, it is considered frequently taken"),
    cl::init(1000));
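// For example, with the default threshold of 1000, a branch is treated as
// frequently taken only if one of its edges has a probability of at least
// 999/1000 (see how FrequentBranchThreshold feeds LikelyTaken in run() below).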
static cl::opt<bool>
    WidenBranchGuards("guard-widening-widen-branch-guards", cl::Hidden,
                      cl::desc("Whether or not we should widen guards "
                               "expressed as branches by widenable conditions"),
                      cl::init(true));

namespace {

// Get the condition of \p I. It can either be a guard or a conditional branch.
static Value *getCondition(Instruction *I) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    return GI->getArgOperand(0);
  }
  if (isGuardAsWidenableBranch(I)) {
    auto *Cond = cast<BranchInst>(I)->getCondition();
    return cast<BinaryOperator>(Cond)->getOperand(0);
  }
  return cast<BranchInst>(I)->getCondition();
}

// Set the condition for \p I to \p NewCond. \p I can either be a guard or a
// conditional branch.
static void setCondition(Instruction *I, Value *NewCond) {
  if (IntrinsicInst *GI = dyn_cast<IntrinsicInst>(I)) {
    assert(GI->getIntrinsicID() == Intrinsic::experimental_guard &&
           "Bad guard intrinsic?");
    GI->setArgOperand(0, NewCond);
    return;
  }
  cast<BranchInst>(I)->setCondition(NewCond);
}

// Eliminates the guard instruction properly.
static void eliminateGuard(Instruction *GuardInst) {
  GuardInst->eraseFromParent();
  ++GuardsEliminated;
}

class GuardWideningImpl {
  DominatorTree &DT;
  PostDominatorTree *PDT;
  LoopInfo &LI;
  BranchProbabilityInfo *BPI;

  /// Together, these describe the region of interest.  This might be all of
  /// the blocks within a function, or only a given loop's blocks and preheader.
  DomTreeNode *Root;
  std::function<bool(BasicBlock*)> BlockFilter;

  /// The set of guards and conditional branches whose conditions have been
  /// widened into dominating guards.
  SmallVector<Instruction *, 16> EliminatedGuardsAndBranches;

  /// The set of guards which have been widened to include conditions to other
  /// guards.
  DenseSet<Instruction *> WidenedGuards;

  /// Try to eliminate instruction \p Instr by widening it into an earlier
  /// dominating guard. \p DFSI is the DFS iterator on the dominator tree that
  /// is currently visiting the block containing \p Instr, and \p GuardsPerBlock
  /// maps BasicBlocks to the set of guards seen in that block.
  bool eliminateInstrViaWidening(
      Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
      const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
          GuardsPerBlock, bool InvertCondition = false);

  /// Used to keep track of which widening potential is more effective.
  enum WideningScore {
    /// Don't widen.
    WS_IllegalOrNegative,

    /// Widening is performance neutral as far as the cycles spent in check
    /// conditions goes (but can still help, e.g., code layout, having less
    /// deopt state).
    WS_Neutral,

    /// Widening is profitable.
    WS_Positive,

    /// Widening is very profitable.  Not significantly different from \c
    /// WS_Positive, except by the order.
    WS_VeryPositive
  };

  static StringRef scoreTypeToString(WideningScore WS);

  /// Compute the score for widening the condition in \p DominatedInstr
  /// into \p DominatingGuard. If \p InvertCond is set, then we widen the
  /// inverted condition of the dominating guard.
  WideningScore computeWideningScore(Instruction *DominatedInstr,
                                     Instruction *DominatingGuard,
                                     bool InvertCond);

  /// Helper to check if \p V can be hoisted to \p InsertPos.
  bool isAvailableAt(const Value *V, const Instruction *InsertPos) const {
    SmallPtrSet<const Instruction *, 8> Visited;
    return isAvailableAt(V, InsertPos, Visited);
  }

  bool isAvailableAt(const Value *V, const Instruction *InsertPos,
                     SmallPtrSetImpl<const Instruction *> &Visited) const;

  /// Helper to hoist \p V to \p InsertPos.  Guaranteed to succeed if \c
  /// isAvailableAt returned true.
  void makeAvailableAt(Value *V, Instruction *InsertPos) const;

  /// Common helper used by \c widenGuard and \c isWideningCondProfitable.  Try
  /// to generate an expression computing the logical AND of \p Cond0 and (\p
  /// Cond1 XOR \p InvertCondition).
  /// Return true if the expression computing the AND is only as
  /// expensive as computing one of the two.  If \p InsertPt is non-null then
  /// actually generate the resulting expression, make it available at \p
  /// InsertPt and return it in \p Result (else no change to the IR is made).
  bool widenCondCommon(Value *Cond0, Value *Cond1, Instruction *InsertPt,
                       Value *&Result, bool InvertCondition);

  /// Represents a range check of the form \c Base + \c Offset u< \c Length,
  /// with the constraint that \c Length is not negative.  \c CheckInst is the
  /// pre-existing instruction in the IR that computes the result of this range
  /// check.
  class RangeCheck {
    const Value *Base;
    const ConstantInt *Offset;
    const Value *Length;
    ICmpInst *CheckInst;

  public:
    explicit RangeCheck(const Value *Base, const ConstantInt *Offset,
                        const Value *Length, ICmpInst *CheckInst)
        : Base(Base), Offset(Offset), Length(Length), CheckInst(CheckInst) {}

    void setBase(const Value *NewBase) { Base = NewBase; }
    void setOffset(const ConstantInt *NewOffset) { Offset = NewOffset; }

    const Value *getBase() const { return Base; }
    const ConstantInt *getOffset() const { return Offset; }
    const APInt &getOffsetValue() const { return getOffset()->getValue(); }
    const Value *getLength() const { return Length; }
    ICmpInst *getCheckInst() const { return CheckInst; }

    void print(raw_ostream &OS, bool PrintTypes = false) {
      OS << "Base: ";
      Base->printAsOperand(OS, PrintTypes);
      OS << " Offset: ";
      Offset->printAsOperand(OS, PrintTypes);
      OS << " Length: ";
      Length->printAsOperand(OS, PrintTypes);
    }

    LLVM_DUMP_METHOD void dump() {
      print(dbgs());
      dbgs() << "\n";
    }
  };

  /// Parse \p CheckCond into a conjunction (logical-and) of range checks; and
  /// append them to \p Checks.  Returns true on success, may clobber \c Checks
  /// on failure.
  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks) {
    SmallPtrSet<const Value *, 8> Visited;
    return parseRangeChecks(CheckCond, Checks, Visited);
  }

  bool parseRangeChecks(Value *CheckCond, SmallVectorImpl<RangeCheck> &Checks,
                        SmallPtrSetImpl<const Value *> &Visited);

  /// Combine the checks in \p Checks into a smaller set of checks and append
  /// them into \p CombinedChecks.  Return true on success (i.e. all of the
  /// checks in \p Checks were combined into \p CombinedChecks).  Clobbers \p
  /// Checks and \p CombinedChecks on success and on failure.
  bool combineRangeChecks(SmallVectorImpl<RangeCheck> &Checks,
                          SmallVectorImpl<RangeCheck> &CombinedChecks) const;

  /// Can we compute the logical AND of \p Cond0 and \p Cond1 for the price of
  /// computing only one of the two expressions?
  bool isWideningCondProfitable(Value *Cond0, Value *Cond1, bool InvertCond) {
    Value *ResultUnused;
    return widenCondCommon(Cond0, Cond1, /*InsertPt=*/nullptr, ResultUnused,
                           InvertCond);
  }

  /// If \p InvertCondition is false, widen \p ToWiden to fail if
  /// \p NewCondition is false; otherwise make it fail if \p NewCondition is
  /// true (in addition to whatever it is already checking).
  void widenGuard(Instruction *ToWiden, Value *NewCondition,
                  bool InvertCondition) {
    Value *Result;
    widenCondCommon(getCondition(ToWiden), NewCondition, ToWiden, Result,
                    InvertCondition);
    Value *WidenableCondition = nullptr;
    if (isGuardAsWidenableBranch(ToWiden)) {
      auto *Cond = cast<BranchInst>(ToWiden)->getCondition();
      WidenableCondition = cast<BinaryOperator>(Cond)->getOperand(1);
    }
    if (WidenableCondition)
      Result = BinaryOperator::CreateAnd(Result, WidenableCondition,
                                         "guard.chk", ToWiden);
    setCondition(ToWiden, Result);
  }

public:
  explicit GuardWideningImpl(DominatorTree &DT, PostDominatorTree *PDT,
                             LoopInfo &LI, BranchProbabilityInfo *BPI,
                             DomTreeNode *Root,
                             std::function<bool(BasicBlock*)> BlockFilter)
      : DT(DT), PDT(PDT), LI(LI), BPI(BPI), Root(Root),
        BlockFilter(BlockFilter) {}

  /// The entry point for this pass.
  bool run();
};
}

static bool isSupportedGuardInstruction(const Instruction *Insn) {
  if (isGuard(Insn))
    return true;
  if (WidenBranchGuards && isGuardAsWidenableBranch(Insn))
    return true;
  return false;
}

bool GuardWideningImpl::run() {
  DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> GuardsInBlock;
  bool Changed = false;
  Optional<BranchProbability> LikelyTaken = None;
  if (WidenFrequentBranches && BPI) {
    unsigned Threshold = FrequentBranchThreshold;
    assert(Threshold > 0 && "Zero threshold makes no sense!");
    LikelyTaken = BranchProbability(Threshold - 1, Threshold);
  }

  for (auto DFI = df_begin(Root), DFE = df_end(Root);
       DFI != DFE; ++DFI) {
    auto *BB = (*DFI)->getBlock();
    if (!BlockFilter(BB))
      continue;

    auto &CurrentList = GuardsInBlock[BB];

    for (auto &I : *BB)
      if (isSupportedGuardInstruction(&I))
        CurrentList.push_back(cast<Instruction>(&I));

    for (auto *II : CurrentList)
      Changed |= eliminateInstrViaWidening(II, DFI, GuardsInBlock);
    if (WidenFrequentBranches && BPI)
      if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
        if (BI->isConditional()) {
          // If one of the branches of a conditional is likely taken, try to
          // eliminate it.
          if (BPI->getEdgeProbability(BB, 0U) >= *LikelyTaken)
            Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock);
          else if (BPI->getEdgeProbability(BB, 1U) >= *LikelyTaken)
            Changed |= eliminateInstrViaWidening(BI, DFI, GuardsInBlock,
                                                 /*InvertCondition*/true);
        }
  }

  assert(EliminatedGuardsAndBranches.empty() || Changed);
  for (auto *I : EliminatedGuardsAndBranches)
    if (!WidenedGuards.count(I)) {
      assert(isa<ConstantInt>(getCondition(I)) && "Should be!");
      if (isSupportedGuardInstruction(I))
        eliminateGuard(I);
      else {
        assert(isa<BranchInst>(I) &&
               "Eliminated something other than guard or branch?");
        ++CondBranchEliminated;
      }
    }

  return Changed;
}

bool GuardWideningImpl::eliminateInstrViaWidening(
    Instruction *Instr, const df_iterator<DomTreeNode *> &DFSI,
    const DenseMap<BasicBlock *, SmallVector<Instruction *, 8>> &
        GuardsInBlock, bool InvertCondition) {
  // Ignore trivial true or false conditions.  These instructions will be
  // trivially eliminated by any cleanup pass.  Do not erase them because other
  // guards can possibly be widened into them.
  if (isa<ConstantInt>(getCondition(Instr)))
    return false;

  Instruction *BestSoFar = nullptr;
  auto BestScoreSoFar = WS_IllegalOrNegative;

  // In the set of dominating guards, find the one we can merge \p Instr with
  // for the most profit.
  for (unsigned i = 0, e = DFSI.getPathLength(); i != e; ++i) {
    auto *CurBB = DFSI.getPath(i)->getBlock();
    if (!BlockFilter(CurBB))
      break;
    assert(GuardsInBlock.count(CurBB) && "Must have been populated by now!");
    const auto &GuardsInCurBB = GuardsInBlock.find(CurBB)->second;

    auto I = GuardsInCurBB.begin();
    auto E = Instr->getParent() == CurBB
                 ? std::find(GuardsInCurBB.begin(), GuardsInCurBB.end(), Instr)
                 : GuardsInCurBB.end();

#ifndef NDEBUG
    {
      unsigned Index = 0;
      for (auto &I : *CurBB) {
        if (Index == GuardsInCurBB.size())
          break;
        if (GuardsInCurBB[Index] == &I)
          Index++;
      }
      assert(Index == GuardsInCurBB.size() &&
             "Guards expected to be in order!");
    }
#endif

    assert((i == (e - 1)) == (Instr->getParent() == CurBB) && "Bad DFS?");

    for (auto *Candidate : make_range(I, E)) {
      auto Score = computeWideningScore(Instr, Candidate, InvertCondition);
      LLVM_DEBUG(dbgs() << "Score between " << *getCondition(Instr)
                        << " and " << *getCondition(Candidate) << " is "
                        << scoreTypeToString(Score) << "\n");
      if (Score > BestScoreSoFar) {
        BestScoreSoFar = Score;
        BestSoFar = Candidate;
      }
    }
  }

  if (BestScoreSoFar == WS_IllegalOrNegative) {
    LLVM_DEBUG(dbgs() << "Did not eliminate guard " << *Instr << "\n");
    return false;
  }

  assert(BestSoFar != Instr && "Should have never visited same guard!");
  assert(DT.dominates(BestSoFar, Instr) && "Should be!");

  LLVM_DEBUG(dbgs() << "Widening " << *Instr << " into " << *BestSoFar
                    << " with score " << scoreTypeToString(BestScoreSoFar)
                    << "\n");
  widenGuard(BestSoFar, getCondition(Instr), InvertCondition);
  auto NewGuardCondition = InvertCondition
                               ? ConstantInt::getFalse(Instr->getContext())
                               : ConstantInt::getTrue(Instr->getContext());
  setCondition(Instr, NewGuardCondition);
  EliminatedGuardsAndBranches.push_back(Instr);
  WidenedGuards.insert(BestSoFar);
  return true;
}

GuardWideningImpl::WideningScore
GuardWideningImpl::computeWideningScore(Instruction *DominatedInstr,
                                        Instruction *DominatingGuard,
                                        bool InvertCond) {
  Loop *DominatedInstrLoop = LI.getLoopFor(DominatedInstr->getParent());
  Loop *DominatingGuardLoop = LI.getLoopFor(DominatingGuard->getParent());
  bool HoistingOutOfLoop = false;

  if (DominatingGuardLoop != DominatedInstrLoop) {
    // Be conservative and don't widen into a sibling loop.  TODO: If the
    // sibling is colder, we should consider allowing this.
    if (DominatingGuardLoop &&
        !DominatingGuardLoop->contains(DominatedInstrLoop))
      return WS_IllegalOrNegative;

    HoistingOutOfLoop = true;
  }

  if (!isAvailableAt(getCondition(DominatedInstr), DominatingGuard))
    return WS_IllegalOrNegative;

  // If the guard was conditionally executed, it may never be reached
  // dynamically.  There are two potential downsides to hoisting it out of the
  // conditionally executed region: 1) we may spuriously deopt without need and
  // 2) we have the extra cost of computing the guard condition in the common
  // case.  At the moment, we really only consider the second in our heuristic
  // here.  TODO: evaluate cost model for spurious deopt
  // NOTE: As written, this also lets us hoist right over another guard which
  // is essentially just another spelling for control flow.
  if (isWideningCondProfitable(getCondition(DominatedInstr),
                               getCondition(DominatingGuard), InvertCond))
    return HoistingOutOfLoop ? WS_VeryPositive : WS_Positive;

  if (HoistingOutOfLoop)
    return WS_Positive;

  // Returns true if we might be hoisting above explicit control flow.  Note
  // that this completely ignores implicit control flow (guards, calls which
  // throw, etc...).  That choice appears arbitrary.
  auto MaybeHoistingOutOfIf = [&]() {
    auto *DominatingBlock = DominatingGuard->getParent();
    auto *DominatedBlock = DominatedInstr->getParent();
    if (isGuardAsWidenableBranch(DominatingGuard))
      DominatingBlock = cast<BranchInst>(DominatingGuard)->getSuccessor(0);

    // Same block?
    if (DominatedBlock == DominatingBlock)
      return false;
    // Obvious successor (common loop header/preheader case)
    if (DominatedBlock == DominatingBlock->getUniqueSuccessor())
      return false;
    // TODO: diamond, triangle cases
    if (!PDT)
      return true;
    return !PDT->dominates(DominatedBlock, DominatingBlock);
  };

  return MaybeHoistingOutOfIf() ? WS_IllegalOrNegative : WS_Neutral;
}

bool GuardWideningImpl::isAvailableAt(
    const Value *V, const Instruction *Loc,
    SmallPtrSetImpl<const Instruction *> &Visited) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc) || Visited.count(Inst))
    return true;

  if (!isSafeToSpeculativelyExecute(Inst, Loc, &DT) ||
      Inst->mayReadFromMemory())
    return false;

  Visited.insert(Inst);

  // We only want to go _up_ the dominance chain when recursing.
  assert(!isa<PHINode>(Loc) &&
         "PHIs should return false for isSafeToSpeculativelyExecute");
  assert(DT.isReachableFromEntry(Inst->getParent()) &&
         "We did a DFS from the block entry!");
  return all_of(Inst->operands(),
                [&](Value *Op) { return isAvailableAt(Op, Loc, Visited); });
}

void GuardWideningImpl::makeAvailableAt(Value *V, Instruction *Loc) const {
  auto *Inst = dyn_cast<Instruction>(V);
  if (!Inst || DT.dominates(Inst, Loc))
    return;

  assert(isSafeToSpeculativelyExecute(Inst, Loc, &DT) &&
         !Inst->mayReadFromMemory() && "Should've checked with isAvailableAt!");

  for (Value *Op : Inst->operands())
    makeAvailableAt(Op, Loc);

  Inst->moveBefore(Loc);
}

bool GuardWideningImpl::widenCondCommon(Value *Cond0, Value *Cond1,
                                        Instruction *InsertPt, Value *&Result,
                                        bool InvertCondition) {
  using namespace llvm::PatternMatch;

  {
    // L >u C0 && L >u C1  ->  L >u max(C0, C1)
    ConstantInt *RHS0, *RHS1;
    Value *LHS;
    ICmpInst::Predicate Pred0, Pred1;
    if (match(Cond0, m_ICmp(Pred0, m_Value(LHS), m_ConstantInt(RHS0))) &&
        match(Cond1, m_ICmp(Pred1, m_Specific(LHS), m_ConstantInt(RHS1)))) {
      if (InvertCondition)
        Pred1 = ICmpInst::getInversePredicate(Pred1);

      ConstantRange CR0 =
          ConstantRange::makeExactICmpRegion(Pred0, RHS0->getValue());
      ConstantRange CR1 =
          ConstantRange::makeExactICmpRegion(Pred1, RHS1->getValue());

      // SubsetIntersect is a subset of the actual mathematical intersection of
      // CR0 and CR1, while SupersetIntersect is a superset of the actual
      // mathematical intersection.  If these two ConstantRanges are equal, then
      // we know we were able to represent the actual mathematical intersection
      // of CR0 and CR1, and can use the same to generate an icmp instruction.
      //
      // Given what we're doing here and the semantics of guards, it would
      // actually be correct to just use SubsetIntersect, but that may be too
      // aggressive in cases we care about.
      auto SubsetIntersect = CR0.inverse().unionWith(CR1.inverse()).inverse();
      auto SupersetIntersect = CR0.intersectWith(CR1);

      APInt NewRHSAP;
      CmpInst::Predicate Pred;
      if (SubsetIntersect == SupersetIntersect &&
          SubsetIntersect.getEquivalentICmp(Pred, NewRHSAP)) {
        if (InsertPt) {
          ConstantInt *NewRHS =
              ConstantInt::get(Cond0->getContext(), NewRHSAP);
          Result = new ICmpInst(InsertPt, Pred, LHS, NewRHS, "wide.chk");
        }
        return true;
      }
    }
  }

  {
    SmallVector<GuardWideningImpl::RangeCheck, 4> Checks, CombinedChecks;
    // TODO: Support InvertCondition case?
    if (!InvertCondition &&
        parseRangeChecks(Cond0, Checks) && parseRangeChecks(Cond1, Checks) &&
        combineRangeChecks(Checks, CombinedChecks)) {
      if (InsertPt) {
        Result = nullptr;
        for (auto &RC : CombinedChecks) {
          makeAvailableAt(RC.getCheckInst(), InsertPt);
          if (Result)
            Result = BinaryOperator::CreateAnd(RC.getCheckInst(), Result, "",
                                               InsertPt);
          else
            Result = RC.getCheckInst();
        }
        assert(Result && "Failed to find result value");
        Result->setName("wide.chk");
      }
      return true;
    }
  }

  // Base case -- just logical-and the two conditions together.

  if (InsertPt) {
    makeAvailableAt(Cond0, InsertPt);
    makeAvailableAt(Cond1, InsertPt);
    if (InvertCondition)
      Cond1 = BinaryOperator::CreateNot(Cond1, "inverted", InsertPt);
    Result = BinaryOperator::CreateAnd(Cond0, Cond1, "wide.chk", InsertPt);
  }

  // We were not able to compute Cond0 AND Cond1 for the price of one.
  return false;
}
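// A worked example of the first pattern above (illustrative values, not from
// the original source): widening `%l u< 10` with `%l u< 7` gives
// CR0 = [0, 10) and CR1 = [0, 7).  Their exact intersection [0, 7) is
// representable as a single icmp, so widenCondCommon emits `%l u< 7` as
// wide.chk, checking both conditions for the price of one.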
bool GuardWideningImpl::parseRangeChecks(
    Value *CheckCond, SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallPtrSetImpl<const Value *> &Visited) {
  if (!Visited.insert(CheckCond).second)
    return true;

  using namespace llvm::PatternMatch;

  {
    Value *AndLHS, *AndRHS;
    if (match(CheckCond, m_And(m_Value(AndLHS), m_Value(AndRHS))))
      return parseRangeChecks(AndLHS, Checks) &&
             parseRangeChecks(AndRHS, Checks);
  }

  auto *IC = dyn_cast<ICmpInst>(CheckCond);
  if (!IC || !IC->getOperand(0)->getType()->isIntegerTy() ||
      (IC->getPredicate() != ICmpInst::ICMP_ULT &&
       IC->getPredicate() != ICmpInst::ICMP_UGT))
    return false;

  const Value *CmpLHS = IC->getOperand(0), *CmpRHS = IC->getOperand(1);
  if (IC->getPredicate() == ICmpInst::ICMP_UGT)
    std::swap(CmpLHS, CmpRHS);

  auto &DL = IC->getModule()->getDataLayout();

  GuardWideningImpl::RangeCheck Check(
      CmpLHS, cast<ConstantInt>(ConstantInt::getNullValue(CmpRHS->getType())),
      CmpRHS, IC);

  if (!isKnownNonNegative(Check.getLength(), DL))
    return false;

  // What we have in \c Check now is a correct interpretation of \p CheckCond.
  // Try to see if we can move some constant offsets into the \c Offset field.

  bool Changed;
  auto &Ctx = CheckCond->getContext();

  do {
    Value *OpLHS;
    ConstantInt *OpRHS;
    Changed = false;

#ifndef NDEBUG
    auto *BaseInst = dyn_cast<Instruction>(Check.getBase());
    assert((!BaseInst || DT.isReachableFromEntry(BaseInst->getParent())) &&
           "Unreachable instruction?");
#endif

    if (match(Check.getBase(), m_Add(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      Check.setBase(OpLHS);
      APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
      Check.setOffset(ConstantInt::get(Ctx, NewOffset));
      Changed = true;
    } else if (match(Check.getBase(),
                     m_Or(m_Value(OpLHS), m_ConstantInt(OpRHS)))) {
      KnownBits Known = computeKnownBits(OpLHS, DL);
      if ((OpRHS->getValue() & Known.Zero) == OpRHS->getValue()) {
        Check.setBase(OpLHS);
        APInt NewOffset = Check.getOffsetValue() + OpRHS->getValue();
        Check.setOffset(ConstantInt::get(Ctx, NewOffset));
        Changed = true;
      }
    }
  } while (Changed);

  Checks.push_back(Check);
  return true;
}
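// For illustration (assumed IR, not from the original file): given a check
// like
//
//   %base4 = add i32 %base, 4
//   %chk   = icmp ult i32 %base4, %len
//
// parseRangeChecks first records Base = %base4, Offset = 0, Length = %len,
// and the folding loop above then moves the constant into the offset, leaving
// Base = %base, Offset = 4, Length = %len (provided %len is known
// non-negative).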
bool GuardWideningImpl::combineRangeChecks(
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &Checks,
    SmallVectorImpl<GuardWideningImpl::RangeCheck> &RangeChecksOut) const {
  unsigned OldCount = Checks.size();
  while (!Checks.empty()) {
    // Pick all of the range checks with a specific base and length, and try to
    // merge them.
    const Value *CurrentBase = Checks.front().getBase();
    const Value *CurrentLength = Checks.front().getLength();

    SmallVector<GuardWideningImpl::RangeCheck, 3> CurrentChecks;

    auto IsCurrentCheck = [&](GuardWideningImpl::RangeCheck &RC) {
      return RC.getBase() == CurrentBase && RC.getLength() == CurrentLength;
    };

    copy_if(Checks, std::back_inserter(CurrentChecks), IsCurrentCheck);
    Checks.erase(remove_if(Checks, IsCurrentCheck), Checks.end());

    assert(CurrentChecks.size() != 0 && "We know we have at least one!");

    if (CurrentChecks.size() < 3) {
      RangeChecksOut.insert(RangeChecksOut.end(), CurrentChecks.begin(),
                            CurrentChecks.end());
      continue;
    }

    // CurrentChecks.size() will typically be 3 here, but so far there has been
    // no need to hard-code that fact.

    llvm::sort(CurrentChecks, [&](const GuardWideningImpl::RangeCheck &LHS,
                                  const GuardWideningImpl::RangeCheck &RHS) {
      return LHS.getOffsetValue().slt(RHS.getOffsetValue());
    });

    // Note: llvm::sort does not invalidate any iterators into CurrentChecks.

    const ConstantInt *MinOffset = CurrentChecks.front().getOffset();
    const ConstantInt *MaxOffset = CurrentChecks.back().getOffset();

    unsigned BitWidth = MaxOffset->getValue().getBitWidth();
    if ((MaxOffset->getValue() - MinOffset->getValue())
            .ugt(APInt::getSignedMinValue(BitWidth)))
      return false;

    APInt MaxDiff = MaxOffset->getValue() - MinOffset->getValue();
    const APInt &HighOffset = MaxOffset->getValue();
    auto OffsetOK = [&](const GuardWideningImpl::RangeCheck &RC) {
      return (HighOffset - RC.getOffsetValue()).ult(MaxDiff);
    };

    if (MaxDiff.isMinValue() ||
        !std::all_of(std::next(CurrentChecks.begin()), CurrentChecks.end(),
                     OffsetOK))
      return false;

    // We have a series of f+1 checks as:
    //
    //   I+k_0 u< L   ... Chk_0
    //   I+k_1 u< L   ... Chk_1
    //   ...
    //   I+k_f u< L   ... Chk_f
    //
    //     with forall i in [0,f]: k_f-k_i u< k_f-k_0   ... Precond_0
    //          k_f-k_0 u< INT_MIN+k_f                  ... Precond_1
    //          k_f != k_0                              ... Precond_2
    //
    // Claim:
    //   Chk_0 AND Chk_f implies all the other checks
    //
    // Informal proof sketch:
    //
    //   We will show that the integer range [I+k_0,I+k_f] does not
    //   unsigned-wrap (i.e. going from I+k_0 to I+k_f does not cross the -1,0
    //   boundary) and thus I+k_f is the greatest unsigned value in that range.
    //
    //   This combined with Chk_f shows that everything in that range is u< L.
    //   Via Precond_0 we know that all of the indices in Chk_0 through Chk_f
    //   lie in [I+k_0,I+k_f], thus proving our claim.
    //
    //   To see that [I+k_0,I+k_f] is not a wrapping range, note that there are
    //   two possibilities: I+k_0 u< I+k_f or I+k_0 >u I+k_f (they can't be
    //   equal since k_0 != k_f).  In the former case, [I+k_0,I+k_f] is not a
    //   wrapping range by definition, and the latter case is impossible:
    //
    //     0-----I+k_f---I+k_0----L---INT_MAX,INT_MIN------------------(-1)
    //           xxxxxx          xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
    //
    //   For Chk_0 to succeed, we'd have to have k_f-k_0 (the range highlighted
    //   with 'x' above) to be at least >u INT_MIN.

    RangeChecksOut.emplace_back(CurrentChecks.front());
    RangeChecksOut.emplace_back(CurrentChecks.back());
  }

  assert(RangeChecksOut.size() <= OldCount && "We pessimized!");
  return RangeChecksOut.size() != OldCount;
}
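// A small worked example (assumed offsets, not from the original file): given
// the parsed checks
//
//   %i + 0 u< %len
//   %i + 1 u< %len
//   %i + 2 u< %len
//
// the preconditions above hold (k_0 = 0, k_f = 2), so combineRangeChecks
// keeps only the first and last checks: `%i + 0 u< %len` and `%i + 2 u< %len`
// together imply the middle one.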
#ifndef NDEBUG
StringRef GuardWideningImpl::scoreTypeToString(WideningScore WS) {
  switch (WS) {
  case WS_IllegalOrNegative:
    return "IllegalOrNegative";
  case WS_Neutral:
    return "Neutral";
  case WS_Positive:
    return "Positive";
  case WS_VeryPositive:
    return "VeryPositive";
  }

  llvm_unreachable("Fully covered switch above!");
}
#endif

PreservedAnalyses GuardWideningPass::run(Function &F,
                                         FunctionAnalysisManager &AM) {
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = AM.getResult<LoopAnalysis>(F);
  auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
  BranchProbabilityInfo *BPI = nullptr;
  if (WidenFrequentBranches)
    BPI = AM.getCachedResult<BranchProbabilityAnalysis>(F);
  if (!GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
                         [](BasicBlock *) { return true; }).run())
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

PreservedAnalyses GuardWideningPass::run(Loop &L, LoopAnalysisManager &AM,
                                         LoopStandardAnalysisResults &AR,
                                         LPMUpdater &U) {
  const auto &FAM =
      AM.getResult<FunctionAnalysisManagerLoopProxy>(L, AR).getManager();
  Function &F = *L.getHeader()->getParent();
  BranchProbabilityInfo *BPI = nullptr;
  if (WidenFrequentBranches)
    BPI = FAM.getCachedResult<BranchProbabilityAnalysis>(F);

  BasicBlock *RootBB = L.getLoopPredecessor();
  if (!RootBB)
    RootBB = L.getHeader();
  auto BlockFilter = [&](BasicBlock *BB) {
    return BB == RootBB || L.contains(BB);
  };
  if (!GuardWideningImpl(AR.DT, nullptr, AR.LI, BPI, AR.DT.getNode(RootBB),
                         BlockFilter).run())
    return PreservedAnalyses::all();

  return getLoopPassPreservedAnalyses();
}

namespace {
struct GuardWideningLegacyPass : public FunctionPass {
  static char ID;

  GuardWideningLegacyPass() : FunctionPass(ID) {
    initializeGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
    BranchProbabilityInfo *BPI = nullptr;
    if (WidenFrequentBranches)
      BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
    return GuardWideningImpl(DT, &PDT, LI, BPI, DT.getRootNode(),
                             [](BasicBlock *) { return true; }).run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<PostDominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
    if (WidenFrequentBranches)
      AU.addRequired<BranchProbabilityInfoWrapperPass>();
  }
};
/// Same as above, but restricted to a single loop at a time.  Can be
/// scheduled with other loop passes without breaking out of the loop pass
/// manager.
struct LoopGuardWideningLegacyPass : public LoopPass {
  static char ID;

  LoopGuardWideningLegacyPass() : LoopPass(ID) {
    initializeLoopGuardWideningLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnLoop(Loop *L, LPPassManager &LPM) override {
    if (skipLoop(L))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
    auto *PDTWP = getAnalysisIfAvailable<PostDominatorTreeWrapperPass>();
    auto *PDT = PDTWP ? &PDTWP->getPostDomTree() : nullptr;
    BasicBlock *RootBB = L->getLoopPredecessor();
    if (!RootBB)
      RootBB = L->getHeader();
    auto BlockFilter = [&](BasicBlock *BB) {
      return BB == RootBB || L->contains(BB);
    };
    BranchProbabilityInfo *BPI = nullptr;
    if (WidenFrequentBranches)
      BPI = &getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
    return GuardWideningImpl(DT, PDT, LI, BPI, DT.getNode(RootBB),
                             BlockFilter).run();
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    if (WidenFrequentBranches)
      AU.addRequired<BranchProbabilityInfoWrapperPass>();
    AU.setPreservesCFG();
    getLoopAnalysisUsage(AU);
    AU.addPreserved<PostDominatorTreeWrapperPass>();
  }
};
}

char GuardWideningLegacyPass::ID = 0;
char LoopGuardWideningLegacyPass::ID = 0;

INITIALIZE_PASS_BEGIN(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
if (WidenFrequentBranches)
  INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(GuardWideningLegacyPass, "guard-widening", "Widen guards",
                    false, false)

INITIALIZE_PASS_BEGIN(LoopGuardWideningLegacyPass, "loop-guard-widening",
                      "Widen guards (within a single loop, as a loop pass)",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
if (WidenFrequentBranches)
  INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_END(LoopGuardWideningLegacyPass, "loop-guard-widening",
                    "Widen guards (within a single loop, as a loop pass)",
                    false, false)

FunctionPass *llvm::createGuardWideningPass() {
  return new GuardWideningLegacyPass();
}

Pass *llvm::createLoopGuardWideningPass() {
  return new LoopGuardWideningLegacyPass();
}
