Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp  591
1 file changed, 454 insertions(+), 137 deletions(-)
diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
index b2bc75c19709..77ce3d2fb563 100644
--- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -46,6 +46,7 @@
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -377,6 +378,7 @@ class TypePromotionTransaction;
}
void removeAllAssertingVHReferences(Value *V);
+ bool eliminateAssumptions(Function &F);
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -404,6 +406,7 @@ class TypePromotionTransaction;
bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
bool fixupDbgValue(Instruction *I);
bool placeDbgValues(Function &F);
+ bool placePseudoProbes(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
bool tryToPromoteExts(TypePromotionTransaction &TPT,
@@ -506,6 +509,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
}
}
+ // Get rid of @llvm.assume builtins before attempting to eliminate empty
+ // blocks, since there might be blocks that only contain @llvm.assume calls
+ // (plus arguments that we can get rid of).
+ EverMadeChange |= eliminateAssumptions(F);
+
// Eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
@@ -566,10 +574,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
MadeChange |= ConstantFoldTerminator(&BB, true);
if (!MadeChange) continue;
- for (SmallVectorImpl<BasicBlock*>::iterator
- II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_empty(*II))
- WorkList.insert(*II);
+ for (BasicBlock *Succ : Successors)
+ if (pred_empty(Succ))
+ WorkList.insert(Succ);
}
// Delete the dead blocks and any of their dead successors.
@@ -580,10 +587,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
DeleteDeadBlock(BB);
- for (SmallVectorImpl<BasicBlock*>::iterator
- II = Successors.begin(), IE = Successors.end(); II != IE; ++II)
- if (pred_empty(*II))
- WorkList.insert(*II);
+ for (BasicBlock *Succ : Successors)
+ if (pred_empty(Succ))
+ WorkList.insert(Succ);
}
// Merge pairs of basic blocks with unconditional branches, connected by
@@ -607,6 +613,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Do this last to clean up use-before-def scenarios introduced by other
// preparatory transforms.
EverMadeChange |= placeDbgValues(F);
+ EverMadeChange |= placePseudoProbes(F);
#ifndef NDEBUG
if (VerifyBFIUpdates)
@@ -616,6 +623,26 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
return EverMadeChange;
}
+bool CodeGenPrepare::eliminateAssumptions(Function &F) {
+ bool MadeChange = false;
+ for (BasicBlock &BB : F) {
+ CurInstIterator = BB.begin();
+ while (CurInstIterator != BB.end()) {
+ Instruction *I = &*(CurInstIterator++);
+ if (auto *Assume = dyn_cast<AssumeInst>(I)) {
+ MadeChange = true;
+ Value *Operand = Assume->getOperand(0);
+ Assume->eraseFromParent();
+
+ resetIteratorIfInvalidatedWhileCalling(&BB, [&]() {
+ RecursivelyDeleteTriviallyDeadInstructions(Operand, TLInfo, nullptr);
+ });
+ }
+ }
+ }
+ return MadeChange;
+}
+
/// An instruction is about to be deleted, so remove all references to it in our
/// GEP-tracking data structures.
void CodeGenPrepare::removeAllAssertingVHReferences(Value *V) {
@@ -780,8 +807,8 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
// Skip merging if the block's successor is also a successor to any callbr
// that leads to this block.
// FIXME: Is this really needed? Is this a correctness issue?
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (auto *CBI = dyn_cast<CallBrInst>(Pred->getTerminator()))
for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
if (DestBB == CBI->getSuccessor(i))
return false;
@@ -822,9 +849,7 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
// Find all other incoming blocks from which incoming values of all PHIs in
// DestBB are the same as the ones from BB.
- for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
- ++PI) {
- BasicBlock *DestBBPred = *PI;
+ for (BasicBlock *DestBBPred : predecessors(DestBB)) {
if (DestBBPred == BB)
continue;
@@ -964,8 +989,8 @@ void CodeGenPrepare::eliminateMostlyEmptyBlock(BasicBlock *BB) {
for (unsigned i = 0, e = BBPN->getNumIncomingValues(); i != e; ++i)
PN.addIncoming(InVal, BBPN->getIncomingBlock(i));
} else {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- PN.addIncoming(InVal, *PI);
+ for (BasicBlock *Pred : predecessors(BB))
+ PN.addIncoming(InVal, Pred);
}
}
}
@@ -1280,11 +1305,83 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
return SinkCast(CI);
}
+// Match a simple increment by constant operation. Note that if a sub is
+// matched, the step is negated (as if the step had been canonicalized to
+// an add, even though we leave the instruction alone).
+bool matchIncrement(const Instruction* IVInc, Instruction *&LHS,
+ Constant *&Step) {
+ if (match(IVInc, m_Add(m_Instruction(LHS), m_Constant(Step))) ||
+ match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::uadd_with_overflow>(
+ m_Instruction(LHS), m_Constant(Step)))))
+ return true;
+ if (match(IVInc, m_Sub(m_Instruction(LHS), m_Constant(Step))) ||
+ match(IVInc, m_ExtractValue<0>(m_Intrinsic<Intrinsic::usub_with_overflow>(
+ m_Instruction(LHS), m_Constant(Step))))) {
+ Step = ConstantExpr::getNeg(Step);
+ return true;
+ }
+ return false;
+}
+
+/// If the given \p PN is an induction variable with value IVInc coming from the
+/// backedge, and on each iteration it gets increased by Step, return the pair
+/// <IVInc, Step>. Otherwise, return None.
+static Optional<std::pair<Instruction *, Constant *> >
+getIVIncrement(const PHINode *PN, const LoopInfo *LI) {
+ const Loop *L = LI->getLoopFor(PN->getParent());
+ if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
+ return None;
+ auto *IVInc =
+ dyn_cast<Instruction>(PN->getIncomingValueForBlock(L->getLoopLatch()));
+ if (!IVInc || LI->getLoopFor(IVInc->getParent()) != L)
+ return None;
+ Instruction *LHS = nullptr;
+ Constant *Step = nullptr;
+ if (matchIncrement(IVInc, LHS, Step) && LHS == PN)
+ return std::make_pair(IVInc, Step);
+ return None;
+}
+
+static bool isIVIncrement(const Value *V, const LoopInfo *LI) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+ Instruction *LHS = nullptr;
+ Constant *Step = nullptr;
+ if (!matchIncrement(I, LHS, Step))
+ return false;
+ if (auto *PN = dyn_cast<PHINode>(LHS))
+ if (auto IVInc = getIVIncrement(PN, LI))
+ return IVInc->first == I;
+ return false;
+}
+
bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
Value *Arg0, Value *Arg1,
CmpInst *Cmp,
Intrinsic::ID IID) {
- if (BO->getParent() != Cmp->getParent()) {
+ auto IsReplacableIVIncrement = [this, &Cmp](BinaryOperator *BO) {
+ if (!isIVIncrement(BO, LI))
+ return false;
+ const Loop *L = LI->getLoopFor(BO->getParent());
+ assert(L && "L should not be null after isIVIncrement()");
+ // Do not risk moving the increment into a child loop.
+ if (LI->getLoopFor(Cmp->getParent()) != L)
+ return false;
+
+ // Finally, we need to ensure that the insert point will dominate all
+ // existing uses of the increment.
+
+ auto &DT = getDT(*BO->getParent()->getParent());
+ if (DT.dominates(Cmp->getParent(), BO->getParent()))
+ // If we're moving up the dom tree, all uses are trivially dominated.
+ // (This is the common case for code produced by LSR.)
+ return true;
+
+ // Otherwise, special case the single use in the phi recurrence.
+ return BO->hasOneUse() && DT.dominates(Cmp->getParent(), L->getLoopLatch());
+ };
+ if (BO->getParent() != Cmp->getParent() && !IsReplacableIVIncrement(BO)) {
// We used to use a dominator tree here to allow multi-block optimization.
// But that was problematic because:
// 1. It could cause a perf regression by hoisting the math op into the
@@ -1295,6 +1392,14 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
// This is because we recompute the DT on every change in the main CGP
// run-loop. The recomputing is probably unnecessary in many cases, so if
// that was fixed, using a DT here would be ok.
+ //
+ // There is one important particular case we still want to handle: if BO is
+ // the IV increment. Important properties that make it profitable:
+ // - We can speculate IV increment anywhere in the loop (as long as the
+ // indvar Phi is its only user);
+ // - Upon computing Cmp, we effectively compute something equivalent to the
+ // IV increment (even though it looks different in the IR). So moving it up
+ // to the cmp point does not really increase register pressure.
return false;
}
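
A standalone check (plain C++, not part of the patch; names and constants are illustrative only) of the observation in the comment above: an usub.with.overflow-style operation yields both the decremented IV and the "iv < step" compare at once, so rewriting the sub + icmp pair of a countdown loop into the intrinsic and hoisting it to the compare point does not duplicate any work.

    #include <cassert>
    #include <cstdint>
    #include <utility>

    // Mimics the {value, overflow} pair produced by llvm.usub.with.overflow.
    static std::pair<uint32_t, bool> usubWithOverflow(uint32_t A, uint32_t B) {
      return {A - B, A < B}; // wrapping difference, unsigned borrow flag
    }

    int main() {
      const uint32_t Step = 1;
      for (uint32_t IV = 16; IV > 0; --IV) {
        auto [Dec, Borrow] = usubWithOverflow(IV, Step);
        assert(Dec == IV - Step);      // the value the IV decrement computes
        assert(Borrow == (IV < Step)); // the value the original icmp computes
      }
      return 0;
    }
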
@@ -1936,6 +2041,10 @@ static bool despeculateCountZeros(IntrinsicInst *CountZeros,
if (Ty->isVectorTy() || SizeInBits > DL->getLargestLegalIntTypeSizeInBits())
return false;
+ // Bail if the value is never zero.
+ if (llvm::isKnownNonZero(CountZeros->getOperand(0), *DL))
+ return false;
+
// The intrinsic will be sunk behind a compare against zero and branch.
BasicBlock *StartBlock = CountZeros->getParent();
BasicBlock *CallBlock = StartBlock->splitBasicBlock(CountZeros, "cond.false");
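
For context, a small standalone sketch (plain C++, not part of the patch; __builtin_ctz is the GCC/Clang builtin standing in for llvm.cttz) of the guarded shape despeculateCountZeros otherwise produces. When the operand is provably non-zero, the zero check and the extra block are pure overhead, which is why the pass now bails out early in that case.

    #include <cassert>
    #include <cstdint>

    // Approximation of "sink the intrinsic behind a compare against zero":
    // only a zero input ever takes the guard.
    static uint32_t cttzGuarded(uint32_t X) {
      return X == 0 ? 32u : uint32_t(__builtin_ctz(X));
    }

    int main() {
      for (uint32_t X = 1; X < 64; ++X)  // known non-zero inputs
        assert(cttzGuarded(X) == uint32_t(__builtin_ctz(X)));
      assert(cttzGuarded(0) == 32u);     // the guard matters only here
      return 0;
    }
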
@@ -2061,18 +2170,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
if (II) {
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::assume: {
- Value *Operand = II->getOperand(0);
- II->eraseFromParent();
- // Prune the operand, it's most likely dead.
- resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
- RecursivelyDeleteTriviallyDeadInstructions(
- Operand, TLInfo, nullptr,
- [&](Value *V) { removeAllAssertingVHReferences(V); });
- });
- return true;
- }
-
+ case Intrinsic::assume:
+ llvm_unreachable("llvm.assume should have been removed already");
case Intrinsic::experimental_widenable_condition: {
// Give up on future widening opportunities so that we can fold away dead
// paths and merge blocks before going into block-local instruction
@@ -2242,21 +2341,25 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
if (PN && PN->getParent() != BB)
return false;
- // Make sure there are no instructions between the PHI and return, or that the
- // return is the first instruction in the block.
- if (PN) {
- BasicBlock::iterator BI = BB->begin();
- // Skip over debug and the bitcast.
- do {
- ++BI;
- } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI || &*BI == EVI ||
- isa<PseudoProbeInst>(BI));
- if (&*BI != RetI)
- return false;
- } else {
- if (BB->getFirstNonPHIOrDbg(true) != RetI)
- return false;
- }
+ auto isLifetimeEndOrBitCastFor = [](const Instruction *Inst) {
+ const BitCastInst *BC = dyn_cast<BitCastInst>(Inst);
+ if (BC && BC->hasOneUse())
+ Inst = BC->user_back();
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_end;
+ return false;
+ };
+
+ // Make sure there are no instructions between the first instruction
+ // and return.
+ const Instruction *BI = BB->getFirstNonPHI();
+ // Skip over debug and the bitcast.
+ while (isa<DbgInfoIntrinsic>(BI) || BI == BCI || BI == EVI ||
+ isa<PseudoProbeInst>(BI) || isLifetimeEndOrBitCastFor(BI))
+ BI = BI->getNextNode();
+ if (BI != RetI)
+ return false;
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
/// call.
@@ -2276,14 +2379,14 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
}
} else {
SmallPtrSet<BasicBlock*, 4> VisitedBBs;
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (!VisitedBBs.insert(*PI).second)
+ for (BasicBlock *Pred : predecessors(BB)) {
+ if (!VisitedBBs.insert(Pred).second)
continue;
- if (Instruction *I = (*PI)->rbegin()->getPrevNonDebugInstruction(true)) {
+ if (Instruction *I = Pred->rbegin()->getPrevNonDebugInstruction(true)) {
CallInst *CI = dyn_cast<CallInst>(I);
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCallBBs.push_back(*PI);
+ TailCallBBs.push_back(Pred);
}
}
}
@@ -2775,11 +2878,16 @@ class TypePromotionTransaction {
/// Keep track of the debug users.
SmallVector<DbgValueInst *, 1> DbgValues;
+ /// Keep track of the new value so that we can undo it by replacing
+ /// instances of the new value with the original value.
+ Value *New;
+
using use_iterator = SmallVectorImpl<InstructionAndIdx>::iterator;
public:
/// Replace all the uses of \p Inst by \p New.
- UsesReplacer(Instruction *Inst, Value *New) : TypePromotionAction(Inst) {
+ UsesReplacer(Instruction *Inst, Value *New)
+ : TypePromotionAction(Inst), New(New) {
LLVM_DEBUG(dbgs() << "Do: UsersReplacer: " << *Inst << " with " << *New
<< "\n");
// Record the original uses.
@@ -2798,20 +2906,14 @@ class TypePromotionTransaction {
/// Reassign the original uses of Inst to Inst.
void undo() override {
LLVM_DEBUG(dbgs() << "Undo: UsersReplacer: " << *Inst << "\n");
- for (use_iterator UseIt = OriginalUses.begin(),
- EndIt = OriginalUses.end();
- UseIt != EndIt; ++UseIt) {
- UseIt->Inst->setOperand(UseIt->Idx, Inst);
- }
+ for (InstructionAndIdx &Use : OriginalUses)
+ Use.Inst->setOperand(Use.Idx, Inst);
// RAUW has replaced all original uses with references to the new value,
// including the debug uses. Since we are undoing the replacements,
// the original debug uses must also be reinstated to maintain the
// correctness and utility of debug value instructions.
- for (auto *DVI: DbgValues) {
- LLVMContext &Ctx = Inst->getType()->getContext();
- auto *MV = MetadataAsValue::get(Ctx, ValueAsMetadata::get(Inst));
- DVI->setOperand(0, MV);
- }
+ for (auto *DVI : DbgValues)
+ DVI->replaceVariableLocationOp(New, Inst);
}
};
@@ -2981,9 +3083,8 @@ TypePromotionTransaction::getRestorationPoint() const {
}
bool TypePromotionTransaction::commit() {
- for (CommitPt It = Actions.begin(), EndIt = Actions.end(); It != EndIt;
- ++It)
- (*It)->commit();
+ for (std::unique_ptr<TypePromotionAction> &Action : Actions)
+ Action->commit();
bool Modified = !Actions.empty();
Actions.clear();
return Modified;
@@ -3007,6 +3108,8 @@ class AddressingModeMatcher {
const TargetLowering &TLI;
const TargetRegisterInfo &TRI;
const DataLayout &DL;
+ const LoopInfo &LI;
+ const std::function<const DominatorTree &()> getDTFn;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
/// the memory instruction that we're computing this address for.
@@ -3042,16 +3145,18 @@ class AddressingModeMatcher {
AddressingModeMatcher(
SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,
- const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,
- ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,
- InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
+ const TargetRegisterInfo &TRI, const LoopInfo &LI,
+ const std::function<const DominatorTree &()> getDTFn,
+ Type *AT, unsigned AS, Instruction *MI, ExtAddrMode &AM,
+ const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
+ TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)
: AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
- DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
- MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
- PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP),
- OptSize(OptSize), PSI(PSI), BFI(BFI) {
+ DL(MI->getModule()->getDataLayout()), LI(LI), getDTFn(getDTFn),
+ AccessTy(AT), AddrSpace(AS), MemoryInst(MI), AddrMode(AM),
+ InsertedInsts(InsertedInsts), PromotedInsts(PromotedInsts), TPT(TPT),
+ LargeOffsetGEP(LargeOffsetGEP), OptSize(OptSize), PSI(PSI), BFI(BFI) {
IgnoreProfitability = false;
}
@@ -3066,18 +3171,18 @@ public:
static ExtAddrMode
Match(Value *V, Type *AccessTy, unsigned AS, Instruction *MemoryInst,
SmallVectorImpl<Instruction *> &AddrModeInsts,
- const TargetLowering &TLI, const TargetRegisterInfo &TRI,
- const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,
- TypePromotionTransaction &TPT,
+ const TargetLowering &TLI, const LoopInfo &LI,
+ const std::function<const DominatorTree &()> getDTFn,
+ const TargetRegisterInfo &TRI, const SetOfInstrs &InsertedInsts,
+ InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT,
std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP,
bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,
- MemoryInst, Result, InsertedInsts,
- PromotedInsts, TPT, LargeOffsetGEP,
- OptSize, PSI, BFI)
- .matchAddr(V, 0);
+ bool Success = AddressingModeMatcher(
+ AddrModeInsts, TLI, TRI, LI, getDTFn, AccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ BFI).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
return Result;
}
@@ -3773,11 +3878,12 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
// Okay, we decided that we can add ScaleReg+Scale to AddrMode. Check now
// to see if ScaleReg is actually X+C. If so, we can turn this into adding
- // X*Scale + C*Scale to addr mode.
+ // X*Scale + C*Scale to addr mode. If we find an available IV increment, do not
+ // go any further: we can reuse it and cannot eliminate it.
ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
- if (isa<Instruction>(ScaleReg) && // not a constant expr.
+ if (isa<Instruction>(ScaleReg) && // not a constant expr.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI))) &&
- CI->getValue().isSignedIntN(64)) {
+ !isIVIncrement(ScaleReg, &LI) && CI->getValue().isSignedIntN(64)) {
TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = AddLHS;
TestAddrMode.BaseOffs += CI->getSExtValue() * TestAddrMode.Scale;
@@ -3789,9 +3895,75 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
AddrMode = TestAddrMode;
return true;
}
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+
+ // If this is an add recurrence with a constant step, return the increment
+ // instruction and the canonicalized step.
+ auto GetConstantStep = [this](const Value * V)
+ ->Optional<std::pair<Instruction *, APInt> > {
+ auto *PN = dyn_cast<PHINode>(V);
+ if (!PN)
+ return None;
+ auto IVInc = getIVIncrement(PN, &LI);
+ if (!IVInc)
+ return None;
+ // TODO: The result of the intrinsics above is two's complement. However, when
+ // IV inc is expressed as add or sub, iv.next is potentially a poison value.
+ // If it has nuw or nsw flags, we need to make sure that these flags are
+ // inferable at the point of the memory instruction. Otherwise we are replacing
+ // a well-defined two's complement computation with poison. Currently, to avoid
+ // potentially complex analysis needed to prove this, we reject such cases.
+ if (auto *OIVInc = dyn_cast<OverflowingBinaryOperator>(IVInc->first))
+ if (OIVInc->hasNoSignedWrap() || OIVInc->hasNoUnsignedWrap())
+ return None;
+ if (auto *ConstantStep = dyn_cast<ConstantInt>(IVInc->second))
+ return std::make_pair(IVInc->first, ConstantStep->getValue());
+ return None;
+ };
+
+ // Try to account for the following special case:
+ // 1. ScaleReg is an induction variable;
+ // 2. We use it with non-zero offset;
+ // 3. IV's increment is available at the point of memory instruction.
+ //
+ // In this case, we may reuse the IV increment instead of the IV Phi to
+ // achieve the following advantages:
+ // 1. If IV step matches the offset, we will have no need in the offset;
+ // 2. Even if they don't match, we will reduce the overlap of living IV
+ // and IV increment, that will potentially lead to better register
+ // assignment.
+ if (AddrMode.BaseOffs) {
+ if (auto IVStep = GetConstantStep(ScaleReg)) {
+ Instruction *IVInc = IVStep->first;
+ // The following assert is important to ensure a lack of infinite loops.
+ // This transform is (intentionally) the inverse of the one just above.
+ // If they don't agree on the definition of an increment, we'd alternate
+ // back and forth indefinitely.
+ assert(isIVIncrement(IVInc, &LI) && "implied by GetConstantStep");
+ APInt Step = IVStep->second;
+ APInt Offset = Step * AddrMode.Scale;
+ if (Offset.isSignedIntN(64)) {
+ TestAddrMode.InBounds = false;
+ TestAddrMode.ScaledReg = IVInc;
+ TestAddrMode.BaseOffs -= Offset.getLimitedValue();
+ // If this addressing mode is legal, commit it.
+ // (Note that we defer the (expensive) domtree-based legality check
+ // to the very last possible point.)
+ if (TLI.isLegalAddressingMode(DL, TestAddrMode, AccessTy, AddrSpace) &&
+ getDTFn().dominates(IVInc, MemoryInst)) {
+ AddrModeInsts.push_back(cast<Instruction>(IVInc));
+ AddrMode = TestAddrMode;
+ return true;
+ }
+ // Restore status quo.
+ TestAddrMode = AddrMode;
+ }
+ }
}
- // Otherwise, not (x+c)*scale, just return what we have.
+ // Otherwise, just return what we have.
return true;
}
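
A standalone arithmetic check (plain C++, not part of the patch; the constants are arbitrary) of the rebasing performed above: since iv.next = iv + Step, the address Base + Scale*iv + Offs is identical to Base + Scale*iv.next + (Offs - Scale*Step), so the IV increment can serve as the scaled register while the constant offset shrinks, often to zero.

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t Base = 1000, Scale = 4, Step = 1, Offs = 4;
      for (int64_t IV = 0; IV < 16; ++IV) {
        int64_t IVNext = IV + Step;
        // Same address, expressed against the incremented IV with a smaller
        // offset (zero here, since Offs == Scale * Step).
        assert(Base + Scale * IV + Offs ==
               Base + Scale * IVNext + (Offs - Scale * Step));
      }
      return 0;
    }
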
@@ -4881,9 +5053,10 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
0);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(
- MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI, LI, getDTFn,
+ AddressAccessTy, AS, MemoryInst, Result,
+ InsertedInsts, PromotedInsts, TPT,
+ LargeOffsetGEP, OptSize, PSI, BFI);
Matcher.IgnoreProfitability = true;
bool Success = Matcher.matchAddr(Address, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -4986,9 +5159,16 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
AddrModeInsts.clear();
std::pair<AssertingVH<GetElementPtrInst>, int64_t> LargeOffsetGEP(nullptr,
0);
+ // Defer the query (and possible computation) of the dom tree to the point of
+ // actual use. It's expected that most address matches don't actually need
+ // the domtree.
+ auto getDTFn = [MemoryInst, this]() -> const DominatorTree & {
+ Function *F = MemoryInst->getParent()->getParent();
+ return this->getDT(*F);
+ };
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI,
- InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
+ V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *LI, getDTFn,
+ *TRI, InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI,
BFI.get());
GetElementPtrInst *GEP = LargeOffsetGEP.first;
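
A minimal standalone sketch (plain C++, not the pass's code; the DomTreeLike type and all names are invented) of the deferral idiom used here: the matcher is handed a callable that builds or fetches the dominator tree on demand, so the cost is paid only on the rare path that actually queries it.

    #include <cstdio>
    #include <functional>
    #include <memory>

    struct DomTreeLike {                   // stand-in for an expensive analysis
      DomTreeLike() { std::puts("built dominator tree"); }
      bool dominates(int A, int B) const { return A <= B; }
    };

    // The callee only invokes Get() when it really needs the analysis.
    static bool maybeQuery(bool Need,
                           const std::function<const DomTreeLike &()> &Get) {
      return Need && Get().dominates(1, 2);
    }

    int main() {
      std::unique_ptr<DomTreeLike> Cached;
      auto Get = [&]() -> const DomTreeLike & {
        if (!Cached)
          Cached = std::make_unique<DomTreeLike>(); // computed lazily, then reused
        return *Cached;
      };
      std::printf("%d\n", maybeQuery(false, Get)); // nothing is built here
      std::printf("%d\n", maybeQuery(true, Get));  // built exactly once here
      return 0;
    }
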
@@ -5373,14 +5553,19 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
IRBuilder<> Builder(MemoryInst);
+ Type *SourceTy = GEP->getSourceElementType();
Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType());
// If the final index isn't a vector, emit a scalar GEP containing all ops
// and a vector GEP with all zeroes final index.
if (!Ops[FinalIndex]->getType()->isVectorTy()) {
- NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front());
+ NewAddr = Builder.CreateGEP(SourceTy, Ops[0],
+ makeArrayRef(Ops).drop_front());
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
- NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy));
+ auto *SecondTy = GetElementPtrInst::getIndexedType(
+ SourceTy, makeArrayRef(Ops).drop_front());
+ NewAddr =
+ Builder.CreateGEP(SecondTy, NewAddr, Constant::getNullValue(IndexTy));
} else {
Value *Base = Ops[0];
Value *Index = Ops[FinalIndex];
@@ -5389,11 +5574,14 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
if (Ops.size() != 2) {
// Replace the last index with 0.
Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy);
- Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front());
+ Base = Builder.CreateGEP(SourceTy, Base,
+ makeArrayRef(Ops).drop_front());
+ SourceTy = GetElementPtrInst::getIndexedType(
+ SourceTy, makeArrayRef(Ops).drop_front());
}
// Now create the GEP with scalar pointer and vector index.
- NewAddr = Builder.CreateGEP(Base, Index);
+ NewAddr = Builder.CreateGEP(SourceTy, Base, Index);
}
} else if (!isa<Constant>(Ptr)) {
// Not a GEP, maybe its a splat and we can create a GEP to enable
@@ -5409,7 +5597,16 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst,
// Emit a vector GEP with a scalar pointer and all 0s vector index.
Type *ScalarIndexTy = DL->getIndexType(V->getType()->getScalarType());
auto *IndexTy = VectorType::get(ScalarIndexTy, NumElts);
- NewAddr = Builder.CreateGEP(V, Constant::getNullValue(IndexTy));
+ Type *ScalarTy;
+ if (cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
+ Intrinsic::masked_gather) {
+ ScalarTy = MemoryInst->getType()->getScalarType();
+ } else {
+ assert(cast<IntrinsicInst>(MemoryInst)->getIntrinsicID() ==
+ Intrinsic::masked_scatter);
+ ScalarTy = MemoryInst->getOperand(0)->getType()->getScalarType();
+ }
+ NewAddr = Builder.CreateGEP(ScalarTy, V, Constant::getNullValue(IndexTy));
} else {
// Constant, SelectionDAGBuilder knows to check if it's a splat.
return false;
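
To make the explicit-element-type GEP calls above concrete, here is a small self-contained sketch (assuming the plain LLVM C++ API of roughly this vintage; the module, function, and variable names are invented for illustration): CreateGEP takes the source element type directly, and GetElementPtrInst::getIndexedType computes the type a given index list lands on, as used above for the second, vector-indexed GEP.

    #include <cassert>
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("gep-demo", Ctx);
      Type *I32 = Type::getInt32Ty(Ctx);
      ArrayType *ArrTy = ArrayType::get(I32, 8);            // [8 x i32]
      Type *ArrPtrTy = PointerType::getUnqual(ArrTy);
      FunctionType *FTy =
          FunctionType::get(Type::getInt32PtrTy(Ctx), {ArrPtrTy, I32}, false);
      Function *F =
          Function::Create(FTy, Function::ExternalLinkage, "gep_demo", M);
      BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
      IRBuilder<> B(BB);
      Value *Base = F->getArg(0), *Idx = F->getArg(1);
      Value *Zero = ConstantInt::get(I32, 0);
      // The source element type is passed explicitly instead of being derived
      // from the pointer operand's pointee type.
      Value *Scalar = B.CreateGEP(ArrTy, Base, {Zero, Idx});
      // The element type this index list reaches; i32 here.
      Type *ElemTy = GetElementPtrInst::getIndexedType(ArrTy, {Zero, Idx});
      assert(ElemTy == I32);
      B.CreateRet(Scalar);
      M.print(outs(), nullptr);
      return 0;
    }
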
@@ -6272,6 +6469,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
EVT LoadResultVT = TLI->getValueType(*DL, Load->getType());
unsigned BitWidth = LoadResultVT.getSizeInBits();
+ // If the BitWidth is 0, do not try to optimize the type
+ if (BitWidth == 0)
+ return false;
+
APInt DemandBits(BitWidth, 0);
APInt WidestAndBits(BitWidth, 0);
@@ -6409,7 +6610,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
uint64_t Sum = TrueWeight + FalseWeight;
if (Sum != 0) {
auto Probability = BranchProbability::getBranchProbability(Max, Sum);
- if (Probability > TLI->getPredictableBranchThreshold())
+ if (Probability > TTI->getPredictableBranchThreshold())
return true;
}
}
@@ -6795,7 +6996,8 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
Value *Cond = SI->getCondition();
Type *OldType = Cond->getType();
LLVMContext &Context = Cond->getContext();
- MVT RegType = TLI->getRegisterType(Context, TLI->getValueType(*DL, OldType));
+ EVT OldVT = TLI->getValueType(*DL, OldType);
+ MVT RegType = TLI->getRegisterType(Context, OldVT);
unsigned RegWidth = RegType.getSizeInBits();
if (RegWidth <= cast<IntegerType>(OldType)->getBitWidth())
@@ -6809,14 +7011,21 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
// where N is the number of cases in the switch.
auto *NewType = Type::getIntNTy(Context, RegWidth);
- // Zero-extend the switch condition and case constants unless the switch
- // condition is a function argument that is already being sign-extended.
- // In that case, we can avoid an unnecessary mask/extension by sign-extending
- // everything instead.
+ // Extend the switch condition and case constants using the target preferred
+ // extend unless the switch condition is a function argument with an extend
+ // attribute. In that case, we can avoid an unnecessary mask/extension by
+ // matching the argument extension instead.
Instruction::CastOps ExtType = Instruction::ZExt;
- if (auto *Arg = dyn_cast<Argument>(Cond))
+ // Some targets prefer SExt over ZExt.
+ if (TLI->isSExtCheaperThanZExt(OldVT, RegType))
+ ExtType = Instruction::SExt;
+
+ if (auto *Arg = dyn_cast<Argument>(Cond)) {
if (Arg->hasSExtAttr())
ExtType = Instruction::SExt;
+ if (Arg->hasZExtAttr())
+ ExtType = Instruction::ZExt;
+ }
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
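
A standalone check (plain C++, not part of the patch) that the choice of extension is purely a cost question: as long as the condition and the case constants are widened the same way, equality against every case value is preserved, which is why a target that finds SExt cheaper than ZExt may simply prefer it.

    #include <cassert>
    #include <cstdint>

    // Portable sign-extension of an 8-bit value to 32 bits.
    static int32_t sext8(uint8_t V) {
      return (V & 0x80) ? int32_t(V) - 256 : int32_t(V);
    }

    int main() {
      for (int X = 0; X < 256; ++X)
        for (int C = 0; C < 256; ++C) {
          uint8_t Cond = uint8_t(X), CaseVal = uint8_t(C);
          bool EqZext = uint32_t(Cond) == uint32_t(CaseVal); // both zero-extended
          bool EqSext = sext8(Cond) == sext8(CaseVal);       // both sign-extended
          assert(EqZext == (Cond == CaseVal) && EqSext == EqZext);
        }
      return 0;
    }
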
@@ -6927,11 +7136,10 @@ class VectorPromoteHelper {
StoreInst *ST = cast<StoreInst>(CombineInst);
unsigned AS = ST->getPointerAddressSpace();
- unsigned Align = ST->getAlignment();
// Check if this store is supported.
if (!TLI.allowsMisalignedMemoryAccesses(
TLI.getValueType(DL, ST->getValueOperand()->getType()), AS,
- Align)) {
+ ST->getAlign())) {
// If this is not supported, there is no way we can combine
// the extract with the store.
return false;
@@ -6940,9 +7148,9 @@ class VectorPromoteHelper {
// The scalar chain of computation has to pay for the transition
// scalar to vector.
// The vector chain has to account for the combining cost.
- uint64_t ScalarCost =
+ InstructionCost ScalarCost =
TTI.getVectorInstrCost(Transition->getOpcode(), PromotedType, Index);
- uint64_t VectorCost = StoreExtractCombineCost;
+ InstructionCost VectorCost = StoreExtractCombineCost;
enum TargetTransformInfo::TargetCostKind CostKind =
TargetTransformInfo::TCK_RecipThroughput;
for (const auto &Inst : InstsToBePromoted) {
@@ -7483,9 +7691,8 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
for (GetElementPtrInst *UGEPI : UGEPIs) {
ConstantInt *UGEPIIdx = cast<ConstantInt>(UGEPI->getOperand(1));
APInt NewIdx = UGEPIIdx->getValue() - GEPIIdx->getValue();
- unsigned ImmCost =
- TTI->getIntImmCost(NewIdx, GEPIIdx->getType(),
- TargetTransformInfo::TCK_SizeAndLatency);
+ InstructionCost ImmCost = TTI->getIntImmCost(
+ NewIdx, GEPIIdx->getType(), TargetTransformInfo::TCK_SizeAndLatency);
if (ImmCost > TargetTransformInfo::TCC_Basic)
return false;
}
@@ -7511,6 +7718,67 @@ static bool tryUnmergingGEPsAcrossIndirectBr(GetElementPtrInst *GEPI,
return true;
}
+static bool optimizeBranch(BranchInst *Branch, const TargetLowering &TLI) {
+ // Try and convert
+ // %c = icmp ult %x, 8
+ // br %c, bla, blb
+ // %tc = lshr %x, 3
+ // to
+ // %tc = lshr %x, 3
+ // %c = icmp eq %tc, 0
+ // br %c, bla, blb
+ // Creating the cmp to zero can be better for the backend, especially if the
+ // lshr produces flags that can be used automatically.
+ if (!TLI.preferZeroCompareBranch() || !Branch->isConditional())
+ return false;
+
+ ICmpInst *Cmp = dyn_cast<ICmpInst>(Branch->getCondition());
+ if (!Cmp || !isa<ConstantInt>(Cmp->getOperand(1)) || !Cmp->hasOneUse())
+ return false;
+
+ Value *X = Cmp->getOperand(0);
+ APInt CmpC = cast<ConstantInt>(Cmp->getOperand(1))->getValue();
+
+ for (auto *U : X->users()) {
+ Instruction *UI = dyn_cast<Instruction>(U);
+ // A quick dominance check
+ if (!UI ||
+ (UI->getParent() != Branch->getParent() &&
+ UI->getParent() != Branch->getSuccessor(0) &&
+ UI->getParent() != Branch->getSuccessor(1)) ||
+ (UI->getParent() != Branch->getParent() &&
+ !UI->getParent()->getSinglePredecessor()))
+ continue;
+
+ if (CmpC.isPowerOf2() && Cmp->getPredicate() == ICmpInst::ICMP_ULT &&
+ match(UI, m_Shr(m_Specific(X), m_SpecificInt(CmpC.logBase2())))) {
+ IRBuilder<> Builder(Branch);
+ if (UI->getParent() != Branch->getParent())
+ UI->moveBefore(Branch);
+ Value *NewCmp = Builder.CreateCmp(ICmpInst::ICMP_EQ, UI,
+ ConstantInt::get(UI->getType(), 0));
+ LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
+ LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
+ Cmp->replaceAllUsesWith(NewCmp);
+ return true;
+ }
+ if (Cmp->isEquality() &&
+ (match(UI, m_Add(m_Specific(X), m_SpecificInt(-CmpC))) ||
+ match(UI, m_Sub(m_Specific(X), m_SpecificInt(CmpC))))) {
+ IRBuilder<> Builder(Branch);
+ if (UI->getParent() != Branch->getParent())
+ UI->moveBefore(Branch);
+ Value *NewCmp = Builder.CreateCmp(Cmp->getPredicate(), UI,
+ ConstantInt::get(UI->getType(), 0));
+ LLVM_DEBUG(dbgs() << "Converting " << *Cmp << "\n");
+ LLVM_DEBUG(dbgs() << " to compare on zero: " << *NewCmp << "\n");
+ Cmp->replaceAllUsesWith(NewCmp);
+ return true;
+ }
+ }
+ return false;
+}
+
bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
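
A standalone check (plain C++, not part of the patch) of the two equivalences the optimizeBranch rewrite above relies on: for unsigned x and a power-of-two bound 2^k, x < 2^k holds exactly when x >> k == 0, and for equality tests, x == C holds exactly when the wrapping difference x - C is zero.

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 1024; ++X) {
        // icmp ult %x, 8   ->   icmp eq (lshr %x, 3), 0
        assert((X < 8u) == ((X >> 3) == 0u));
        // icmp eq %x, 13   ->   icmp eq (sub %x, 13), 0
        assert((X == 13u) == ((X - 13u) == 0u));
      }
      return 0;
    }
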
@@ -7672,6 +7940,8 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return optimizeSwitchInst(cast<SwitchInst>(I));
case Instruction::ExtractElement:
return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+ case Instruction::Br:
+ return optimizeBranch(cast<BranchInst>(I), *TLI);
}
return false;
@@ -7731,19 +8001,23 @@ bool CodeGenPrepare::fixupDbgValue(Instruction *I) {
DbgValueInst &DVI = *cast<DbgValueInst>(I);
// Does this dbg.value refer to a sunk address calculation?
- Value *Location = DVI.getVariableLocation();
- WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
- Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
- if (SunkAddr) {
- // Point dbg.value at locally computed address, which should give the best
- // opportunity to be accurately lowered. This update may change the type of
- // pointer being referred to; however this makes no difference to debugging
- // information, and we can't generate bitcasts that may affect codegen.
- DVI.setOperand(0, MetadataAsValue::get(DVI.getContext(),
- ValueAsMetadata::get(SunkAddr)));
- return true;
- }
- return false;
+ bool AnyChange = false;
+ SmallDenseSet<Value *> LocationOps(DVI.location_ops().begin(),
+ DVI.location_ops().end());
+ for (Value *Location : LocationOps) {
+ WeakTrackingVH SunkAddrVH = SunkAddrs[Location];
+ Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr;
+ if (SunkAddr) {
+ // Point dbg.value at locally computed address, which should give the best
+ // opportunity to be accurately lowered. This update may change the type
+ // of pointer being referred to; however this makes no difference to
+ // debugging information, and we can't generate bitcasts that may affect
+ // codegen.
+ DVI.replaceVariableLocationOp(Location, SunkAddr);
+ AnyChange = true;
+ }
+ }
+ return AnyChange;
}
// A llvm.dbg.value may be using a value before its definition, due to
@@ -7762,30 +8036,73 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
if (!DVI)
continue;
- Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue());
+ SmallVector<Instruction *, 4> VIs;
+ for (Value *V : DVI->getValues())
+ if (Instruction *VI = dyn_cast_or_null<Instruction>(V))
+ VIs.push_back(VI);
+
+ // This DVI may depend on multiple instructions, complicating any
+ // potential sink. This block takes the defensive approach, opting to
+ // "undef" the DVI if it has more than one instruction and any of them do
+ // not dominate DVI.
+ for (Instruction *VI : VIs) {
+ if (VI->isTerminator())
+ continue;
- if (!VI || VI->isTerminator())
- continue;
+ // If VI is a phi in a block with an EHPad terminator, we can't insert
+ // after it.
+ if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
+ continue;
- // If VI is a phi in a block with an EHPad terminator, we can't insert
- // after it.
- if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad())
- continue;
+ // If the defining instruction dominates the dbg.value, we do not need
+ // to move the dbg.value.
+ if (DT.dominates(VI, DVI))
+ continue;
- // If the defining instruction dominates the dbg.value, we do not need
- // to move the dbg.value.
- if (DT.dominates(VI, DVI))
- continue;
+ // If we depend on multiple instructions and any of them doesn't
+ // dominate this DVI, we probably can't salvage it: moving it to
+ // after any of the instructions could cause us to lose the others.
+ if (VIs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Unable to find valid location for Debug Value, undefing:\n"
+ << *DVI);
+ DVI->setUndef();
+ break;
+ }
- LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
- << *DVI << ' ' << *VI);
- DVI->removeFromParent();
- if (isa<PHINode>(VI))
- DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
- else
- DVI->insertAfter(VI);
- MadeChange = true;
- ++NumDbgValueMoved;
+ LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n"
+ << *DVI << ' ' << *VI);
+ DVI->removeFromParent();
+ if (isa<PHINode>(VI))
+ DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt());
+ else
+ DVI->insertAfter(VI);
+ MadeChange = true;
+ ++NumDbgValueMoved;
+ }
+ }
+ }
+ return MadeChange;
+}
+
+// Group scattered pseudo probes in a block to favor SelectionDAG. Scattered
+// probes can be chained dependencies of other regular DAG nodes and block DAG
+// combine optimizations.
+bool CodeGenPrepare::placePseudoProbes(Function &F) {
+ bool MadeChange = false;
+ for (auto &Block : F) {
+ // Move the rest of the probes to the beginning of the block.
+ auto FirstInst = Block.getFirstInsertionPt();
+ while (FirstInst != Block.end() && FirstInst->isDebugOrPseudoInst())
+ ++FirstInst;
+ BasicBlock::iterator I(FirstInst);
+ I++;
+ while (I != Block.end()) {
+ if (auto *II = dyn_cast<PseudoProbeInst>(I++)) {
+ II->moveBefore(&*FirstInst);
+ MadeChange = true;
+ }
}
}
return MadeChange;