aboutsummaryrefslogtreecommitdiff
path: root/lib/CodeGen/CodeGenPrepare.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/CodeGen/CodeGenPrepare.cpp')
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp928
1 files changed, 677 insertions, 251 deletions
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 934b470f13b5..2bdd189557b4 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -15,10 +15,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -53,8 +55,10 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -77,7 +81,6 @@ STATISTIC(NumAndUses, "Number of uses of and mask instructions optimized");
STATISTIC(NumRetsDup, "Number of return instructions duplicated");
STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved");
STATISTIC(NumSelectsExpanded, "Number of selects turned into branches");
-STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches");
STATISTIC(NumStoreExtractExposed, "Number of store(extractelement) exposed");
static cl::opt<bool> DisableBranchOpts(
@@ -93,7 +96,7 @@ static cl::opt<bool> DisableSelectToBranch(
cl::desc("Disable select to branch conversion."));
static cl::opt<bool> AddrSinkUsingGEPs(
- "addr-sink-using-gep", cl::Hidden, cl::init(false),
+ "addr-sink-using-gep", cl::Hidden, cl::init(true),
cl::desc("Address sinking in CGP using GEPs."));
static cl::opt<bool> EnableAndCmpSinking(
@@ -135,15 +138,24 @@ static cl::opt<bool> ForceSplitStore(
"force-split-store", cl::Hidden, cl::init(false),
cl::desc("Force store splitting no matter what the target query says."));
+static cl::opt<bool>
+EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
+ cl::desc("Enable merging of redundant sexts when one is dominating"
+ " the other."), cl::init(true));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+typedef SmallVector<Instruction *, 16> SExts;
+typedef DenseMap<Value *, SExts> ValueToSExts;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
const TargetMachine *TM;
+ const TargetSubtargetInfo *SubtargetInfo;
const TargetLowering *TLI;
+ const TargetRegisterInfo *TRI;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
@@ -165,6 +177,15 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
+ /// Keep track of instructions removed during promotion.
+ SetOfInstrs RemovedInsts;
+
+ /// Keep track of sext chains based on their initial value.
+ DenseMap<Value *, Instruction *> SeenChainsForSExt;
+
+ /// Keep track of SExt promoted.
+ ValueToSExts ValToSExtendedUses;
+
/// True if CFG is modified in any way.
bool ModifiedDT;
@@ -206,7 +227,7 @@ class TypePromotionTransaction;
Type *AccessTy, unsigned AS);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
- bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *I);
bool optimizeSelectInst(SelectInst *SI);
@@ -215,13 +236,21 @@ class TypePromotionTransaction;
bool optimizeExtractElementInst(Instruction *Inst);
bool dupRetToEnableTailCallOpts(BasicBlock *BB);
bool placeDbgValues(Function &F);
- bool sinkAndCmp(Function &F);
- bool extLdPromotion(TypePromotionTransaction &TPT, LoadInst *&LI,
- Instruction *&Inst,
- const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInstCost);
+ bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
+ LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
+ bool tryToPromoteExts(TypePromotionTransaction &TPT,
+ const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost = 0);
+ bool mergeSExts(Function &F);
+ bool performAddressTypePromotion(
+ Instruction *&Inst,
+ bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
+ bool splitIndirectCriticalEdges(Function &F);
};
}
@@ -250,8 +279,11 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
BPI.reset();
ModifiedDT = false;
- if (TM)
- TLI = TM->getSubtargetImpl(F)->getTargetLowering();
+ if (TM) {
+ SubtargetInfo = TM->getSubtargetImpl(F);
+ TLI = SubtargetInfo->getTargetLowering();
+ TRI = SubtargetInfo->getRegisterInfo();
+ }
TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
@@ -260,9 +292,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ProfileGuidedSectionPrefix) {
ProfileSummaryInfo *PSI =
getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- if (PSI->isFunctionEntryHot(&F))
+ if (PSI->isFunctionHotInCallGraph(&F))
F.setSectionPrefix(".hot");
- else if (PSI->isFunctionEntryCold(&F))
+ else if (PSI->isFunctionColdInCallGraph(&F))
F.setSectionPrefix(".cold");
}
@@ -290,18 +322,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// find a node corresponding to the value.
EverMadeChange |= placeDbgValues(F);
- // If there is a mask, compare against zero, and branch that can be combined
- // into a single target instruction, push the mask and compare into branch
- // users. Do this before OptimizeBlock -> OptimizeInst ->
- // OptimizeCmpExpression, which perturbs the pattern being searched for.
- if (!DisableBranchOpts) {
- EverMadeChange |= sinkAndCmp(F);
+ if (!DisableBranchOpts)
EverMadeChange |= splitBranchCondition(F);
- }
+
+ // Split some critical edges where one of the sources is an indirect branch,
+ // to help generate sane code for PHIs involving such edges.
+ EverMadeChange |= splitIndirectCriticalEdges(F);
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -311,6 +344,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
if (ModifiedDTOnIteration)
break;
}
+ if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
+ MadeChange |= mergeSExts(F);
+
+ // Really free removed instructions during promotion.
+ for (Instruction *I : RemovedInsts)
+ delete I;
+
EverMadeChange |= MadeChange;
}
@@ -432,6 +472,154 @@ BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
return DestBB;
}
+// Return the unique indirectbr predecessor of a block. This may return null
+// even if such a predecessor exists, if it's not useful for splitting.
+// If a predecessor is found, OtherPreds will contain all other (non-indirectbr)
+// predecessors of BB.
+static BasicBlock *
+findIBRPredecessor(BasicBlock *BB, SmallVectorImpl<BasicBlock *> &OtherPreds) {
+ // If the block doesn't have any PHIs, we don't care about it, since there's
+ // no point in splitting it.
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (!PN)
+ return nullptr;
+
+ // Verify we have exactly one IBR predecessor.
+ // Conservatively bail out if one of the other predecessors is not a "regular"
+ // terminator (that is, not a switch or a br).
+ BasicBlock *IBB = nullptr;
+ for (unsigned Pred = 0, E = PN->getNumIncomingValues(); Pred != E; ++Pred) {
+ BasicBlock *PredBB = PN->getIncomingBlock(Pred);
+ TerminatorInst *PredTerm = PredBB->getTerminator();
+ switch (PredTerm->getOpcode()) {
+ case Instruction::IndirectBr:
+ if (IBB)
+ return nullptr;
+ IBB = PredBB;
+ break;
+ case Instruction::Br:
+ case Instruction::Switch:
+ OtherPreds.push_back(PredBB);
+ continue;
+ default:
+ return nullptr;
+ }
+ }
+
+ return IBB;
+}
+
+// Split critical edges where the source of the edge is an indirectbr
+// instruction. This isn't always possible, but we can handle some easy cases.
+// This is useful because MI is unable to split such critical edges,
+// which means it will not be able to sink instructions along those edges.
+// This is especially painful for indirect branches with many successors, where
+// we end up having to prepare all outgoing values in the origin block.
+//
+// Our normal algorithm for splitting critical edges requires us to update
+// the outgoing edges of the edge origin block, but for an indirectbr this
+// is hard, since it would require finding and updating the block addresses
+// the indirect branch uses. But if a block only has a single indirectbr
+// predecessor, with the others being regular branches, we can do it in a
+// different way.
+// Say we have A -> D, B -> D, I -> D where only I -> D is an indirectbr.
+// We can split D into D0 and D1, where D0 contains only the PHIs from D,
+// and D1 is the D block body. We can then duplicate D0 as D0A and D0B, and
+// create the following structure:
+// A -> D0A, B -> D0A, I -> D0B, D0A -> D1, D0B -> D1
+bool CodeGenPrepare::splitIndirectCriticalEdges(Function &F) {
+ // Check whether the function has any indirectbrs, and collect which blocks
+ // they may jump to. Since most functions don't have indirect branches,
+ // this lowers the common case's overhead to O(Blocks) instead of O(Edges).
+ SmallSetVector<BasicBlock *, 16> Targets;
+ for (auto &BB : F) {
+ auto *IBI = dyn_cast<IndirectBrInst>(BB.getTerminator());
+ if (!IBI)
+ continue;
+
+ for (unsigned Succ = 0, E = IBI->getNumSuccessors(); Succ != E; ++Succ)
+ Targets.insert(IBI->getSuccessor(Succ));
+ }
+
+ if (Targets.empty())
+ return false;
+
+ bool Changed = false;
+ for (BasicBlock *Target : Targets) {
+ SmallVector<BasicBlock *, 16> OtherPreds;
+ BasicBlock *IBRPred = findIBRPredecessor(Target, OtherPreds);
+ // If we did not found an indirectbr, or the indirectbr is the only
+ // incoming edge, this isn't the kind of edge we're looking for.
+ if (!IBRPred || OtherPreds.empty())
+ continue;
+
+ // Don't even think about ehpads/landingpads.
+ Instruction *FirstNonPHI = Target->getFirstNonPHI();
+ if (FirstNonPHI->isEHPad() || Target->isLandingPad())
+ continue;
+
+ BasicBlock *BodyBlock = Target->splitBasicBlock(FirstNonPHI, ".split");
+ // It's possible Target was its own successor through an indirectbr.
+ // In this case, the indirectbr now comes from BodyBlock.
+ if (IBRPred == Target)
+ IBRPred = BodyBlock;
+
+ // At this point Target only has PHIs, and BodyBlock has the rest of the
+ // block's body. Create a copy of Target that will be used by the "direct"
+ // preds.
+ ValueToValueMapTy VMap;
+ BasicBlock *DirectSucc = CloneBasicBlock(Target, VMap, ".clone", &F);
+
+ for (BasicBlock *Pred : OtherPreds)
+ Pred->getTerminator()->replaceUsesOfWith(Target, DirectSucc);
+
+ // Ok, now fix up the PHIs. We know the two blocks only have PHIs, and that
+ // they are clones, so the number of PHIs are the same.
+ // (a) Remove the edge coming from IBRPred from the "Direct" PHI
+ // (b) Leave that as the only edge in the "Indirect" PHI.
+ // (c) Merge the two in the body block.
+ BasicBlock::iterator Indirect = Target->begin(),
+ End = Target->getFirstNonPHI()->getIterator();
+ BasicBlock::iterator Direct = DirectSucc->begin();
+ BasicBlock::iterator MergeInsert = BodyBlock->getFirstInsertionPt();
+
+ assert(&*End == Target->getTerminator() &&
+ "Block was expected to only contain PHIs");
+
+ while (Indirect != End) {
+ PHINode *DirPHI = cast<PHINode>(Direct);
+ PHINode *IndPHI = cast<PHINode>(Indirect);
+
+ // Now, clean up - the direct block shouldn't get the indirect value,
+ // and vice versa.
+ DirPHI->removeIncomingValue(IBRPred);
+ Direct++;
+
+ // Advance the pointer here, to avoid invalidation issues when the old
+ // PHI is erased.
+ Indirect++;
+
+ PHINode *NewIndPHI = PHINode::Create(IndPHI->getType(), 1, "ind", IndPHI);
+ NewIndPHI->addIncoming(IndPHI->getIncomingValueForBlock(IBRPred),
+ IBRPred);
+
+ // Create a PHI in the body block, to merge the direct and indirect
+ // predecessors.
+ PHINode *MergePHI =
+ PHINode::Create(IndPHI->getType(), 2, "merge", &*MergeInsert);
+ MergePHI->addIncoming(NewIndPHI, Target);
+ MergePHI->addIncoming(DirPHI, DirectSucc);
+
+ IndPHI->replaceAllUsesWith(MergePHI);
+ IndPHI->eraseFromParent();
+ }
+
+ Changed = true;
+ }
+
+ return Changed;
+}
+
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -1090,6 +1278,83 @@ static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
return false;
}
+/// Duplicate and sink the given 'and' instruction into user blocks where it is
+/// used in a compare to allow isel to generate better code for targets where
+/// this operation can be combined.
+///
+/// Return true if any changes are made.
+static bool sinkAndCmp0Expression(Instruction *AndI,
+ const TargetLowering &TLI,
+ SetOfInstrs &InsertedInsts) {
+ // Double-check that we're not trying to optimize an instruction that was
+ // already optimized by some other part of this pass.
+ assert(!InsertedInsts.count(AndI) &&
+ "Attempting to optimize already optimized and instruction");
+ (void) InsertedInsts;
+
+ // Nothing to do for single use in same basic block.
+ if (AndI->hasOneUse() &&
+ AndI->getParent() == cast<Instruction>(*AndI->user_begin())->getParent())
+ return false;
+
+ // Try to avoid cases where sinking/duplicating is likely to increase register
+ // pressure.
+ if (!isa<ConstantInt>(AndI->getOperand(0)) &&
+ !isa<ConstantInt>(AndI->getOperand(1)) &&
+ AndI->getOperand(0)->hasOneUse() && AndI->getOperand(1)->hasOneUse())
+ return false;
+
+ for (auto *U : AndI->users()) {
+ Instruction *User = cast<Instruction>(U);
+
+ // Only sink for and mask feeding icmp with 0.
+ if (!isa<ICmpInst>(User))
+ return false;
+
+ auto *CmpC = dyn_cast<ConstantInt>(User->getOperand(1));
+ if (!CmpC || !CmpC->isZero())
+ return false;
+ }
+
+ if (!TLI.isMaskAndCmp0FoldingBeneficial(*AndI))
+ return false;
+
+ DEBUG(dbgs() << "found 'and' feeding only icmp 0;\n");
+ DEBUG(AndI->getParent()->dump());
+
+ // Push the 'and' into the same block as the icmp 0. There should only be
+ // one (icmp (and, 0)) in each block, since CSE/GVN should have removed any
+ // others, so we don't need to keep track of which BBs we insert into.
+ for (Value::user_iterator UI = AndI->user_begin(), E = AndI->user_end();
+ UI != E; ) {
+ Use &TheUse = UI.getUse();
+ Instruction *User = cast<Instruction>(*UI);
+
+ // Preincrement use iterator so we don't invalidate it.
+ ++UI;
+
+ DEBUG(dbgs() << "sinking 'and' use: " << *User << "\n");
+
+ // Keep the 'and' in the same place if the use is already in the same block.
+ Instruction *InsertPt =
+ User->getParent() == AndI->getParent() ? AndI : User;
+ Instruction *InsertedAnd =
+ BinaryOperator::Create(Instruction::And, AndI->getOperand(0),
+ AndI->getOperand(1), "", InsertPt);
+ // Propagate the debug info.
+ InsertedAnd->setDebugLoc(AndI->getDebugLoc());
+
+ // Replace a use of the 'and' with a use of the new 'and'.
+ TheUse = InsertedAnd;
+ ++NumAndUses;
+ DEBUG(User->getParent()->dump());
+ }
+
+ // We removed all uses, nuke the and.
+ AndI->eraseFromParent();
+ return true;
+}
+
/// Check if the candidates could be combined with a shift instruction, which
/// includes:
/// 1. Truncate instruction
@@ -2028,16 +2293,15 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
}
if (TLI) {
- // Unknown address space.
- // TODO: Target hook to pick which address space the intrinsic cares
- // about?
- unsigned AddrSpace = ~0u;
SmallVector<Value*, 2> PtrOps;
Type *AccessTy;
- if (TLI->GetAddrModeArguments(II, PtrOps, AccessTy, AddrSpace))
- while (!PtrOps.empty())
- if (optimizeMemoryInst(II, PtrOps.pop_back_val(), AccessTy, AddrSpace))
+ if (TLI->getAddrModeArguments(II, PtrOps, AccessTy))
+ while (!PtrOps.empty()) {
+ Value *PtrVal = PtrOps.pop_back_val();
+ unsigned AS = PtrVal->getType()->getPointerAddressSpace();
+ if (optimizeMemoryInst(II, PtrVal, AccessTy, AS))
return true;
+ }
}
}
@@ -2168,11 +2432,11 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
// Conservatively require the attributes of the call to match those of the
// return. Ignore noalias because it doesn't affect the call sequence.
- AttributeSet CalleeAttrs = CS.getAttributes();
- if (AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias) !=
- AttrBuilder(CalleeAttrs, AttributeSet::ReturnIndex).
- removeAttribute(Attribute::NoAlias))
+ AttributeList CalleeAttrs = CS.getAttributes();
+ if (AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias) !=
+ AttrBuilder(CalleeAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias))
continue;
// Make sure the call instruction is followed by an unconditional branch to
@@ -2561,25 +2825,30 @@ class TypePromotionTransaction {
OperandsHider Hider;
/// Keep track of the uses replaced, if any.
UsesReplacer *Replacer;
+ /// Keep track of instructions removed.
+ SetOfInstrs &RemovedInsts;
public:
/// \brief Remove all reference of \p Inst and optinally replace all its
/// uses with New.
+ /// \p RemovedInsts Keep track of the instructions removed by this Action.
/// \pre If !Inst->use_empty(), then New != nullptr
- InstructionRemover(Instruction *Inst, Value *New = nullptr)
+ InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
+ Value *New = nullptr)
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
- Replacer(nullptr) {
+ Replacer(nullptr), RemovedInsts(RemovedInsts) {
if (New)
Replacer = new UsesReplacer(Inst, New);
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ RemovedInsts.insert(Inst);
+ /// The instructions removed here will be freed after completing
+ /// optimizeBlock() for all blocks as we need to keep track of the
+ /// removed instructions during promotion.
Inst->removeFromParent();
}
~InstructionRemover() override { delete Replacer; }
- /// \brief Really remove the instruction.
- void commit() override { delete Inst; }
-
/// \brief Resurrect the instruction and reassign it to the proper uses if
/// new value was provided when build this action.
void undo() override {
@@ -2588,6 +2857,7 @@ class TypePromotionTransaction {
if (Replacer)
Replacer->undo();
Hider.undo();
+ RemovedInsts.erase(Inst);
}
};
@@ -2596,6 +2866,10 @@ public:
/// The restoration point is a pointer to an action instead of an iterator
/// because the iterator may be invalidated but not the pointer.
typedef const TypePromotionAction *ConstRestorationPt;
+
+ TypePromotionTransaction(SetOfInstrs &RemovedInsts)
+ : RemovedInsts(RemovedInsts) {}
+
/// Advocate every changes made in that transaction.
void commit();
/// Undo all the changes made after the given point.
@@ -2627,6 +2901,7 @@ private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+ SetOfInstrs &RemovedInsts;
};
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
@@ -2638,7 +2913,8 @@ void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
+ make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
+ RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
@@ -2705,8 +2981,8 @@ void TypePromotionTransaction::rollback(
/// This encapsulates the logic for matching the target-legal addressing modes.
class AddressingModeMatcher {
SmallVectorImpl<Instruction*> &AddrModeInsts;
- const TargetMachine &TM;
const TargetLowering &TLI;
+ const TargetRegisterInfo &TRI;
const DataLayout &DL;
/// AccessTy/MemoryInst - This is the type for the access (e.g. double) and
@@ -2731,14 +3007,14 @@ class AddressingModeMatcher {
bool IgnoreProfitability;
AddressingModeMatcher(SmallVectorImpl<Instruction *> &AMI,
- const TargetMachine &TM, Type *AT, unsigned AS,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI,
+ Type *AT, unsigned AS,
Instruction *MI, ExtAddrMode &AM,
const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT)
- : AddrModeInsts(AMI), TM(TM),
- TLI(*TM.getSubtargetImpl(*MI->getParent()->getParent())
- ->getTargetLowering()),
+ : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),
DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),
MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts),
PromotedInsts(PromotedInsts), TPT(TPT) {
@@ -2756,13 +3032,15 @@ public:
static ExtAddrMode Match(Value *V, Type *AccessTy, unsigned AS,
Instruction *MemoryInst,
SmallVectorImpl<Instruction*> &AddrModeInsts,
- const TargetMachine &TM,
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI,
const SetOfInstrs &InsertedInsts,
InstrToOrigTy &PromotedInsts,
TypePromotionTransaction &TPT) {
ExtAddrMode Result;
- bool Success = AddressingModeMatcher(AddrModeInsts, TM, AccessTy, AS,
+ bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI,
+ AccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT).matchAddr(V, 0);
(void)Success; assert(Success && "Couldn't select *anything*?");
@@ -3583,18 +3861,18 @@ bool AddressingModeMatcher::matchAddr(Value *Addr, unsigned Depth) {
/// Check to see if all uses of OpVal by the specified inline asm call are due
/// to memory operands. If so, return true, otherwise return false.
static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
- const TargetMachine &TM) {
+ const TargetLowering &TLI,
+ const TargetRegisterInfo &TRI) {
const Function *F = CI->getParent()->getParent();
- const TargetLowering *TLI = TM.getSubtargetImpl(*F)->getTargetLowering();
- const TargetRegisterInfo *TRI = TM.getSubtargetImpl(*F)->getRegisterInfo();
TargetLowering::AsmOperandInfoVector TargetConstraints =
- TLI->ParseConstraints(F->getParent()->getDataLayout(), TRI,
+ TLI.ParseConstraints(F->getParent()->getDataLayout(), &TRI,
ImmutableCallSite(CI));
+
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
// Compute the constraint code and ConstraintType to use.
- TLI->ComputeConstraintToUse(OpInfo, SDValue());
+ TLI.ComputeConstraintToUse(OpInfo, SDValue());
// If this asm operand is our Value*, and if it isn't an indirect memory
// operand, we can't fold it!
@@ -3613,7 +3891,8 @@ static bool IsOperandAMemoryOperand(CallInst *CI, InlineAsm *IA, Value *OpVal,
static bool FindAllMemoryUses(
Instruction *I,
SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,
- SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetMachine &TM) {
+ SmallPtrSetImpl<Instruction *> &ConsideredInsts,
+ const TargetLowering &TLI, const TargetRegisterInfo &TRI) {
// If we already considered this instruction, we're done.
if (!ConsideredInsts.insert(I).second)
return false;
@@ -3635,11 +3914,28 @@ static bool FindAllMemoryUses(
if (StoreInst *SI = dyn_cast<StoreInst>(UserI)) {
unsigned opNo = U.getOperandNo();
- if (opNo == 0) return true; // Storing addr, not into addr.
+ if (opNo != StoreInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
MemoryUses.push_back(std::make_pair(SI, opNo));
continue;
}
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo != AtomicRMWInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(RMW, opNo));
+ continue;
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(UserI)) {
+ unsigned opNo = U.getOperandNo();
+ if (opNo != AtomicCmpXchgInst::getPointerOperandIndex())
+ return true; // Storing addr, not into addr.
+ MemoryUses.push_back(std::make_pair(CmpX, opNo));
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(UserI)) {
// If this is a cold call, we can sink the addressing calculation into
// the cold path. See optimizeCallInst
@@ -3650,12 +3946,12 @@ static bool FindAllMemoryUses(
if (!IA) return true;
// If this is a memory operand, we're cool, otherwise bail out.
- if (!IsOperandAMemoryOperand(CI, IA, I, TM))
+ if (!IsOperandAMemoryOperand(CI, IA, I, TLI, TRI))
return true;
continue;
}
- if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TM))
+ if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI))
return true;
}
@@ -3743,7 +4039,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// the use is just a particularly nice way of sinking it.
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;
SmallPtrSet<Instruction*, 16> ConsideredInsts;
- if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TM))
+ if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI))
return false; // Has a non-memory, non-foldable use!
// Now that we know that all uses of this instruction are part of a chain of
@@ -3775,7 +4071,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
ExtAddrMode Result;
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
- AddressingModeMatcher Matcher(MatchedAddrModeInsts, TM, AddressAccessTy, AS,
+ AddressingModeMatcher Matcher(MatchedAddrModeInsts, TLI, TRI,
+ AddressAccessTy, AS,
MemoryInst, Result, InsertedInsts,
PromotedInsts, TPT);
Matcher.IgnoreProfitability = true;
@@ -3844,7 +4141,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
- TypePromotionTransaction TPT;
+ TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
@@ -3869,7 +4166,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// addressing instructions might have.
SmallVector<Instruction*, 16> NewAddrModeInsts;
ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(
- V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TM,
+ V, AccessTy, AddrSpace, MemoryInst, NewAddrModeInsts, *TLI, *TRI,
InsertedInsts, PromotedInsts, TPT);
// This check is broken into two cases with very similar code to avoid using
@@ -3935,11 +4232,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
DEBUG(dbgs() << "CGP: Reusing nonlocal addrmode: " << AddrMode << " for "
<< *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
} else if (AddrSinkUsingGEPs ||
(!AddrSinkUsingGEPs.getNumOccurrences() && TM &&
- TM->getSubtargetImpl(*MemoryInst->getParent()->getParent())
- ->useAA())) {
+ SubtargetInfo->useAA())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -4042,7 +4338,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// We need to add this separately from the scale above to help with
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
@@ -4053,12 +4349,12 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
SunkAddr = ResultPtr;
} else {
if (ResultPtr->getType() != I8PtrTy)
- ResultPtr = Builder.CreateBitCast(ResultPtr, I8PtrTy);
+ ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
- SunkAddr = Builder.CreateBitCast(SunkAddr, Addr->getType());
+ SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
}
} else {
DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode << " for "
@@ -4185,14 +4481,14 @@ bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) {
return MadeChange;
}
-/// \brief Check if all the uses of \p Inst are equivalent (or free) zero or
+/// \brief Check if all the uses of \p Val are equivalent (or free) zero or
/// sign extensions.
-static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
- assert(!Inst->use_empty() && "Input must have at least one use");
- const Instruction *FirstUser = cast<Instruction>(*Inst->user_begin());
+static bool hasSameExtUse(Value *Val, const TargetLowering &TLI) {
+ assert(!Val->use_empty() && "Input must have at least one use");
+ const Instruction *FirstUser = cast<Instruction>(*Val->user_begin());
bool IsSExt = isa<SExtInst>(FirstUser);
Type *ExtTy = FirstUser->getType();
- for (const User *U : Inst->users()) {
+ for (const User *U : Val->users()) {
const Instruction *UI = cast<Instruction>(U);
if ((IsSExt && !isa<SExtInst>(UI)) || (!IsSExt && !isa<ZExtInst>(UI)))
return false;
@@ -4202,11 +4498,11 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
continue;
// If IsSExt is true, we are in this situation:
- // a = Inst
+ // a = Val
// b = sext ty1 a to ty2
// c = sext ty1 a to ty3
// Assuming ty2 is shorter than ty3, this could be turned into:
- // a = Inst
+ // a = Val
// b = sext ty1 a to ty2
// c = sext ty2 b to ty3
// However, the last sext is not free.
@@ -4233,51 +4529,44 @@ static bool hasSameExtUse(Instruction *Inst, const TargetLowering &TLI) {
return true;
}
-/// \brief Try to form ExtLd by promoting \p Exts until they reach a
-/// load instruction.
-/// If an ext(load) can be formed, it is returned via \p LI for the load
-/// and \p Inst for the extension.
-/// Otherwise LI == nullptr and Inst == nullptr.
-/// When some promotion happened, \p TPT contains the proper state to
-/// revert them.
+/// \brief Try to speculatively promote extensions in \p Exts and continue
+/// promoting through newly promoted operands recursively as far as doing so is
+/// profitable. Save extensions profitably moved up, in \p ProfitablyMovedExts.
+/// When some promotion happened, \p TPT contains the proper state to revert
+/// them.
///
-/// \return true when promoting was necessary to expose the ext(load)
-/// opportunity, false otherwise.
-///
-/// Example:
-/// \code
-/// %ld = load i32* %addr
-/// %add = add nuw i32 %ld, 4
-/// %zext = zext i32 %add to i64
-/// \endcode
-/// =>
-/// \code
-/// %ld = load i32* %addr
-/// %zext = zext i32 %ld to i64
-/// %add = add nuw i64 %zext, 4
-/// \encode
-/// Thanks to the promotion, we can match zext(load i32*) to i64.
-bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
- LoadInst *&LI, Instruction *&Inst,
- const SmallVectorImpl<Instruction *> &Exts,
- unsigned CreatedInstsCost = 0) {
- // Iterate over all the extensions to see if one form an ext(load).
+/// \return true if some promotion happened, false otherwise.
+bool CodeGenPrepare::tryToPromoteExts(
+ TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
+ SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
+ unsigned CreatedInstsCost) {
+ bool Promoted = false;
+
+ // Iterate over all the extensions to try to promote them.
for (auto I : Exts) {
- // Check if we directly have ext(load).
- if ((LI = dyn_cast<LoadInst>(I->getOperand(0)))) {
- Inst = I;
- // No promotion happened here.
- return false;
+ // Early check if we directly have ext(load).
+ if (isa<LoadInst>(I->getOperand(0))) {
+ ProfitablyMovedExts.push_back(I);
+ continue;
}
- // Check whether or not we want to do any promotion.
+
+ // Check whether or not we want to do any promotion. The reason we have
+ // this check inside the for loop is to catch the case where an extension
+ // is directly fed by a load because in such case the extension can be moved
+ // up without any promotion on its operands.
if (!TLI || !TLI->enableExtLdPromotion() || DisableExtLdPromotion)
- continue;
+ return false;
+
// Get the action to perform the promotion.
- TypePromotionHelper::Action TPH = TypePromotionHelper::getAction(
- I, InsertedInsts, *TLI, PromotedInsts);
+ TypePromotionHelper::Action TPH =
+ TypePromotionHelper::getAction(I, InsertedInsts, *TLI, PromotedInsts);
// Check if we can promote.
- if (!TPH)
+ if (!TPH) {
+ // Save the current extension as we cannot move up through its operand.
+ ProfitablyMovedExts.push_back(I);
continue;
+ }
+
// Save the current state.
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
@@ -4297,110 +4586,293 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
// one extension but leave one. However, we optimistically keep going,
// because the new extension may be removed too.
long long TotalCreatedInstsCost = CreatedInstsCost + NewCreatedInstsCost;
- TotalCreatedInstsCost -= ExtCost;
+ // FIXME: It would be possible to propagate a negative value instead of
+ // conservatively ceiling it to 0.
+ TotalCreatedInstsCost =
+ std::max((long long)0, (TotalCreatedInstsCost - ExtCost));
if (!StressExtLdPromotion &&
(TotalCreatedInstsCost > 1 ||
!isPromotedInstructionLegal(*TLI, *DL, PromotedVal))) {
- // The promotion is not profitable, rollback to the previous state.
+ // This promotion is not profitable, rollback to the previous state, and
+ // save the current extension in ProfitablyMovedExts as the latest
+ // speculative promotion turned out to be unprofitable.
TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
+ continue;
+ }
+ // Continue promoting NewExts as far as doing so is profitable.
+ SmallVector<Instruction *, 2> NewlyMovedExts;
+ (void)tryToPromoteExts(TPT, NewExts, NewlyMovedExts, TotalCreatedInstsCost);
+ bool NewPromoted = false;
+ for (auto ExtInst : NewlyMovedExts) {
+ Instruction *MovedExt = cast<Instruction>(ExtInst);
+ Value *ExtOperand = MovedExt->getOperand(0);
+ // If we have reached to a load, we need this extra profitability check
+ // as it could potentially be merged into an ext(load).
+ if (isa<LoadInst>(ExtOperand) &&
+ !(StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
+ (ExtOperand->hasOneUse() || hasSameExtUse(ExtOperand, *TLI))))
+ continue;
+
+ ProfitablyMovedExts.push_back(MovedExt);
+ NewPromoted = true;
+ }
+
+ // If none of speculative promotions for NewExts is profitable, rollback
+ // and save the current extension (I) as the last profitable extension.
+ if (!NewPromoted) {
+ TPT.rollback(LastKnownGood);
+ ProfitablyMovedExts.push_back(I);
continue;
}
// The promotion is profitable.
- // Check if it exposes an ext(load).
- (void)extLdPromotion(TPT, LI, Inst, NewExts, TotalCreatedInstsCost);
- if (LI && (StressExtLdPromotion || NewCreatedInstsCost <= ExtCost ||
- // If we have created a new extension, i.e., now we have two
- // extensions. We must make sure one of them is merged with
- // the load, otherwise we may degrade the code quality.
- (LI->hasOneUse() || hasSameExtUse(LI, *TLI))))
- // Promotion happened.
- return true;
- // If this does not help to expose an ext(load) then, rollback.
- TPT.rollback(LastKnownGood);
+ Promoted = true;
}
- // None of the extension can form an ext(load).
- LI = nullptr;
- Inst = nullptr;
- return false;
+ return Promoted;
}
-/// Move a zext or sext fed by a load into the same basic block as the load,
-/// unless conditions are unfavorable. This allows SelectionDAG to fold the
-/// extend into the load.
-/// \p I[in/out] the extension may be modified during the process if some
-/// promotions apply.
-///
-bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
- // ExtLoad formation infrastructure requires TLI to be effective.
- if (!TLI)
- return false;
+/// Merging redundant sexts when one is dominating the other.
+bool CodeGenPrepare::mergeSExts(Function &F) {
+ DominatorTree DT(F);
+ bool Changed = false;
+ for (auto &Entry : ValToSExtendedUses) {
+ SExts &Insts = Entry.second;
+ SExts CurPts;
+ for (Instruction *Inst : Insts) {
+ if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
+ Inst->getOperand(0) != Entry.first)
+ continue;
+ bool inserted = false;
+ for (auto &Pt : CurPts) {
+ if (DT.dominates(Inst, Pt)) {
+ Pt->replaceAllUsesWith(Inst);
+ RemovedInsts.insert(Pt);
+ Pt->removeFromParent();
+ Pt = Inst;
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!DT.dominates(Pt, Inst))
+ // Give up if we need to merge in a common dominator as the
+ // expermients show it is not profitable.
+ continue;
+ Inst->replaceAllUsesWith(Pt);
+ RemovedInsts.insert(Inst);
+ Inst->removeFromParent();
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!inserted)
+ CurPts.push_back(Inst);
+ }
+ }
+ return Changed;
+}
- // Try to promote a chain of computation if it allows to form
- // an extended load.
- TypePromotionTransaction TPT;
- TypePromotionTransaction::ConstRestorationPt LastKnownGood =
- TPT.getRestorationPoint();
- SmallVector<Instruction *, 1> Exts;
- Exts.push_back(I);
- // Look for a load being extended.
- LoadInst *LI = nullptr;
- Instruction *OldExt = I;
- bool HasPromoted = extLdPromotion(TPT, LI, I, Exts);
- if (!LI || !I) {
- assert(!HasPromoted && !LI && "If we did not match any load instruction "
- "the code must remain the same");
- I = OldExt;
- return false;
+/// Return true, if an ext(load) can be formed from an extension in
+/// \p MovedExts.
+bool CodeGenPrepare::canFormExtLd(
+ const SmallVectorImpl<Instruction *> &MovedExts, LoadInst *&LI,
+ Instruction *&Inst, bool HasPromoted) {
+ for (auto *MovedExtInst : MovedExts) {
+ if (isa<LoadInst>(MovedExtInst->getOperand(0))) {
+ LI = cast<LoadInst>(MovedExtInst->getOperand(0));
+ Inst = MovedExtInst;
+ break;
+ }
}
+ if (!LI)
+ return false;
// If they're already in the same block, there's nothing to do.
// Make the cheap checks first if we did not promote.
// If we promoted, we need to check if it is indeed profitable.
- if (!HasPromoted && LI->getParent() == I->getParent())
+ if (!HasPromoted && LI->getParent() == Inst->getParent())
return false;
- EVT VT = TLI->getValueType(*DL, I->getType());
+ EVT VT = TLI->getValueType(*DL, Inst->getType());
EVT LoadVT = TLI->getValueType(*DL, LI->getType());
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
- if (!LI->hasOneUse() &&
- (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
- !TLI->isTruncateFree(I->getType(), LI->getType())) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
+ if (!LI->hasOneUse() && (TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
+ !TLI->isTruncateFree(Inst->getType(), LI->getType()))
return false;
- }
// Check whether the target supports casts folded into loads.
unsigned LType;
- if (isa<ZExtInst>(I))
+ if (isa<ZExtInst>(Inst))
LType = ISD::ZEXTLOAD;
else {
- assert(isa<SExtInst>(I) && "Unexpected ext type!");
+ assert(isa<SExtInst>(Inst) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
- if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
+
+ return TLI->isLoadExtLegal(LType, VT, LoadVT);
+}
+
+/// Move a zext or sext fed by a load into the same basic block as the load,
+/// unless conditions are unfavorable. This allows SelectionDAG to fold the
+/// extend into the load.
+///
+/// E.g.,
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \encode
+/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
+/// allow us to match zext(load i32*) to i64.
+///
+/// Also, try to promote the computations used to obtain a sign extended
+/// value used into memory accesses.
+/// E.g.,
+/// \code
+/// a = add nsw i32 b, 3
+/// d = sext i32 a to i64
+/// e = getelementptr ..., i64 d
+/// \endcode
+/// =>
+/// \code
+/// f = sext i32 b to i64
+/// a = add nsw i64 f, 3
+/// e = getelementptr ..., i64 a
+/// \endcode
+///
+/// \p Inst[in/out] the extension may be modified during the process if some
+/// promotions apply.
+bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
+ // ExtLoad formation and address type promotion infrastructure requires TLI to
+ // be effective.
+ if (!TLI)
return false;
+
+ bool AllowPromotionWithoutCommonHeader = false;
+ /// See if it is an interesting sext operations for the address type
+ /// promotion before trying to promote it, e.g., the ones with the right
+ /// type and used in memory accesses.
+ bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
+ *Inst, AllowPromotionWithoutCommonHeader);
+ TypePromotionTransaction TPT(RemovedInsts);
+ TypePromotionTransaction::ConstRestorationPt LastKnownGood =
+ TPT.getRestorationPoint();
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> SpeculativelyMovedExts;
+ Exts.push_back(Inst);
+
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
+
+ // Look for a load being extended.
+ LoadInst *LI = nullptr;
+ Instruction *ExtFedByLoad;
+
+ // Try to promote a chain of computation if it allows to form an extended
+ // load.
+ if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
+ assert(LI && ExtFedByLoad && "Expect a valid load and extension");
+ TPT.commit();
+ // Move the extend into the same block as the load
+ ExtFedByLoad->removeFromParent();
+ ExtFedByLoad->insertAfter(LI);
+ // CGP does not check if the zext would be speculatively executed when moved
+ // to the same basic block as the load. Preserving its original location
+ // would pessimize the debugging experience, as well as negatively impact
+ // the quality of sample pgo. We don't want to use "line 0" as that has a
+ // size cost in the line-table section and logically the zext can be seen as
+ // part of the load. Therefore we conservatively reuse the same debug
+ // location for the load and the zext.
+ ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
+ ++NumExtsMoved;
+ Inst = ExtFedByLoad;
+ return true;
}
- // Move the extend into the same block as the load, so that SelectionDAG
- // can fold it.
- TPT.commit();
- I->removeFromParent();
- I->insertAfter(LI);
- // CGP does not check if the zext would be speculatively executed when moved
- // to the same basic block as the load. Preserving its original location would
- // pessimize the debugging experience, as well as negatively impact the
- // quality of sample pgo. We don't want to use "line 0" as that has a
- // size cost in the line-table section and logically the zext can be seen as
- // part of the load. Therefore we conservatively reuse the same debug location
- // for the load and the zext.
- I->setDebugLoc(LI->getDebugLoc());
- ++NumExtsMoved;
- return true;
+ // Continue promoting SExts if known as considerable depending on targets.
+ if (ATPConsiderable &&
+ performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
+ HasPromoted, TPT, SpeculativelyMovedExts))
+ return true;
+
+ TPT.rollback(LastKnownGood);
+ return false;
+}
+
+// Perform address type promotion if doing so is profitable.
+// If AllowPromotionWithoutCommonHeader == false, we should find other sext
+// instructions that sign extended the same initial value. However, if
+// AllowPromotionWithoutCommonHeader == true, we expect promoting the
+// extension is just profitable.
+bool CodeGenPrepare::performAddressTypePromotion(
+ Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
+ bool Promoted = false;
+ SmallPtrSet<Instruction *, 1> UnhandledExts;
+ bool AllSeenFirst = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ DenseMap<Value *, Instruction *>::iterator AlreadySeen =
+ SeenChainsForSExt.find(HeadOfChain);
+ // If there is an unhandled SExt which has the same header, try to promote
+ // it as well.
+ if (AlreadySeen != SeenChainsForSExt.end()) {
+ if (AlreadySeen->second != nullptr)
+ UnhandledExts.insert(AlreadySeen->second);
+ AllSeenFirst = false;
+ }
+ }
+
+ if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
+ SpeculativelyMovedExts.size() == 1)) {
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ // Update Inst as promotion happen.
+ Inst = SpeculativelyMovedExts.pop_back_val();
+ } else {
+ // This is the first chain visited from the header, keep the current chain
+ // as unhandled. Defer to promote this until we encounter another SExt
+ // chain derived from the same header.
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = Inst;
+ }
+ return false;
+ }
+
+ if (!AllSeenFirst && !UnhandledExts.empty())
+ for (auto VisitedSExt : UnhandledExts) {
+ if (RemovedInsts.count(VisitedSExt))
+ continue;
+ TypePromotionTransaction TPT(RemovedInsts);
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> Chains;
+ Exts.push_back(VisitedSExt);
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : Chains) {
+ Value *HeadOfChain = I->getOperand(0);
+ // Mark this as handled.
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ }
+ return Promoted;
}
bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
@@ -4534,13 +5006,10 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
!(Load->getType()->isIntegerTy() || Load->getType()->isPointerTy()))
return false;
- // Skip loads we've already transformed or have no reason to transform.
- if (Load->hasOneUse()) {
- User *LoadUser = *Load->user_begin();
- if (cast<Instruction>(LoadUser)->getParent() == Load->getParent() &&
- !dyn_cast<PHINode>(LoadUser))
- return false;
- }
+ // Skip loads we've already transformed.
+ if (Load->hasOneUse() &&
+ InsertedInsts.count(cast<Instruction>(*Load->user_begin())))
+ return false;
// Look at all uses of Load, looking through phis, to determine how many bits
// of the loaded value are needed.
@@ -4620,7 +5089,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
//
// Also avoid hoisting if we didn't see any ands with the exact DemandBits
// mask, since these are the only ands that will be removed by isel.
- if (ActiveBits <= 1 || !APIntOps::isMask(ActiveBits, DemandBits) ||
+ if (ActiveBits <= 1 || !DemandBits.isMask(ActiveBits) ||
WidestAndBits != DemandBits)
return false;
@@ -4636,6 +5105,9 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
IRBuilder<> Builder(Load->getNextNode());
auto *NewAnd = dyn_cast<Instruction>(
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
+ // Mark this instruction as "inserted by CGP", so that other
+ // optimizations don't touch it.
+ InsertedInsts.insert(NewAnd);
// Replace all uses of load with new and (except for the use of load in the
// new and itself).
@@ -4985,7 +5457,7 @@ bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
auto *ExtInst = CastInst::Create(ExtType, Cond, NewType);
ExtInst->insertBefore(SI);
SI->setCondition(ExtInst);
- for (SwitchInst::CaseIt Case : SI->cases()) {
+ for (auto Case : SI->cases()) {
APInt NarrowConst = Case.getCaseValue()->getValue();
APInt WideConst = (ExtType == Instruction::ZExt) ?
NarrowConst.zext(RegWidth) : NarrowConst.sext(RegWidth);
@@ -5514,7 +5986,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
- bool MadeChange = moveExtToFormExtLoad(I);
+ bool MadeChange = optimizeExt(I);
return MadeChange | optimizeExtUses(I);
}
}
@@ -5548,8 +6020,24 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
return false;
}
+ if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
+ unsigned AS = RMW->getPointerAddressSpace();
+ return optimizeMemoryInst(I, RMW->getPointerOperand(),
+ RMW->getType(), AS);
+ }
+
+ if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(I)) {
+ unsigned AS = CmpX->getPointerAddressSpace();
+ return optimizeMemoryInst(I, CmpX->getPointerOperand(),
+ CmpX->getCompareOperand()->getType(), AS);
+ }
+
BinaryOperator *BinOp = dyn_cast<BinaryOperator>(I);
+ if (BinOp && (BinOp->getOpcode() == Instruction::And) &&
+ EnableAndCmpSinking && TLI)
+ return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
@@ -5679,68 +6167,6 @@ bool CodeGenPrepare::placeDbgValues(Function &F) {
return MadeChange;
}
-// If there is a sequence that branches based on comparing a single bit
-// against zero that can be combined into a single instruction, and the
-// target supports folding these into a single instruction, sink the
-// mask and compare into the branch uses. Do this before OptimizeBlock ->
-// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being
-// searched for.
-bool CodeGenPrepare::sinkAndCmp(Function &F) {
- if (!EnableAndCmpSinking)
- return false;
- if (!TLI || !TLI->isMaskAndBranchFoldingLegal())
- return false;
- bool MadeChange = false;
- for (BasicBlock &BB : F) {
- // Does this BB end with the following?
- // %andVal = and %val, #single-bit-set
- // %icmpVal = icmp %andResult, 0
- // br i1 %cmpVal label %dest1, label %dest2"
- BranchInst *Brcc = dyn_cast<BranchInst>(BB.getTerminator());
- if (!Brcc || !Brcc->isConditional())
- continue;
- ICmpInst *Cmp = dyn_cast<ICmpInst>(Brcc->getOperand(0));
- if (!Cmp || Cmp->getParent() != &BB)
- continue;
- ConstantInt *Zero = dyn_cast<ConstantInt>(Cmp->getOperand(1));
- if (!Zero || !Zero->isZero())
- continue;
- Instruction *And = dyn_cast<Instruction>(Cmp->getOperand(0));
- if (!And || And->getOpcode() != Instruction::And || And->getParent() != &BB)
- continue;
- ConstantInt* Mask = dyn_cast<ConstantInt>(And->getOperand(1));
- if (!Mask || !Mask->getUniqueInteger().isPowerOf2())
- continue;
- DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB.dump());
-
- // Push the "and; icmp" for any users that are conditional branches.
- // Since there can only be one branch use per BB, we don't need to keep
- // track of which BBs we insert into.
- for (Use &TheUse : Cmp->uses()) {
- // Find brcc use.
- BranchInst *BrccUser = dyn_cast<BranchInst>(TheUse);
- if (!BrccUser || !BrccUser->isConditional())
- continue;
- BasicBlock *UserBB = BrccUser->getParent();
- if (UserBB == &BB) continue;
- DEBUG(dbgs() << "found Brcc use\n");
-
- // Sink the "and; icmp" to use.
- MadeChange = true;
- BinaryOperator *NewAnd =
- BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "",
- BrccUser);
- CmpInst *NewCmp =
- CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero,
- "", BrccUser);
- TheUse = NewCmp;
- ++NumAndCmpsMoved;
- DEBUG(BrccUser->getParent()->dump());
- }
- }
- return MadeChange;
-}
-
/// \brief Scale down both weights to fit into uint32_t.
static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
uint64_t NewMax = (NewTrue > NewFalse) ? NewTrue : NewFalse;