Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstructionCombining.cpp')
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstructionCombining.cpp  380
1 file changed, 197 insertions(+), 183 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4e3b18e805ee..47b6dcb67a78 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -100,7 +100,6 @@
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/InstCombine/InstCombine.h"
-#include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
@@ -109,11 +108,12 @@
#include <string>
#include <utility>
+#define DEBUG_TYPE "instcombine"
+#include "llvm/Transforms/Utils/InstructionWorklist.h"
+
using namespace llvm;
using namespace llvm::PatternMatch;
-#define DEBUG_TYPE "instcombine"
-
STATISTIC(NumWorklistIterations,
"Number of instruction combining iterations performed");
@@ -202,23 +202,37 @@ Value *InstCombinerImpl::EmitGEPOffset(User *GEP) {
return llvm::EmitGEPOffset(&Builder, DL, GEP);
}
+/// Legal integers and common types are considered desirable. This is used to
+/// avoid creating instructions with types that may not be supported well by the
+/// backend.
+/// NOTE: This treats i8, i16 and i32 specially because they are common
+/// types in frontend languages.
+bool InstCombinerImpl::isDesirableIntType(unsigned BitWidth) const {
+ switch (BitWidth) {
+ case 8:
+ case 16:
+ case 32:
+ return true;
+ default:
+ return DL.isLegalInteger(BitWidth);
+ }
+}
+
/// Return true if it is desirable to convert an integer computation from a
/// given bit width to a new bit width.
/// We don't want to convert from a legal to an illegal type or from a smaller
-/// to a larger illegal type. A width of '1' is always treated as a legal type
-/// because i1 is a fundamental type in IR, and there are many specialized
-/// optimizations for i1 types. Widths of 8, 16 or 32 are equally treated as
+/// to a larger illegal type. A width of '1' is always treated as a desirable
+/// type because i1 is a fundamental type in IR, and there are many specialized
+/// optimizations for i1 types. Common/desirable widths are equally treated as
/// legal to convert to, in order to open up more combining opportunities.
-/// NOTE: this treats i8, i16 and i32 specially, due to them being so common
-/// from frontend languages.
bool InstCombinerImpl::shouldChangeType(unsigned FromWidth,
unsigned ToWidth) const {
bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
- // Convert to widths of 8, 16 or 32 even if they are not legal types. Only
- // shrink types, to prevent infinite loops.
- if (ToWidth < FromWidth && (ToWidth == 8 || ToWidth == 16 || ToWidth == 32))
+ // Convert to desirable widths even if they are not legal types.
+ // Only shrink types, to prevent infinite loops.
+ if (ToWidth < FromWidth && isDesirableIntType(ToWidth))
return true;
// If this is a legal integer from type, and the result would be an illegal
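For intuition, here is a minimal standalone sketch of the new shrink rule (the isLegalInteger stand-in and mayShrink helper are hypothetical, assuming a target whose only legal integer widths are 32 and 64 bits):

#include <cstdio>

// Hypothetical stand-in for DataLayout::isLegalInteger on a target whose
// only legal integer widths are 32 and 64 bits.
static bool isLegalInteger(unsigned BitWidth) {
  return BitWidth == 32 || BitWidth == 64;
}

// Mirrors the new hook: i8/i16/i32 are always desirable because they are
// common frontend types; any other width must be legal for the target.
static bool isDesirableIntType(unsigned BitWidth) {
  switch (BitWidth) {
  case 8:
  case 16:
  case 32:
    return true;
  default:
    return isLegalInteger(BitWidth);
  }
}

// The early exit in shouldChangeType: narrowing to a desirable width is
// allowed even when that width is illegal; widening never takes this path.
static bool mayShrink(unsigned FromWidth, unsigned ToWidth) {
  return ToWidth < FromWidth && isDesirableIntType(ToWidth);
}

int main() {
  std::printf("i64 -> i16: %d\n", mayShrink(64, 16)); // 1: desirable width
  std::printf("i64 -> i24: %d\n", mayShrink(64, 24)); // 0: odd, illegal width
  std::printf("i16 -> i64: %d\n", mayShrink(16, 64)); // 0: only shrink
}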
@@ -359,7 +373,8 @@ Value *InstCombinerImpl::simplifyIntToPtrRoundTripCast(Value *Val) {
PtrToInt->getSrcTy()->getPointerAddressSpace() &&
DL.getPointerTypeSizeInBits(PtrToInt->getSrcTy()) ==
DL.getTypeSizeInBits(PtrToInt->getDestTy())) {
- return Builder.CreateBitCast(PtrToInt->getOperand(0), CastTy);
+ return CastInst::CreateBitOrPointerCast(PtrToInt->getOperand(0), CastTy,
+ "", PtrToInt);
}
}
return nullptr;
@@ -961,14 +976,14 @@ static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
assert(canConstantFoldCallTo(II, cast<Function>(II->getCalledOperand())) &&
"Expected constant-foldable intrinsic");
Intrinsic::ID IID = II->getIntrinsicID();
- if (II->getNumArgOperands() == 1)
+ if (II->arg_size() == 1)
return Builder.CreateUnaryIntrinsic(IID, SO);
// This works for real binary ops like min/max (where we always expect the
// constant operand to be canonicalized as op1) and unary ops with a bonus
// constant argument like ctlz/cttz.
// TODO: Handle non-commutative binary intrinsics as below for binops.
- assert(II->getNumArgOperands() == 2 && "Expected binary intrinsic");
+ assert(II->arg_size() == 2 && "Expected binary intrinsic");
assert(isa<Constant>(II->getArgOperand(1)) && "Expected constant operand");
return Builder.CreateBinaryIntrinsic(IID, SO, II->getArgOperand(1));
}
@@ -1058,7 +1073,7 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op,
// Compare for equality including undefs as equal.
auto *Cmp = ConstantExpr::getCompare(ICmpInst::ICMP_EQ, ConstA, ConstB);
const APInt *C;
- return match(Cmp, m_APIntAllowUndef(C)) && C->isOneValue();
+ return match(Cmp, m_APIntAllowUndef(C)) && C->isOne();
};
if ((areLooselyEqual(TV, Op0) && areLooselyEqual(FV, Op1)) ||
@@ -1120,9 +1135,11 @@ Instruction *InstCombinerImpl::foldOpIntoPhi(Instruction &I, PHINode *PN) {
BasicBlock *NonConstBB = nullptr;
for (unsigned i = 0; i != NumPHIValues; ++i) {
Value *InVal = PN->getIncomingValue(i);
- // If I is a freeze instruction, count undef as a non-constant.
- if (match(InVal, m_ImmConstant()) &&
- (!isa<FreezeInst>(I) || isGuaranteedNotToBeUndefOrPoison(InVal)))
+ // For non-freeze, require a constant operand.
+ // For freeze, require a non-undef, non-poison operand.
+ if (!isa<FreezeInst>(I) && match(InVal, m_ImmConstant()))
+ continue;
+ if (isa<FreezeInst>(I) && isGuaranteedNotToBeUndefOrPoison(InVal))
continue;
if (isa<PHINode>(InVal)) return nullptr; // Itself a phi.
@@ -1268,61 +1285,19 @@ Instruction *InstCombinerImpl::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
/// specified offset. If so, fill them into NewIndices and return the resultant
/// element type, otherwise return null.
Type *
-InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
+InstCombinerImpl::FindElementAtOffset(PointerType *PtrTy, int64_t IntOffset,
SmallVectorImpl<Value *> &NewIndices) {
Type *Ty = PtrTy->getElementType();
if (!Ty->isSized())
return nullptr;
- // Start with the index over the outer type. Note that the type size
- // might be zero (even if the offset isn't zero) if the indexed type
- // is something like [0 x {int, int}]
- Type *IndexTy = DL.getIndexType(PtrTy);
- int64_t FirstIdx = 0;
- if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
- FirstIdx = Offset/TySize;
- Offset -= FirstIdx*TySize;
-
- // Handle hosts where % returns negative instead of values [0..TySize).
- if (Offset < 0) {
- --FirstIdx;
- Offset += TySize;
- assert(Offset >= 0);
- }
- assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
- }
-
- NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx));
-
- // Index into the types. If we fail, set OrigBase to null.
- while (Offset) {
- // Indexing into tail padding between struct/array elements.
- if (uint64_t(Offset * 8) >= DL.getTypeSizeInBits(Ty))
- return nullptr;
-
- if (StructType *STy = dyn_cast<StructType>(Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
- assert(Offset < (int64_t)SL->getSizeInBytes() &&
- "Offset must stay within the indexed type");
-
- unsigned Elt = SL->getElementContainingOffset(Offset);
- NewIndices.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
- Elt));
-
- Offset -= SL->getElementOffset(Elt);
- Ty = STy->getElementType(Elt);
- } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
- uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
- assert(EltSize && "Cannot index into a zero-sized array");
- NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize));
- Offset %= EltSize;
- Ty = AT->getElementType();
- } else {
- // Otherwise, we can't index into the middle of this atomic type, bail.
- return nullptr;
- }
- }
+ APInt Offset(DL.getIndexTypeSizeInBits(PtrTy), IntOffset);
+ SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
+ if (!Offset.isZero())
+ return nullptr;
+ for (const APInt &Index : Indices)
+ NewIndices.push_back(Builder.getInt(Index));
return Ty;
}
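The rewritten body delegates the index walk to DataLayout::getGEPIndicesForOffset, which updates its Type and Offset arguments in place and leaves any non-indexable remainder in Offset. A hedged sketch of that API on an illustrative struct (default DataLayout string, LLVM tree of roughly this commit's vintage):

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  DataLayout DL("");  // default layout, for illustration only
  // struct { i32, [4 x i16] }: the array begins at byte offset 4.
  StructType *STy = StructType::get(
      Ctx, {Type::getInt32Ty(Ctx), ArrayType::get(Type::getInt16Ty(Ctx), 4)});

  // Byte offset 6 lands in array element 1: indices {0, 1, 1}, remainder 0,
  // which FindElementAtOffset then turns into ConstantInts for a GEP.
  Type *Ty = STy;
  APInt Offset(/*numBits=*/64, 6);
  SmallVector<APInt> Indices = DL.getGEPIndicesForOffset(Ty, Offset);
  for (const APInt &Idx : Indices)
    errs() << Idx << ' ';
  errs() << "| remainder: " << Offset << " | leaf type: " << *Ty << '\n';
}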
@@ -1623,7 +1598,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
Value *XY = Builder.CreateBinOp(Opcode, X, Y);
if (auto *BO = dyn_cast<BinaryOperator>(XY))
BO->copyIRFlags(&Inst);
- return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
+ return new ShuffleVectorInst(XY, M);
};
// If both arguments of the binary operation are shuffles that use the same
@@ -1754,25 +1729,20 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
Value *X;
ArrayRef<int> MaskC;
int SplatIndex;
- BinaryOperator *BO;
+ Value *Y, *OtherOp;
if (!match(LHS,
m_OneUse(m_Shuffle(m_Value(X), m_Undef(), m_Mask(MaskC)))) ||
!match(MaskC, m_SplatOrUndefMask(SplatIndex)) ||
- X->getType() != Inst.getType() || !match(RHS, m_OneUse(m_BinOp(BO))) ||
- BO->getOpcode() != Opcode)
+ X->getType() != Inst.getType() ||
+ !match(RHS, m_OneUse(m_BinOp(Opcode, m_Value(Y), m_Value(OtherOp)))))
return nullptr;
// FIXME: This may not be safe if the analysis allows undef elements. By
// moving 'Y' before the splat shuffle, we are implicitly assuming
// that it is not undef/poison at the splat index.
- Value *Y, *OtherOp;
- if (isSplatValue(BO->getOperand(0), SplatIndex)) {
- Y = BO->getOperand(0);
- OtherOp = BO->getOperand(1);
- } else if (isSplatValue(BO->getOperand(1), SplatIndex)) {
- Y = BO->getOperand(1);
- OtherOp = BO->getOperand(0);
- } else {
+ if (isSplatValue(OtherOp, SplatIndex)) {
+ std::swap(Y, OtherOp);
+ } else if (!isSplatValue(Y, SplatIndex)) {
return nullptr;
}
@@ -1788,7 +1758,7 @@ Instruction *InstCombinerImpl::foldVectorBinop(BinaryOperator &Inst) {
// dropped to be safe.
if (isa<FPMathOperator>(R)) {
R->copyFastMathFlags(&Inst);
- R->andIRFlags(BO);
+ R->andIRFlags(RHS);
}
if (auto *NewInstBO = dyn_cast<BinaryOperator>(NewBO))
NewInstBO->copyIRFlags(R);
@@ -1896,7 +1866,8 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
Type *GEPType = GEP.getType();
Type *GEPEltType = GEP.getSourceElementType();
bool IsGEPSrcEleScalable = isa<ScalableVectorType>(GEPEltType);
- if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
+ if (Value *V = SimplifyGEPInst(GEPEltType, Ops, GEP.isInBounds(),
+ SQ.getWithInstruction(&GEP)))
return replaceInstUsesWith(GEP, V);
// For vector geps, use the generic demanded vector support.
@@ -1905,7 +1876,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (auto *GEPFVTy = dyn_cast<FixedVectorType>(GEPType)) {
auto VWidth = GEPFVTy->getNumElements();
APInt UndefElts(VWidth, 0);
- APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ APInt AllOnesEltMask(APInt::getAllOnes(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
UndefElts)) {
if (V != &GEP)
@@ -2117,10 +2088,12 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// -- have to recreate %src & %gep
// put NewSrc at same location as %src
Builder.SetInsertPoint(cast<Instruction>(PtrOp));
- auto *NewSrc = cast<GetElementPtrInst>(
- Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
- NewSrc->setIsInBounds(Src->isInBounds());
- auto *NewGEP =
+ Value *NewSrc =
+ Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName());
+ // Propagate 'inbounds' if the new source was not constant-folded.
+ if (auto *NewSrcGEPI = dyn_cast<GetElementPtrInst>(NewSrc))
+ NewSrcGEPI->setIsInBounds(Src->isInBounds());
+ GetElementPtrInst *NewGEP =
GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
NewGEP->setIsInBounds(GEP.isInBounds());
return NewGEP;
@@ -2128,18 +2101,6 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
}
-
- // Fold (gep(gep(Ptr,Idx0),Idx1) -> gep(Ptr,add(Idx0,Idx1))
- if (GO1->getType() == SO1->getType()) {
- bool NewInBounds = GEP.isInBounds() && Src->isInBounds();
- auto *NewIdx =
- Builder.CreateAdd(GO1, SO1, GEP.getName() + ".idx",
- /*HasNUW*/ false, /*HasNSW*/ NewInBounds);
- auto *NewGEP = GetElementPtrInst::Create(
- GEPEltType, Src->getPointerOperand(), {NewIdx});
- NewGEP->setIsInBounds(NewInBounds);
- return NewGEP;
- }
}
// Note that if our source is a gep chain itself then we wait for that
@@ -2647,6 +2608,13 @@ static bool isAllocSiteRemovable(Instruction *AI,
Users.emplace_back(I);
continue;
}
+
+ if (isReallocLikeFn(I, TLI, true)) {
+ Users.emplace_back(I);
+ Worklist.push_back(I);
+ continue;
+ }
+
return false;
case Instruction::Store: {
@@ -2834,15 +2802,33 @@ static Instruction *tryToMoveFreeBeforeNullTest(CallInst &FI,
// At this point, we know that everything in FreeInstrBB can be moved
// before TI.
- for (BasicBlock::iterator It = FreeInstrBB->begin(), End = FreeInstrBB->end();
- It != End;) {
- Instruction &Instr = *It++;
+ for (Instruction &Instr : llvm::make_early_inc_range(*FreeInstrBB)) {
if (&Instr == FreeInstrBBTerminator)
break;
Instr.moveBefore(TI);
}
assert(FreeInstrBB->size() == 1 &&
"Only the branch instruction should remain");
+
+ // Now that we've moved the call to free before the NULL check, we have to
+ // remove any attributes on its parameter that imply it's non-null, because
+ // those attributes might have only been valid because of the NULL check, and
+ // we can get miscompiles if we keep them. This is conservative if non-null is
+ // also implied by something other than the NULL check, but it's guaranteed to
+ // be correct, and the conservativeness won't matter in practice, since the
+ // attributes are irrelevant for the call to free itself and the pointer
+ // shouldn't be used after the call.
+ AttributeList Attrs = FI.getAttributes();
+ Attrs = Attrs.removeParamAttribute(FI.getContext(), 0, Attribute::NonNull);
+ Attribute Dereferenceable = Attrs.getParamAttr(0, Attribute::Dereferenceable);
+ if (Dereferenceable.isValid()) {
+ uint64_t Bytes = Dereferenceable.getDereferenceableBytes();
+ Attrs = Attrs.removeParamAttribute(FI.getContext(), 0,
+ Attribute::Dereferenceable);
+ Attrs = Attrs.addDereferenceableOrNullParamAttr(FI.getContext(), 0, Bytes);
+ }
+ FI.setAttributes(Attrs);
+
return &FI;
}
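A source-level sketch of why the moved call must lose its parameter attributes (illustrative only, relying on free(NULL) being a no-op):

#include <cstdlib>

// Before the transform: the call is guarded, so within the 'if' the
// pointer is provably nonnull, and the call site may carry 'nonnull'
// (and 'dereferenceable') deduced from that guard.
void guarded(void *p) {
  if (p)
    std::free(p);
}

// After hoisting the call above the now-redundant test, 'p' may be null on
// entry to free(), so 'nonnull' must be dropped and 'dereferenceable(N)'
// weakened to 'dereferenceable_or_null(N)', as the code above does.
void hoisted(void *p) {
  std::free(p);  // free(NULL) is a no-op, so behavior is preserved
}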
@@ -2861,6 +2847,15 @@ Instruction *InstCombinerImpl::visitFree(CallInst &FI) {
if (isa<ConstantPointerNull>(Op))
return eraseInstFromFunction(FI);
+ // If we had free(realloc(...)) with no intervening uses, then eliminate the
+ // realloc() entirely.
+ if (CallInst *CI = dyn_cast<CallInst>(Op)) {
+ if (CI->hasOneUse() && isReallocLikeFn(CI, &TLI, true)) {
+ return eraseInstFromFunction(
+ *replaceInstUsesWith(*CI, CI->getOperand(0)));
+ }
+ }
+
// If we optimize for code size, try to move the call to free before the null
// test so that simplify cfg can remove the empty block and dead code
// elimination can remove the branch. I.e., this helps to turn something like:
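Separately, the new fold in visitFree handles the free(realloc(p, n)) idiom; a hedged source-level sketch of the intent (not the emitted IR):

#include <cstddef>
#include <cstdlib>

// When the realloc result's only use is an immediate free, the resize can
// never be observed, so the pair collapses to a single free of the
// original pointer.
void before(void *p, std::size_t n) {
  void *q = std::realloc(p, n);  // single use: the free below
  std::free(q);
}

void after(void *p) {
  std::free(p);  // the realloc is elided entirely
}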
@@ -2947,7 +2942,7 @@ Instruction *InstCombinerImpl::visitUnconditionalBranchInst(BranchInst &BI) {
auto GetLastSinkableStore = [](BasicBlock::iterator BBI) {
auto IsNoopInstrForStoreMerging = [](BasicBlock::iterator BBI) {
- return isa<DbgInfoIntrinsic>(BBI) ||
+ return BBI->isDebugOrPseudoInst() ||
(isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy());
};
@@ -3138,26 +3133,21 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
// checking for overflow.
const APInt *C;
if (match(WO->getRHS(), m_APInt(C))) {
- // Compute the no-wrap range [X,Y) for LHS given RHS=C, then
- // check for the inverted range using range offset trick (i.e.
- // use a subtract to shift the range to bottom of either the
- // signed or unsigned domain and then use a single compare to
- // check range membership).
+ // Compute the no-wrap range for LHS given RHS=C, then construct an
+ // equivalent icmp, potentially using an offset.
ConstantRange NWR =
ConstantRange::makeExactNoWrapRegion(WO->getBinaryOp(), *C,
WO->getNoWrapKind());
- APInt Min = WO->isSigned() ? NWR.getSignedMin() : NWR.getUnsignedMin();
- NWR = NWR.subtract(Min);
CmpInst::Predicate Pred;
- APInt NewRHSC;
- if (NWR.getEquivalentICmp(Pred, NewRHSC)) {
- auto *OpTy = WO->getRHS()->getType();
- auto *NewLHS = Builder.CreateSub(WO->getLHS(),
- ConstantInt::get(OpTy, Min));
- return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
- ConstantInt::get(OpTy, NewRHSC));
- }
+ APInt NewRHSC, Offset;
+ NWR.getEquivalentICmp(Pred, NewRHSC, Offset);
+ auto *OpTy = WO->getRHS()->getType();
+ auto *NewLHS = WO->getLHS();
+ if (Offset != 0)
+ NewLHS = Builder.CreateAdd(NewLHS, ConstantInt::get(OpTy, Offset));
+ return new ICmpInst(ICmpInst::getInversePredicate(Pred), NewLHS,
+ ConstantInt::get(OpTy, NewRHSC));
}
}
}
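The rewritten fold leans on the three-output ConstantRange::getEquivalentICmp overload, which returns an additive Offset instead of requiring the manual subtract-and-shift the old code performed. A hedged usage sketch, assuming an LLVM tree of roughly this commit's vintage:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  // No-wrap region for "add nuw i8 %x, 100": %x must lie in [0, 156) for
  // the unsigned 8-bit addition not to wrap.
  APInt C(/*numBits=*/8, 100);
  ConstantRange NWR = ConstantRange::makeExactNoWrapRegion(
      Instruction::Add, C, OverflowingBinaryOperator::NoUnsignedWrap);

  // One call yields predicate, RHS and offset for the equivalent test
  // "icmp pred (%x + Offset), RHS"; here: ult, 156, 0.
  CmpInst::Predicate Pred;
  APInt RHS, Offset;
  NWR.getEquivalentICmp(Pred, RHS, Offset);
  errs() << CmpInst::getPredicateName(Pred) << " rhs=" << RHS
         << " offset=" << Offset << '\n';
}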
@@ -3183,9 +3173,7 @@ Instruction *InstCombinerImpl::visitExtractValueInst(ExtractValueInst &EV) {
Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
// Whatever aliasing information we had for the original load must also
// hold for the smaller load, so propagate the annotations.
- AAMDNodes Nodes;
- L->getAAMetadata(Nodes);
- NL->setAAMetadata(Nodes);
+ NL->setAAMetadata(L->getAAMetadata());
// Returning the load directly will cause the main loop to insert it in
// the wrong spot, so use replaceInstUsesWith().
return replaceInstUsesWith(EV, NL);
@@ -3568,8 +3556,14 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
// While we could change the other users of OrigOp to use freeze(OrigOp), that
// potentially reduces their optimization potential, so let's only do this iff
// the OrigOp is only used by the freeze.
- if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp) ||
- canCreateUndefOrPoison(dyn_cast<Operator>(OrigOp)))
+ if (!OrigOpInst || !OrigOpInst->hasOneUse() || isa<PHINode>(OrigOp))
+ return nullptr;
+
+ // We can't push the freeze through an instruction which can itself create
+ // poison. If the only source of new poison is flags, we can simply
+ // strip them (since we know the only use is the freeze and nothing can
+ // benefit from them).
+ if (canCreateUndefOrPoison(cast<Operator>(OrigOp), /*ConsiderFlags*/ false))
return nullptr;
// If operand is guaranteed not to be poison, there is no need to add freeze
@@ -3585,6 +3579,8 @@ InstCombinerImpl::pushFreezeToPreventPoisonFromPropagating(FreezeInst &OrigFI) {
return nullptr;
}
+ OrigOpInst->dropPoisonGeneratingFlags();
+
// If all operands are guaranteed to be non-poison, we can drop freeze.
if (!MaybePoisonOperand)
return OrigOp;
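The key distinction is the new ConsiderFlags parameter of canCreateUndefOrPoison: an 'add nsw' can only create poison through its flags, so the freeze can be pushed through once the flags are stripped. A hedged sketch, assuming an LLVM tree of roughly this commit's vintage:

#include "llvm/AsmParser/Parser.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  SMDiagnostic Err;
  std::unique_ptr<Module> M = parseAssemblyString(
      "define i32 @f(i32 %x) {\n"
      "  %a = add nsw i32 %x, 1\n"
      "  ret i32 %a\n"
      "}\n",
      Err, Ctx);
  Instruction &Add = M->getFunction("f")->getEntryBlock().front();

  // With flags considered, 'add nsw' can create poison (prints 1); with
  // ConsiderFlags=false it cannot (prints 0), which is the test the patch
  // performs before committing to push the freeze through.
  errs() << canCreateUndefOrPoison(cast<Operator>(&Add)) << ' '
         << canCreateUndefOrPoison(cast<Operator>(&Add),
                                   /*ConsiderFlags=*/false) << '\n';

  Add.dropPoisonGeneratingFlags();  // now a plain 'add', safe under freeze
}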
@@ -3668,7 +3664,7 @@ Instruction *InstCombinerImpl::visitFreeze(FreezeInst &I) {
/// instruction past all of the instructions between it and the end of its
/// block.
static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
- assert(I->getSingleUndroppableUse() && "Invariants didn't hold!");
+ assert(I->getUniqueUndroppableUser() && "Invariants didn't hold!");
BasicBlock *SrcBlock = I->getParent();
// Cannot move control-flow-involving, volatile loads, vaarg, etc.
@@ -3822,51 +3818,71 @@ bool InstCombinerImpl::run() {
// See if we can trivially sink this instruction to its user if we can
// prove that the successor is not executed more frequently than our block.
- if (EnableCodeSinking)
- if (Use *SingleUse = I->getSingleUndroppableUse()) {
- BasicBlock *BB = I->getParent();
- Instruction *UserInst = cast<Instruction>(SingleUse->getUser());
- BasicBlock *UserParent;
+ // Return the UserBlock if successful.
+ auto getOptionalSinkBlockForInst =
+ [this](Instruction *I) -> Optional<BasicBlock *> {
+ if (!EnableCodeSinking)
+ return None;
+ auto *UserInst = cast_or_null<Instruction>(I->getUniqueUndroppableUser());
+ if (!UserInst)
+ return None;
- // Get the block the use occurs in.
- if (PHINode *PN = dyn_cast<PHINode>(UserInst))
- UserParent = PN->getIncomingBlock(*SingleUse);
- else
- UserParent = UserInst->getParent();
+ BasicBlock *BB = I->getParent();
+ BasicBlock *UserParent = nullptr;
- // Try sinking to another block. If that block is unreachable, then do
- // not bother. SimplifyCFG should handle it.
- if (UserParent != BB && DT.isReachableFromEntry(UserParent)) {
- // See if the user is one of our successors that has only one
- // predecessor, so that we don't have to split the critical edge.
- bool ShouldSink = UserParent->getUniquePredecessor() == BB;
- // Another option where we can sink is a block that ends with a
- // terminator that does not pass control to other block (such as
- // return or unreachable). In this case:
- // - I dominates the User (by SSA form);
- // - the User will be executed at most once.
- // So sinking I down to User is always profitable or neutral.
- if (!ShouldSink) {
- auto *Term = UserParent->getTerminator();
- ShouldSink = isa<ReturnInst>(Term) || isa<UnreachableInst>(Term);
- }
- if (ShouldSink) {
- assert(DT.dominates(BB, UserParent) &&
- "Dominance relation broken?");
- // Okay, the CFG is simple enough, try to sink this instruction.
- if (TryToSinkInstruction(I, UserParent)) {
- LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
- MadeIRChange = true;
- // We'll add uses of the sunk instruction below, but since sinking
- // can expose opportunities for it's *operands* add them to the
- // worklist
- for (Use &U : I->operands())
- if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
- Worklist.push(OpI);
- }
+ // Special handling for Phi nodes - get the block the use occurs in.
+ if (PHINode *PN = dyn_cast<PHINode>(UserInst)) {
+ for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
+ if (PN->getIncomingValue(i) == I) {
+ // Bail out if we have uses in different blocks. We don't do any
+ // sophisticated analysis (i.e., finding NearestCommonDominator of these
+ // use blocks).
+ if (UserParent && UserParent != PN->getIncomingBlock(i))
+ return None;
+ UserParent = PN->getIncomingBlock(i);
}
}
+ assert(UserParent && "expected to find user block!");
+ } else
+ UserParent = UserInst->getParent();
+
+ // Try sinking to another block. If that block is unreachable, then do
+ // not bother. SimplifyCFG should handle it.
+ if (UserParent == BB || !DT.isReachableFromEntry(UserParent))
+ return None;
+
+ auto *Term = UserParent->getTerminator();
+ // See if the user is one of our successors that has only one
+ // predecessor, so that we don't have to split the critical edge.
+ // Another option where we can sink is a block that ends with a
+ // terminator that does not pass control to other block (such as
+ // return or unreachable). In this case:
+ // - I dominates the User (by SSA form);
+ // - the User will be executed at most once.
+ // So sinking I down to User is always profitable or neutral.
+ if (UserParent->getUniquePredecessor() == BB ||
+ (isa<ReturnInst>(Term) || isa<UnreachableInst>(Term))) {
+ assert(DT.dominates(BB, UserParent) && "Dominance relation broken?");
+ return UserParent;
}
+ return None;
+ };
+
+ auto OptBB = getOptionalSinkBlockForInst(I);
+ if (OptBB) {
+ auto *UserParent = *OptBB;
+ // Okay, the CFG is simple enough, try to sink this instruction.
+ if (TryToSinkInstruction(I, UserParent)) {
+ LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+ MadeIRChange = true;
+ // We'll add uses of the sunk instruction below, but since
+ // sinking can expose opportunities for its *operands*, add
+ // them to the worklist.
+ for (Use &U : I->operands())
+ if (Instruction *OpI = dyn_cast<Instruction>(U.get()))
+ Worklist.push(OpI);
+ }
+ }
// Now that we have an instruction, try combining it to simplify it.
Builder.SetInsertPoint(I);
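The profitability argument in the comments above, in source form; a hypothetical before/after showing why a user block ending in 'return' is always a safe sink target:

// Before sinking: a * b is computed even when c is false.
int sinkCandidate(bool c, int a, int b) {
  int x = a * b;  // unique undroppable user lives in the 'if' block
  if (c)
    return x;     // user block ends in 'return': executed at most once
  return 0;
}

// After sinking, the multiply runs only on the path that uses it.
int afterSink(bool c, int a, int b) {
  if (c)
    return a * b;
  return 0;
}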
@@ -3994,13 +4010,13 @@ public:
/// whose condition is a known constant, we only visit the reachable successors.
static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
const TargetLibraryInfo *TLI,
- InstCombineWorklist &ICWorklist) {
+ InstructionWorklist &ICWorklist) {
bool MadeIRChange = false;
SmallPtrSet<BasicBlock *, 32> Visited;
SmallVector<BasicBlock*, 256> Worklist;
Worklist.push_back(&F.front());
- SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
+ SmallVector<Instruction *, 128> InstrsForInstructionWorklist;
DenseMap<Constant *, Constant *> FoldedConstants;
AliasScopeTracker SeenAliasScopes;
@@ -4011,25 +4027,23 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
if (!Visited.insert(BB).second)
continue;
- for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E; ) {
- Instruction *Inst = &*BBI++;
-
+ for (Instruction &Inst : llvm::make_early_inc_range(*BB)) {
// ConstantProp instruction if trivially constant.
- if (!Inst->use_empty() &&
- (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
- if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
- LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst
+ if (!Inst.use_empty() &&
+ (Inst.getNumOperands() == 0 || isa<Constant>(Inst.getOperand(0))))
+ if (Constant *C = ConstantFoldInstruction(&Inst, DL, TLI)) {
+ LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << Inst
<< '\n');
- Inst->replaceAllUsesWith(C);
+ Inst.replaceAllUsesWith(C);
++NumConstProp;
- if (isInstructionTriviallyDead(Inst, TLI))
- Inst->eraseFromParent();
+ if (isInstructionTriviallyDead(&Inst, TLI))
+ Inst.eraseFromParent();
MadeIRChange = true;
continue;
}
// See if we can constant fold its operands.
- for (Use &U : Inst->operands()) {
+ for (Use &U : Inst.operands()) {
if (!isa<ConstantVector>(U) && !isa<ConstantExpr>(U))
continue;
@@ -4039,7 +4053,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
FoldRes = ConstantFoldConstant(C, DL, TLI);
if (FoldRes != C) {
- LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
+ LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << Inst
<< "\n Old = " << *C
<< "\n New = " << *FoldRes << '\n');
U = FoldRes;
@@ -4050,9 +4064,9 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// Skip processing debug and pseudo intrinsics in InstCombine. Processing
// these call instructions consumes non-trivial amount of time and
// provides no value for the optimization.
- if (!Inst->isDebugOrPseudoInst()) {
- InstrsForInstCombineWorklist.push_back(Inst);
- SeenAliasScopes.analyse(Inst);
+ if (!Inst.isDebugOrPseudoInst()) {
+ InstrsForInstructionWorklist.push_back(&Inst);
+ SeenAliasScopes.analyse(&Inst);
}
}
@@ -4097,8 +4111,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
// of the function down. This jives well with the way that it adds all uses
// of instructions to the worklist after doing a transformation, thus avoiding
// some N^2 behavior in pathological cases.
- ICWorklist.reserve(InstrsForInstCombineWorklist.size());
- for (Instruction *Inst : reverse(InstrsForInstCombineWorklist)) {
+ ICWorklist.reserve(InstrsForInstructionWorklist.size());
+ for (Instruction *Inst : reverse(InstrsForInstructionWorklist)) {
// DCE instruction if trivially dead. As we iterate in reverse program
// order here, we will clean up whole chains of dead instructions.
if (isInstructionTriviallyDead(Inst, TLI) ||
@@ -4118,7 +4132,7 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
}
static bool combineInstructionsOverFunction(
- Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
+ Function &F, InstructionWorklist &Worklist, AliasAnalysis *AA,
AssumptionCache &AC, TargetLibraryInfo &TLI, TargetTransformInfo &TTI,
DominatorTree &DT, OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
ProfileSummaryInfo *PSI, unsigned MaxIterations, LoopInfo *LI) {