Diffstat (limited to 'lib/Transforms/InstCombine/InstructionCombining.cpp')
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 611
1 file changed, 318 insertions, 293 deletions
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index b332e75c7feb..12fcc8752ea9 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -34,6 +34,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "InstCombineInternal.h"
+#include "llvm-c/Initialization.h"
+#include "llvm-c/Transforms/InstCombine.h"
 #include "llvm/ADT/APInt.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -55,6 +57,7 @@
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -72,6 +75,7 @@
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/IR/PassManager.h"
@@ -93,8 +97,6 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/InstCombine/InstCombine.h"
 #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Utils/Local.h"
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
@@ -144,12 +146,20 @@ Value *InstCombiner::EmitGEPOffset(User *GEP) {
 /// We don't want to convert from a legal to an illegal type or from a smaller
 /// to a larger illegal type. A width of '1' is always treated as a legal type
 /// because i1 is a fundamental type in IR, and there are many specialized
-/// optimizations for i1 types.
+/// optimizations for i1 types. Widths of 8, 16 or 32 are equally treated as
+/// legal to convert to, in order to open up more combining opportunities.
+/// NOTE: this treats i8, i16 and i32 specially, due to them being so common
+/// from frontend languages.
 bool InstCombiner::shouldChangeType(unsigned FromWidth,
                                     unsigned ToWidth) const {
   bool FromLegal = FromWidth == 1 || DL.isLegalInteger(FromWidth);
   bool ToLegal = ToWidth == 1 || DL.isLegalInteger(ToWidth);
 
+  // Convert to widths of 8, 16 or 32 even if they are not legal types. Only
+  // shrink types, to prevent infinite loops.
+  if (ToWidth < FromWidth && (ToWidth == 8 || ToWidth == 16 || ToWidth == 32))
+    return true;
+
   // If this is a legal integer from type, and the result would be an illegal
   // type, don't do the transformation.
   if (FromLegal && !ToLegal)
@@ -396,28 +406,23 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
     // Transform: "(A op C1) op (B op C2)" ==> "(A op B) op (C1 op C2)"
     // if C1 and C2 are constants.
+    Value *A, *B;
+    Constant *C1, *C2;
     if (Op0 && Op1 &&
         Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
-        isa<Constant>(Op0->getOperand(1)) &&
-        isa<Constant>(Op1->getOperand(1)) &&
-        Op0->hasOneUse() && Op1->hasOneUse()) {
-      Value *A = Op0->getOperand(0);
-      Constant *C1 = cast<Constant>(Op0->getOperand(1));
-      Value *B = Op1->getOperand(0);
-      Constant *C2 = cast<Constant>(Op1->getOperand(1));
-
-      Constant *Folded = ConstantExpr::get(Opcode, C1, C2);
-      BinaryOperator *New = BinaryOperator::Create(Opcode, A, B);
-      if (isa<FPMathOperator>(New)) {
+        match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
+        match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
+      BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B);
+      if (isa<FPMathOperator>(NewBO)) {
         FastMathFlags Flags = I.getFastMathFlags();
         Flags &= Op0->getFastMathFlags();
         Flags &= Op1->getFastMathFlags();
-        New->setFastMathFlags(Flags);
+        NewBO->setFastMathFlags(Flags);
       }
-      InsertNewInstWith(New, I);
-      New->takeName(Op1);
-      I.setOperand(0, New);
-      I.setOperand(1, Folded);
+      InsertNewInstWith(NewBO, I);
+      NewBO->takeName(Op1);
+      I.setOperand(0, NewBO);
+      I.setOperand(1, ConstantExpr::get(Opcode, C1, C2));
 
       // Conservatively clear the optional flags, since they may not be
       // preserved by the reassociation.
       ClearSubclassDataAfterReassociation(I);
@@ -434,72 +439,38 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
 
 /// Return whether "X LOp (Y ROp Z)" is always equal to
 /// "(X LOp Y) ROp (X LOp Z)".
-static bool LeftDistributesOverRight(Instruction::BinaryOps LOp,
+static bool leftDistributesOverRight(Instruction::BinaryOps LOp,
                                      Instruction::BinaryOps ROp) {
-  switch (LOp) {
-  default:
-    return false;
+  // X & (Y | Z) <--> (X & Y) | (X & Z)
+  // X & (Y ^ Z) <--> (X & Y) ^ (X & Z)
+  if (LOp == Instruction::And)
+    return ROp == Instruction::Or || ROp == Instruction::Xor;
 
-  case Instruction::And:
-    // And distributes over Or and Xor.
-    switch (ROp) {
-    default:
-      return false;
-    case Instruction::Or:
-    case Instruction::Xor:
-      return true;
-    }
+  // X | (Y & Z) <--> (X | Y) & (X | Z)
+  if (LOp == Instruction::Or)
+    return ROp == Instruction::And;
 
-  case Instruction::Mul:
-    // Multiplication distributes over addition and subtraction.
-    switch (ROp) {
-    default:
-      return false;
-    case Instruction::Add:
-    case Instruction::Sub:
-      return true;
-    }
+  // X * (Y + Z) <--> (X * Y) + (X * Z)
+  // X * (Y - Z) <--> (X * Y) - (X * Z)
+  if (LOp == Instruction::Mul)
+    return ROp == Instruction::Add || ROp == Instruction::Sub;
 
-  case Instruction::Or:
-    // Or distributes over And.
-    switch (ROp) {
-    default:
-      return false;
-    case Instruction::And:
-      return true;
-    }
-  }
+  return false;
 }
 
 /// Return whether "(X LOp Y) ROp Z" is always equal to
 /// "(X ROp Z) LOp (Y ROp Z)".
-static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
+static bool rightDistributesOverLeft(Instruction::BinaryOps LOp,
                                      Instruction::BinaryOps ROp) {
   if (Instruction::isCommutative(ROp))
-    return LeftDistributesOverRight(ROp, LOp);
+    return leftDistributesOverRight(ROp, LOp);
+
+  // (X {&|^} Y) >> Z <--> (X >> Z) {&|^} (Y >> Z) for all shifts.
+  return Instruction::isBitwiseLogicOp(LOp) && Instruction::isShift(ROp);
 
-  switch (LOp) {
-  default:
-    return false;
-  // (X >> Z) & (Y >> Z) -> (X&Y) >> Z for all shifts.
-  // (X >> Z) | (Y >> Z) -> (X|Y) >> Z for all shifts.
-  // (X >> Z) ^ (Y >> Z) -> (X^Y) >> Z for all shifts.
-  case Instruction::And:
-  case Instruction::Or:
-  case Instruction::Xor:
-    switch (ROp) {
-    default:
-      return false;
-    case Instruction::Shl:
-    case Instruction::LShr:
-    case Instruction::AShr:
-      return true;
-    }
-  }
 
   // TODO: It would be nice to handle division, aka "(X + Y)/Z = X/Z + Y/Z",
   // but this requires knowing that the addition does not overflow and other
   // such subtleties.
-  return false;
 }
 
 /// This function returns identity value for given opcode, which can be used to
@@ -511,37 +482,27 @@ static Value *getIdentityValue(Instruction::BinaryOps Opcode, Value *V) {
   return ConstantExpr::getBinOpIdentity(Opcode, V->getType());
 }
 
-/// This function factors binary ops which can be combined using distributive
-/// laws. This function tries to transform 'Op' based TopLevelOpcode to enable
-/// factorization e.g for ADD(SHL(X , 2), MUL(X, 5)), When this function called
-/// with TopLevelOpcode == Instruction::Add and Op = SHL(X, 2), transforms
-/// SHL(X, 2) to MUL(X, 4) i.e. returns Instruction::Mul with LHS set to 'X' and
-/// RHS to 4.
+/// This function predicates factorization using distributive laws. By default,
+/// it just returns the 'Op' inputs. But for special-cases like
+/// 'add(shl(X, 5), ...)', this function will have TopOpcode == Instruction::Add
+/// and Op = shl(X, 5). The 'shl' is treated as the more general 'mul X, 32' to
+/// allow more factorization opportunities.
 static Instruction::BinaryOps
-getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,
-                          BinaryOperator *Op, Value *&LHS, Value *&RHS) {
+getBinOpsForFactorization(Instruction::BinaryOps TopOpcode, BinaryOperator *Op,
+                          Value *&LHS, Value *&RHS) {
   assert(Op && "Expected a binary operator");
-
   LHS = Op->getOperand(0);
   RHS = Op->getOperand(1);
-
-  switch (TopLevelOpcode) {
-  default:
-    return Op->getOpcode();
-
-  case Instruction::Add:
-  case Instruction::Sub:
-    if (Op->getOpcode() == Instruction::Shl) {
-      if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
-        // The multiplier is really 1 << CST.
-        RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
-        return Instruction::Mul;
-      }
+  if (TopOpcode == Instruction::Add || TopOpcode == Instruction::Sub) {
+    Constant *C;
+    if (match(Op, m_Shl(m_Value(), m_Constant(C)))) {
+      // X << C --> X * (1 << C)
+      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), C);
+      return Instruction::Mul;
     }
-    return Op->getOpcode();
+    // TODO: We can add other conversions e.g. shr => div etc.
   }
-
-  // TODO: We can add other conversions e.g. shr => div etc.
+  return Op->getOpcode();
 }
 
 /// This tries to simplify binary operations by factorizing out common terms
@@ -560,7 +521,7 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
   bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
 
   // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
-  if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+  if (leftDistributesOverRight(InnerOpcode, TopLevelOpcode))
     // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
     // commutative case, "(A op' B) op (C op' A)"?
     if (A == C || (InnerCommutative && A == D)) {
@@ -579,7 +540,7 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
   }
 
   // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
-  if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
+  if (!SimplifiedInst && rightDistributesOverLeft(TopLevelOpcode, InnerOpcode))
     // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
     // commutative case, "(A op' B) op (B op' D)"?
     if (B == D || (InnerCommutative && B == C)) {
@@ -664,21 +625,19 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
     // term.
     if (Op0)
       if (Value *Ident = getIdentityValue(LHSOpcode, RHS))
-        if (Value *V =
-                tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
+        if (Value *V = tryFactorization(I, LHSOpcode, A, B, RHS, Ident))
           return V;
 
     // The instruction has the form "(B) op (C op' D)". Try to factorize common
     // term.
     if (Op1)
       if (Value *Ident = getIdentityValue(RHSOpcode, LHS))
-        if (Value *V =
-                tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
+        if (Value *V = tryFactorization(I, RHSOpcode, LHS, Ident, C, D))
          return V;
  }

  // Expansion.
-  if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
+  if (Op0 && rightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) {
    // The instruction has the form "(A op' B) op C".  See if expanding it out
    // to "(A op C) op' (B op C)" results in simplifications.
    Value *A = Op0->getOperand(0), *B = Op0->getOperand(1), *C = RHS;
@@ -715,7 +674,7 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
    }
  }

-  if (Op1 && LeftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
+  if (Op1 && leftDistributesOverRight(TopLevelOpcode, Op1->getOpcode())) {
    // The instruction has the form "A op (B op' C)".  See if expanding it out
    // to "(A op B) op' (A op C)" results in simplifications.
    Value *A = LHS, *B = Op1->getOperand(0), *C = Op1->getOperand(1);
@@ -817,23 +776,6 @@ Value *InstCombiner::dyn_castNegVal(Value *V) const {
  return nullptr;
}

-/// Given a 'fsub' instruction, return the RHS of the instruction if the LHS is
-/// a constant negative zero (which is the 'negate' form).
-Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
-  if (BinaryOperator::isFNeg(V, IgnoreZeroSign))
-    return BinaryOperator::getFNegArgument(V);
-
-  // Constants can be considered to be negated values if they can be folded.
-  if (ConstantFP *C = dyn_cast<ConstantFP>(V))
-    return ConstantExpr::getFNeg(C);
-
-  if (ConstantDataVector *C = dyn_cast<ConstantDataVector>(V))
-    if (C->getType()->getElementType()->isFloatingPointTy())
-      return ConstantExpr::getFNeg(C);
-
-  return nullptr;
-}
-
 static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
                                              InstCombiner::BuilderTy &Builder) {
   if (auto *Cast = dyn_cast<CastInst>(&I))
@@ -1081,8 +1023,9 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
   return replaceInstUsesWith(I, NewPN);
 }
 
-Instruction *InstCombiner::foldOpWithConstantIntoOperand(BinaryOperator &I) {
-  assert(isa<Constant>(I.getOperand(1)) && "Unexpected operand type");
+Instruction *InstCombiner::foldBinOpIntoSelectOrPhi(BinaryOperator &I) {
+  if (!isa<Constant>(I.getOperand(1)))
+    return nullptr;
 
   if (auto *Sel = dyn_cast<SelectInst>(I.getOperand(0))) {
     if (Instruction *NewSel = FoldOpIntoSelect(I, Sel))
@@ -1107,7 +1050,7 @@ Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
   // Start with the index over the outer type.  Note that the type size
   // might be zero (even if the offset isn't zero) if the indexed type
   // is something like [0 x {int, int}]
-  Type *IntPtrTy = DL.getIntPtrType(PtrTy);
+  Type *IndexTy = DL.getIndexType(PtrTy);
   int64_t FirstIdx = 0;
   if (int64_t TySize = DL.getTypeAllocSize(Ty)) {
     FirstIdx = Offset/TySize;
@@ -1122,7 +1065,7 @@ Type *InstCombiner::FindElementAtOffset(PointerType *PtrTy, int64_t Offset,
     assert((uint64_t)Offset < (uint64_t)TySize && "Out of range offset");
   }
 
-  NewIndices.push_back(ConstantInt::get(IntPtrTy, FirstIdx));
+  NewIndices.push_back(ConstantInt::get(IndexTy, FirstIdx));
 
   // Index into the types. If we fail, set OrigBase to null.
   while (Offset) {
@@ -1144,7 +1087,7 @@
     } else if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) {
       uint64_t EltSize = DL.getTypeAllocSize(AT->getElementType());
       assert(EltSize && "Cannot index into a zero-sized array");
-      NewIndices.push_back(ConstantInt::get(IntPtrTy,Offset/EltSize));
+      NewIndices.push_back(ConstantInt::get(IndexTy,Offset/EltSize));
       Offset %= EltSize;
       Ty = AT->getElementType();
     } else {
@@ -1408,22 +1351,7 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) {
   } while (true);
 }
 
-/// \brief Creates node of binary operation with the same attributes as the
-/// specified one but with other operands.
-static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS,
-                                 InstCombiner::BuilderTy &B) {
-  Value *BO = B.CreateBinOp(Inst.getOpcode(), LHS, RHS);
-  // If LHS and RHS are constant, BO won't be a binary operator.
-  if (BinaryOperator *NewBO = dyn_cast<BinaryOperator>(BO))
-    NewBO->copyIRFlags(&Inst);
-  return BO;
-}
-
-/// \brief Makes transformation of binary operation specific for vector types.
-/// \param Inst Binary operator to transform.
-/// \return Pointer to node that must replace the original binary operator, or
-///         null pointer if no transformation was made.
-Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
+Instruction *InstCombiner::foldShuffledBinop(BinaryOperator &Inst) {
   if (!Inst.getType()->isVectorTy())
     return nullptr;
 
   // It may not be safe to reorder shuffles and things like div, urem, etc.
@@ -1437,58 +1365,71 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
   assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth);
   assert(cast<VectorType>(RHS->getType())->getNumElements() == VWidth);
 
+  auto createBinOpShuffle = [&](Value *X, Value *Y, Constant *M) {
+    Value *XY = Builder.CreateBinOp(Inst.getOpcode(), X, Y);
+    if (auto *BO = dyn_cast<BinaryOperator>(XY))
+      BO->copyIRFlags(&Inst);
+    return new ShuffleVectorInst(XY, UndefValue::get(XY->getType()), M);
+  };
+
   // If both arguments of the binary operation are shuffles that use the same
-  // mask and shuffle within a single vector, move the shuffle after the binop:
-  //   Op(shuffle(v1, m), shuffle(v2, m)) -> shuffle(Op(v1, v2), m)
-  auto *LShuf = dyn_cast<ShuffleVectorInst>(LHS);
-  auto *RShuf = dyn_cast<ShuffleVectorInst>(RHS);
-  if (LShuf && RShuf && LShuf->getMask() == RShuf->getMask() &&
-      isa<UndefValue>(LShuf->getOperand(1)) &&
-      isa<UndefValue>(RShuf->getOperand(1)) &&
-      LShuf->getOperand(0)->getType() == RShuf->getOperand(0)->getType()) {
-    Value *NewBO = CreateBinOpAsGiven(Inst, LShuf->getOperand(0),
-                                      RShuf->getOperand(0), Builder);
-    return Builder.CreateShuffleVector(
-        NewBO, UndefValue::get(NewBO->getType()), LShuf->getMask());
+  // mask and shuffle within a single vector, move the shuffle after the binop.
+  Value *V1, *V2;
+  Constant *Mask;
+  if (match(LHS, m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))) &&
+      match(RHS, m_ShuffleVector(m_Value(V2), m_Undef(), m_Specific(Mask))) &&
+      V1->getType() == V2->getType() &&
+      (LHS->hasOneUse() || RHS->hasOneUse() || LHS == RHS)) {
+    // Op(shuffle(V1, Mask), shuffle(V2, Mask)) -> shuffle(Op(V1, V2), Mask)
+    return createBinOpShuffle(V1, V2, Mask);
   }
 
-  // If one argument is a shuffle within one vector, the other is a constant,
-  // try moving the shuffle after the binary operation.
-  ShuffleVectorInst *Shuffle = nullptr;
-  Constant *C1 = nullptr;
-  if (isa<ShuffleVectorInst>(LHS)) Shuffle = cast<ShuffleVectorInst>(LHS);
-  if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS);
-  if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS);
-  if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS);
-  if (Shuffle && C1 &&
-      (isa<ConstantVector>(C1) || isa<ConstantDataVector>(C1)) &&
-      isa<UndefValue>(Shuffle->getOperand(1)) &&
-      Shuffle->getType() == Shuffle->getOperand(0)->getType()) {
-    SmallVector<int, 16> ShMask = Shuffle->getShuffleMask();
-    // Find constant C2 that has property:
-    //   shuffle(C2, ShMask) = C1
-    // If such constant does not exist (example: ShMask=<0,0> and C1=<1,2>)
-    // reorder is not possible.
-    SmallVector<Constant*, 16> C2M(VWidth,
-                               UndefValue::get(C1->getType()->getScalarType()));
+  // If one argument is a shuffle within one vector and the other is a constant,
+  // try moving the shuffle after the binary operation. This canonicalization
+  // intends to move shuffles closer to other shuffles and binops closer to
+  // other binops, so they can be folded. It may also enable demanded elements
+  // transforms.
+  Constant *C;
+  if (match(&Inst, m_c_BinOp(
+          m_OneUse(m_ShuffleVector(m_Value(V1), m_Undef(), m_Constant(Mask))),
+          m_Constant(C))) &&
+      V1->getType() == Inst.getType()) {
+    // Find constant NewC that has property:
+    //   shuffle(NewC, ShMask) = C
+    // If such constant does not exist (example: ShMask=<0,0> and C=<1,2>)
+    // reorder is not possible. A 1-to-1 mapping is not required. Example:
+    //   ShMask = <1,1,2,2> and C = <5,5,6,6> --> NewC = <undef,5,6,undef>
+    SmallVector<int, 16> ShMask;
+    ShuffleVectorInst::getShuffleMask(Mask, ShMask);
+    SmallVector<Constant *, 16>
+        NewVecC(VWidth, UndefValue::get(C->getType()->getScalarType()));
     bool MayChange = true;
     for (unsigned I = 0; I < VWidth; ++I) {
       if (ShMask[I] >= 0) {
         assert(ShMask[I] < (int)VWidth);
-        if (!isa<UndefValue>(C2M[ShMask[I]])) {
+        Constant *CElt = C->getAggregateElement(I);
+        Constant *NewCElt = NewVecC[ShMask[I]];
+        if (!CElt || (!isa<UndefValue>(NewCElt) && NewCElt != CElt)) {
           MayChange = false;
           break;
         }
-        C2M[ShMask[I]] = C1->getAggregateElement(I);
+        NewVecC[ShMask[I]] = CElt;
       }
     }
     if (MayChange) {
-      Constant *C2 = ConstantVector::get(C2M);
-      Value *NewLHS = isa<Constant>(LHS) ? C2 : Shuffle->getOperand(0);
-      Value *NewRHS = isa<Constant>(LHS) ? Shuffle->getOperand(0) : C2;
-      Value *NewBO = CreateBinOpAsGiven(Inst, NewLHS, NewRHS, Builder);
-      return Builder.CreateShuffleVector(NewBO,
-          UndefValue::get(Inst.getType()), Shuffle->getMask());
+      Constant *NewC = ConstantVector::get(NewVecC);
+      // It may not be safe to execute a binop on a vector with undef elements
+      // because the entire instruction can be folded to undef or create poison
+      // that did not exist in the original code.
+      bool ConstOp1 = isa<Constant>(Inst.getOperand(1));
+      if (Inst.isIntDivRem() || (Inst.isShift() && ConstOp1))
+        NewC = getSafeVectorConstantForBinop(Inst.getOpcode(), NewC, ConstOp1);
+
+      // Op(shuffle(V1, Mask), C) -> shuffle(Op(V1, NewC), Mask)
+      // Op(C, shuffle(V1, Mask)) -> shuffle(Op(NewC, V1), Mask)
+      Value *NewLHS = isa<Constant>(LHS) ? NewC : V1;
+      Value *NewRHS = isa<Constant>(LHS) ? V1 : NewC;
+      return createBinOpShuffle(NewLHS, NewRHS, Mask);
     }
   }
 
@@ -1497,9 +1438,9 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
 
 Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
-
-  if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops,
-                                 SQ.getWithInstruction(&GEP)))
+  Type *GEPType = GEP.getType();
+  Type *GEPEltType = GEP.getSourceElementType();
+  if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
   Value *PtrOp = GEP.getOperand(0);
@@ -1507,8 +1448,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // Eliminate unneeded casts for indices, and replace indices which displace
   // by multiples of a zero size type with zero.
   bool MadeChange = false;
-  Type *IntPtrTy =
-      DL.getIntPtrType(GEP.getPointerOperandType()->getScalarType());
+
+  // Index width may not be the same width as pointer width.
+  // Data layout chooses the right type based on supported integer types.
+  Type *NewScalarIndexTy =
+      DL.getIndexType(GEP.getPointerOperandType()->getScalarType());
 
   gep_type_iterator GTI = gep_type_begin(GEP);
   for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
@@ -1517,10 +1461,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     if (GTI.isStruct())
       continue;
 
-    // Index type should have the same width as IntPtr
     Type *IndexTy = (*I)->getType();
-    Type *NewIndexType = IndexTy->isVectorTy() ?
-      VectorType::get(IntPtrTy, IndexTy->getVectorNumElements()) : IntPtrTy;
+    Type *NewIndexType =
+        IndexTy->isVectorTy()
+            ? VectorType::get(NewScalarIndexTy, IndexTy->getVectorNumElements())
            : NewScalarIndexTy;
 
    // If the element type has zero size then any index over it is equivalent
    // to an index of zero, so replace it with zero if it is not zero already.
@@ -1543,8 +1488,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    return &GEP;

  // Check to see if the inputs to the PHI node are getelementptr instructions.
-  if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) {
-    GetElementPtrInst *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
+  if (auto *PN = dyn_cast<PHINode>(PtrOp)) {
+    auto *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0));
    if (!Op1)
      return nullptr;

@@ -1560,7 +1505,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    int DI = -1;

    for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) {
-      GetElementPtrInst *Op2 = dyn_cast<GetElementPtrInst>(*I);
+      auto *Op2 = dyn_cast<GetElementPtrInst>(*I);
      if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands())
        return nullptr;

@@ -1602,7 +1547,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
        if (J > 0) {
          if (J == 1) {
            CurTy = Op1->getSourceElementType();
-          } else if (CompositeType *CT = dyn_cast<CompositeType>(CurTy)) {
+          } else if (auto *CT = dyn_cast<CompositeType>(CurTy)) {
            CurTy = CT->getTypeAtIndex(Op1->getOperand(J));
          } else {
            CurTy = nullptr;
@@ -1617,7 +1562,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    if (DI != -1 && !PN->hasOneUse())
      return nullptr;

-    GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone());
+    auto *NewGEP = cast<GetElementPtrInst>(Op1->clone());
    if (DI == -1) {
      // All the GEPs feeding the PHI are identical. Clone one down into our
      // BB so that it can be merged with the current GEP.
@@ -1652,15 +1597,64 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  // Combine Indices - If the source pointer to this getelementptr instruction
  // is a getelementptr instruction, combine the indices of the two
  // getelementptr instructions into a single instruction.
-  if (GEPOperator *Src = dyn_cast<GEPOperator>(PtrOp)) {
+  if (auto *Src = dyn_cast<GEPOperator>(PtrOp)) {
    if (!shouldMergeGEPs(*cast<GEPOperator>(&GEP), *Src))
      return nullptr;

+    // Try to reassociate loop invariant GEP chains to enable LICM.
+    if (LI && Src->getNumOperands() == 2 && GEP.getNumOperands() == 2 &&
+        Src->hasOneUse()) {
+      if (Loop *L = LI->getLoopFor(GEP.getParent())) {
+        Value *GO1 = GEP.getOperand(1);
+        Value *SO1 = Src->getOperand(1);
+        // Reassociate the two GEPs if SO1 is variant in the loop and GO1 is
+        // invariant: this breaks the dependence between GEPs and allows LICM
+        // to hoist the invariant part out of the loop.
+        if (L->isLoopInvariant(GO1) && !L->isLoopInvariant(SO1)) {
+          // We have to be careful here.
+          // We have something like:
+          //   %src = getelementptr <ty>, <ty>* %base, <ty> %idx
+          //   %gep = getelementptr <ty>, <ty>* %src, <ty> %idx2
+          // If we just swap idx & idx2 then we could inadvertantly
+          // change %src from a vector to a scalar, or vice versa.
+          // Cases:
+          //  1) %base a scalar & idx a scalar & idx2 a vector
+          //     => Swapping idx & idx2 turns %src into a vector type.
+          //  2) %base a scalar & idx a vector & idx2 a scalar
+          //     => Swapping idx & idx2 turns %src in a scalar type
+          //  3) %base, %idx, and %idx2 are scalars
+          //     => %src & %gep are scalars
+          //     => swapping idx & idx2 is safe
+          //  4) %base a vector
+          //     => %src is a vector
+          //     => swapping idx & idx2 is safe.
+          auto *SO0 = Src->getOperand(0);
+          auto *SO0Ty = SO0->getType();
+          if (!isa<VectorType>(GEPType) ||  // case 3
+              isa<VectorType>(SO0Ty)) {     // case 4
+            Src->setOperand(1, GO1);
+            GEP.setOperand(1, SO1);
+            return &GEP;
+          } else {
+            // Case 1 or 2
+            //  -- have to recreate %src & %gep
+            // put NewSrc at same location as %src
+            Builder.SetInsertPoint(cast<Instruction>(PtrOp));
+            auto *NewSrc = cast<GetElementPtrInst>(
+                Builder.CreateGEP(SO0, GO1, Src->getName()));
+            NewSrc->setIsInBounds(Src->isInBounds());
+            auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1});
+            NewGEP->setIsInBounds(GEP.isInBounds());
+            return NewGEP;
+          }
+        }
+      }
+    }
+
     // Note that if our source is a gep chain itself then we wait for that
     // chain to be resolved before we perform this transformation.  This
     // avoids us creating a TON of code in some cases.
-    if (GEPOperator *SrcGEP =
-          dyn_cast<GEPOperator>(Src->getOperand(0)))
+    if (auto *SrcGEP = dyn_cast<GEPOperator>(Src->getOperand(0)))
       if (SrcGEP->getNumOperands() == 2 && shouldMergeGEPs(*Src, *SrcGEP))
         return nullptr;   // Wait until our source is folded to completion.
@@ -1723,9 +1717,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (GEP.getNumIndices() == 1) {
     unsigned AS = GEP.getPointerAddressSpace();
     if (GEP.getOperand(1)->getType()->getScalarSizeInBits() ==
-        DL.getPointerSizeInBits(AS)) {
-      Type *Ty = GEP.getSourceElementType();
-      uint64_t TyAllocSize = DL.getTypeAllocSize(Ty);
+        DL.getIndexSizeInBits(AS)) {
+      uint64_t TyAllocSize = DL.getTypeAllocSize(GEPEltType);
 
       bool Matched = false;
       uint64_t C;
@@ -1752,22 +1745,20 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         Operator *Index = cast<Operator>(V);
         Value *PtrToInt = Builder.CreatePtrToInt(PtrOp, Index->getType());
         Value *NewSub = Builder.CreateSub(PtrToInt, Index->getOperand(1));
-        return CastInst::Create(Instruction::IntToPtr, NewSub, GEP.getType());
+        return CastInst::Create(Instruction::IntToPtr, NewSub, GEPType);
       }
       // Canonicalize (gep i8* X, (ptrtoint Y)-(ptrtoint X))
       // to (bitcast Y)
       Value *Y;
       if (match(V, m_Sub(m_PtrToInt(m_Value(Y)),
-                         m_PtrToInt(m_Specific(GEP.getOperand(0)))))) {
-        return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y,
-                                                             GEP.getType());
-      }
+                         m_PtrToInt(m_Specific(GEP.getOperand(0))))))
+        return CastInst::CreatePointerBitCastOrAddrSpaceCast(Y, GEPType);
      }
    }
  }

  // We do not handle pointer-vector geps here.
-  if (GEP.getType()->isVectorTy())
+  if (GEPType->isVectorTy())
    return nullptr;

  // Handle gep(bitcast x) and gep(gep x, 0, 0, 0).
@@ -1776,7 +1767,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
  if (StrippedPtr != PtrOp) {
    bool HasZeroPointerIndex = false;
-    if (ConstantInt *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
+    if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
      HasZeroPointerIndex = C->isZero();

    // Transform: GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ...
    // into     : GEP [10 x i8]* X, i32 0, ...
    //
    // This occurs when the program declares an array extern like "int X[];"
    if (HasZeroPointerIndex) {
-      if (ArrayType *CATy =
-          dyn_cast<ArrayType>(GEP.getSourceElementType())) {
+      if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
        // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
        if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
          // -> GEP i8* X, ...
@@ -1804,11 +1794,10 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
            // ->
            // %0 = GEP i8 addrspace(1)* X, ...
            // addrspacecast i8 addrspace(1)* %0 to i8*
-            return new AddrSpaceCastInst(Builder.Insert(Res), GEP.getType());
+            return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
          }

-        if (ArrayType *XATy =
-              dyn_cast<ArrayType>(StrippedPtrTy->getElementType())){
+        if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())) {
          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
          if (CATy->getElementType() == XATy->getElementType()) {
            // -> GEP [10 x i8]* X, i32 0, ...
@@ -1836,7 +1825,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                        nullptr, StrippedPtr, Idx, GEP.getName())
                  : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());

-            return new AddrSpaceCastInst(NewGEP, GEP.getType());
+            return new AddrSpaceCastInst(NewGEP, GEPType);
          }
        }
      }
@@ -1844,12 +1833,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    // Transform things like:
    // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
    // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
-    Type *SrcElTy = StrippedPtrTy->getElementType();
-    Type *ResElTy = GEP.getSourceElementType();
-    if (SrcElTy->isArrayTy() &&
-        DL.getTypeAllocSize(SrcElTy->getArrayElementType()) ==
-            DL.getTypeAllocSize(ResElTy)) {
-      Type *IdxType = DL.getIntPtrType(GEP.getType());
+    Type *SrcEltTy = StrippedPtrTy->getElementType();
+    if (SrcEltTy->isArrayTy() &&
+        DL.getTypeAllocSize(SrcEltTy->getArrayElementType()) ==
+            DL.getTypeAllocSize(GEPEltType)) {
+      Type *IdxType = DL.getIndexType(GEPType);
      Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
      Value *NewGEP = GEP.isInBounds()
                          ? Builder.CreateInBoundsGEP(
@@ -1858,28 +1846,28 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                          : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());

      // V and GEP are both pointer types --> BitCast
-      return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
-                                                           GEP.getType());
+      return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
    }

    // Transform things like:
    // %V = mul i64 %N, 4
    // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
    // into:  %t1 = getelementptr i32* %arr, i32 %N; bitcast
-    if (ResElTy->isSized() && SrcElTy->isSized()) {
+    if (GEPEltType->isSized() && SrcEltTy->isSized()) {
      // Check that changing the type amounts to dividing the index by a scale
      // factor.
-      uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
-      uint64_t SrcSize = DL.getTypeAllocSize(SrcElTy);
+      uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
+      uint64_t SrcSize = DL.getTypeAllocSize(SrcEltTy);
      if (ResSize && SrcSize % ResSize == 0) {
        Value *Idx = GEP.getOperand(1);
        unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
        uint64_t Scale = SrcSize / ResSize;

-        // Earlier transforms ensure that the index has type IntPtrType, which
-        // considerably simplifies the logic by eliminating implicit casts.
-        assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
-               "Index not cast to pointer width?");
+        // Earlier transforms ensure that the index has the right type
+        // according to Data Layout, which considerably simplifies the
+        // logic by eliminating implicit casts.
+        assert(Idx->getType() == DL.getIndexType(GEPType) &&
+               "Index type does not match the Data Layout preferences");
 
         bool NSW;
         if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
@@ -1895,7 +1883,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
           // The NewGEP must be pointer typed, so must the old one -> BitCast
           return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
-                                                               GEP.getType());
+                                                               GEPType);
         }
       }
     }
@@ -1904,39 +1892,40 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     //   getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
     //   (where tmp = 8*tmp2) into:
     //   getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
-    if (ResElTy->isSized() && SrcElTy->isSized() && SrcElTy->isArrayTy()) {
+    if (GEPEltType->isSized() && SrcEltTy->isSized() &&
+        SrcEltTy->isArrayTy()) {
       // Check that changing to the array element type amounts to dividing the
       // index by a scale factor.
-      uint64_t ResSize = DL.getTypeAllocSize(ResElTy);
+      uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
       uint64_t ArrayEltSize =
-          DL.getTypeAllocSize(SrcElTy->getArrayElementType());
+          DL.getTypeAllocSize(SrcEltTy->getArrayElementType());
       if (ResSize && ArrayEltSize % ResSize == 0) {
         Value *Idx = GEP.getOperand(1);
         unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
         uint64_t Scale = ArrayEltSize / ResSize;
 
-        // Earlier transforms ensure that the index has type IntPtrType, which
-        // considerably simplifies the logic by eliminating implicit casts.
-        assert(Idx->getType() == DL.getIntPtrType(GEP.getType()) &&
-               "Index not cast to pointer width?");
+        // Earlier transforms ensure that the index has the right type
+        // according to the Data Layout, which considerably simplifies
+        // the logic by eliminating implicit casts.
+        assert(Idx->getType() == DL.getIndexType(GEPType) &&
+               "Index type does not match the Data Layout preferences");
 
         bool NSW;
         if (Value *NewIdx = Descale(Idx, APInt(BitWidth, Scale), NSW)) {
           // Successfully decomposed Idx as NewIdx * Scale, form a new GEP.
           // If the multiplication NewIdx * Scale may overflow then the new
           // GEP may not be "inbounds".
-          Value *Off[2] = {
-              Constant::getNullValue(DL.getIntPtrType(GEP.getType())),
-              NewIdx};
+          Type *IndTy = DL.getIndexType(GEPType);
+          Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
 
           Value *NewGEP = GEP.isInBounds() && NSW
                               ? Builder.CreateInBoundsGEP(
-                                    SrcElTy, StrippedPtr, Off, GEP.getName())
-                              : Builder.CreateGEP(SrcElTy, StrippedPtr, Off,
+                                    SrcEltTy, StrippedPtr, Off, GEP.getName())
+                              : Builder.CreateGEP(SrcEltTy, StrippedPtr, Off,
                                                   GEP.getName());
 
           // The NewGEP must be pointer typed, so must the old one -> BitCast
           return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,
-                                                               GEP.getType());
+                                                               GEPType);
         }
       }
     }
@@ -1946,34 +1935,53 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   // addrspacecast between types is canonicalized as a bitcast, then an
   // addrspacecast. To take advantage of the below bitcast + struct GEP, look
   // through the addrspacecast.
-  if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
+  Value *ASCStrippedPtrOp = PtrOp;
+  if (auto *ASC = dyn_cast<AddrSpaceCastInst>(PtrOp)) {
     //   X = bitcast A addrspace(1)* to B addrspace(1)*
     //   Y = addrspacecast A addrspace(1)* to B addrspace(2)*
     //   Z = gep Y, <...constant indices...>
     // Into an addrspacecasted GEP of the struct.
-    if (BitCastInst *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
-      PtrOp = BC;
+    if (auto *BC = dyn_cast<BitCastInst>(ASC->getOperand(0)))
+      ASCStrippedPtrOp = BC;
   }
 
-  /// See if we can simplify:
-  ///   X = bitcast A* to B*
-  ///   Y = gep X, <...constant indices...>
-  /// into a gep of the original struct. This is important for SROA and alias
-  /// analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
-  if (BitCastInst *BCI = dyn_cast<BitCastInst>(PtrOp)) {
-    Value *Operand = BCI->getOperand(0);
-    PointerType *OpType = cast<PointerType>(Operand->getType());
-    unsigned OffsetBits = DL.getPointerTypeSizeInBits(GEP.getType());
-    APInt Offset(OffsetBits, 0);
-    if (!isa<BitCastInst>(Operand) &&
-        GEP.accumulateConstantOffset(DL, Offset)) {
+  if (auto *BCI = dyn_cast<BitCastInst>(ASCStrippedPtrOp)) {
+    Value *SrcOp = BCI->getOperand(0);
+    PointerType *SrcType = cast<PointerType>(BCI->getSrcTy());
+    Type *SrcEltType = SrcType->getElementType();
+
+    // GEP directly using the source operand if this GEP is accessing an element
+    // of a bitcasted pointer to vector or array of the same dimensions:
+    // gep (bitcast <c x ty>* X to [c x ty]*), Y, Z --> gep X, Y, Z
+    // gep (bitcast [c x ty]* X to <c x ty>*), Y, Z --> gep X, Y, Z
+    auto areMatchingArrayAndVecTypes = [](Type *ArrTy, Type *VecTy) {
+      return ArrTy->getArrayElementType() == VecTy->getVectorElementType() &&
+             ArrTy->getArrayNumElements() == VecTy->getVectorNumElements();
+    };
+    if (GEP.getNumOperands() == 3 &&
+        ((GEPEltType->isArrayTy() && SrcEltType->isVectorTy() &&
+          areMatchingArrayAndVecTypes(GEPEltType, SrcEltType)) ||
+         (GEPEltType->isVectorTy() && SrcEltType->isArrayTy() &&
+          areMatchingArrayAndVecTypes(SrcEltType, GEPEltType)))) {
+      GEP.setOperand(0, SrcOp);
+      GEP.setSourceElementType(SrcEltType);
+      return &GEP;
+    }
+    // See if we can simplify:
+    //   X = bitcast A* to B*
+    //   Y = gep X, <...constant indices...>
+    // into a gep of the original struct. This is important for SROA and alias
+    // analysis of unions. If "A" is also a bitcast, wait for A/X to be merged.
+    unsigned OffsetBits = DL.getIndexTypeSizeInBits(GEPType);
+    APInt Offset(OffsetBits, 0);
+    if (!isa<BitCastInst>(SrcOp) && GEP.accumulateConstantOffset(DL, Offset)) {
       // If this GEP instruction doesn't move the pointer, just replace the GEP
       // with a bitcast of the real input to the dest type.
       if (!Offset) {
         // If the bitcast is of an allocation, and the allocation will be
         // converted to match the type of the cast, don't touch this.
-        if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, &TLI)) {
+        if (isa<AllocaInst>(SrcOp) || isAllocationFn(SrcOp, &TLI)) {
           // See if the bitcast simplifies, if so, don't nuke this GEP yet.
           if (Instruction *I = visitBitCast(*BCI)) {
             if (I != BCI) {
@@ -1985,43 +1993,43 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           }
         }
 
-        if (Operand->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
-          return new AddrSpaceCastInst(Operand, GEP.getType());
-        return new BitCastInst(Operand, GEP.getType());
+        if (SrcType->getPointerAddressSpace() != GEP.getAddressSpace())
+          return new AddrSpaceCastInst(SrcOp, GEPType);
+        return new BitCastInst(SrcOp, GEPType);
       }
 
       // Otherwise, if the offset is non-zero, we need to find out if there is a
       // field at Offset in 'A's type. If so, we can pull the cast through the
       // GEP.
       SmallVector<Value*, 8> NewIndices;
-      if (FindElementAtOffset(OpType, Offset.getSExtValue(), NewIndices)) {
+      if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
         Value *NGEP = GEP.isInBounds()
-                          ? Builder.CreateInBoundsGEP(nullptr, Operand, NewIndices)
-                          : Builder.CreateGEP(nullptr, Operand, NewIndices);
+                          ? Builder.CreateInBoundsGEP(nullptr, SrcOp, NewIndices)
+                          : Builder.CreateGEP(nullptr, SrcOp, NewIndices);
 
-        if (NGEP->getType() == GEP.getType())
+        if (NGEP->getType() == GEPType)
           return replaceInstUsesWith(GEP, NGEP);
 
         NGEP->takeName(&GEP);
 
         if (NGEP->getType()->getPointerAddressSpace() != GEP.getAddressSpace())
-          return new AddrSpaceCastInst(NGEP, GEP.getType());
-        return new BitCastInst(NGEP, GEP.getType());
+          return new AddrSpaceCastInst(NGEP, GEPType);
+        return new BitCastInst(NGEP, GEPType);
       }
     }
   }
 
   if (!GEP.isInBounds()) {
-    unsigned PtrWidth =
-        DL.getPointerSizeInBits(PtrOp->getType()->getPointerAddressSpace());
-    APInt BasePtrOffset(PtrWidth, 0);
+    unsigned IdxWidth =
+        DL.getIndexSizeInBits(PtrOp->getType()->getPointerAddressSpace());
+    APInt BasePtrOffset(IdxWidth, 0);
     Value *UnderlyingPtrOp =
         PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL, BasePtrOffset);
     if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
       if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
           BasePtrOffset.isNonNegative()) {
-        APInt AllocSize(PtrWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
+        APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
         if (BasePtrOffset.ule(AllocSize)) {
           return GetElementPtrInst::CreateInBounds(
               PtrOp, makeArrayRef(Ops).slice(1), GEP.getName());
@@ -2198,7 +2206,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
   return nullptr;
 }
 
-/// \brief Move the call to free before a NULL test.
+/// Move the call to free before a NULL test.
 ///
 /// Check if this free is accessed after its argument has been test
 /// against NULL (property 0).
@@ -2562,6 +2570,7 @@ static bool isCatchAll(EHPersonality Personality, Constant *TypeInfo) {
   case EHPersonality::MSVC_Win64SEH:
   case EHPersonality::MSVC_CXX:
   case EHPersonality::CoreCLR:
+  case EHPersonality::Wasm_CXX:
     return TypeInfo->isNullValue();
   }
   llvm_unreachable("invalid enum");
@@ -2889,6 +2898,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
 /// block.
 static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
   assert(I->hasOneUse() && "Invariants didn't hold!");
+  BasicBlock *SrcBlock = I->getParent();
 
   // Cannot move control-flow-involving, volatile loads, vaarg, etc.
   if (isa<PHINode>(I) || I->isEHPad() || I->mayHaveSideEffects() ||
@@ -2918,10 +2928,20 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
       if (Scan->mayWriteToMemory())
         return false;
   }
-
   BasicBlock::iterator InsertPos = DestBlock->getFirstInsertionPt();
   I->moveBefore(&*InsertPos);
   ++NumSunkInst;
+
+  // Also sink all related debug uses from the source basic block. Otherwise we
+  // get debug use before the def.
+  SmallVector<DbgInfoIntrinsic *, 1> DbgUsers;
+  findDbgUsers(DbgUsers, I);
+  for (auto *DII : DbgUsers) {
+    if (DII->getParent() == SrcBlock) {
+      DII->moveBefore(&*InsertPos);
+      LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+    }
+  }
   return true;
 }
 
@@ -2932,7 +2952,7 @@ bool InstCombiner::run() {
 
     // Check to see if we can DCE the instruction.
     if (isInstructionTriviallyDead(I, &TLI)) {
-      DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
+      LLVM_DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
       eraseInstFromFunction(*I);
       ++NumDeadInst;
       MadeIRChange = true;
@@ -2946,7 +2966,8 @@ bool InstCombiner::run() {
     if (!I->use_empty() &&
         (I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
       if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
-        DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
+        LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I
+                          << '\n');
 
         // Add operands to the worklist.
         replaceInstUsesWith(*I, C);
@@ -2965,8 +2986,8 @@ bool InstCombiner::run() {
         KnownBits Known = computeKnownBits(I, /*Depth*/0, I);
         if (Known.isConstant()) {
           Constant *C = ConstantInt::get(Ty, Known.getConstant());
-          DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
-                        " from: " << *I << '\n');
+          LLVM_DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C
+                            << " from: " << *I << '\n');
 
           // Add operands to the worklist.
           replaceInstUsesWith(*I, C);
@@ -3005,7 +3026,7 @@ bool InstCombiner::run() {
         if (UserIsSuccessor && UserParent->getUniquePredecessor()) {
           // Okay, the CFG is simple enough, try to sink this instruction.
           if (TryToSinkInstruction(I, UserParent)) {
-            DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
+            LLVM_DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
             MadeIRChange = true;
             // We'll add uses of the sunk instruction below, but since sinking
             // can expose opportunities for it's *operands* add them to the
@@ -3025,15 +3046,15 @@ bool InstCombiner::run() {
 #ifndef NDEBUG
     std::string OrigI;
 #endif
-    DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
-    DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
+    LLVM_DEBUG(raw_string_ostream SS(OrigI); I->print(SS); OrigI = SS.str(););
+    LLVM_DEBUG(dbgs() << "IC: Visiting: " << OrigI << '\n');
 
     if (Instruction *Result = visit(*I)) {
       ++NumCombined;
       // Should we replace the old instruction with a new one?
       if (Result != I) {
-        DEBUG(dbgs() << "IC: Old = " << *I << '\n'
-                     << "    New = " << *Result << '\n');
+        LLVM_DEBUG(dbgs() << "IC: Old = " << *I << '\n'
+                          << "    New = " << *Result << '\n');
 
         if (I->getDebugLoc())
           Result->setDebugLoc(I->getDebugLoc());
@@ -3060,8 +3081,8 @@ bool InstCombiner::run() {
 
         eraseInstFromFunction(*I);
       } else {
-        DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
-                     << "    New = " << *I << '\n');
+        LLVM_DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
+                          << "    New = " << *I << '\n');
 
         // If the instruction was modified, it's possible that it is now dead.
         // if so, remove it.
@@ -3112,7 +3133,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
 
       // DCE instruction if trivially dead.
       if (isInstructionTriviallyDead(Inst, TLI)) {
         ++NumDeadInst;
-        DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
+        LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
         salvageDebugInfo(*Inst);
         Inst->eraseFromParent();
         MadeIRChange = true;
@@ -3123,8 +3144,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
       if (!Inst->use_empty() &&
           (Inst->getNumOperands() == 0 || isa<Constant>(Inst->getOperand(0))))
         if (Constant *C = ConstantFoldInstruction(Inst, DL, TLI)) {
-          DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: "
-                       << *Inst << '\n');
+          LLVM_DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *Inst
+                            << '\n');
           Inst->replaceAllUsesWith(C);
           ++NumConstProp;
           if (isInstructionTriviallyDead(Inst, TLI))
@@ -3146,9 +3167,9 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
               FoldRes = C;
 
             if (FoldRes != C) {
-              DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
-                           << "\n    Old = " << *C
-                           << "\n    New = " << *FoldRes << '\n');
+              LLVM_DEBUG(dbgs() << "IC: ConstFold operand of: " << *Inst
+                                << "\n    Old = " << *C
+                                << "\n    New = " << *FoldRes << '\n');
               U = FoldRes;
               MadeIRChange = true;
             }
@@ -3191,7 +3212,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
   return MadeIRChange;
 }
 
-/// \brief Populate the IC worklist from a function, and prune any dead basic
+/// Populate the IC worklist from a function, and prune any dead basic
 /// blocks discovered in the process.
 ///
 /// This also does basic constant propagation and other forward fixing to make
@@ -3251,8 +3272,8 @@ static bool combineInstructionsOverFunction(
   int Iteration = 0;
   while (true) {
     ++Iteration;
-    DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
-                 << F.getName() << "\n");
+    LLVM_DEBUG(dbgs() << "\n\nINSTCOMBINE ITERATION #" << Iteration << " on "
+                      << F.getName() << "\n");
 
     MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
 
@@ -3348,3 +3369,7 @@ void LLVMInitializeInstCombine(LLVMPassRegistryRef R) {
 FunctionPass *llvm::createInstructionCombiningPass(bool ExpensiveCombines) {
   return new InstructionCombiningPass(ExpensiveCombines);
 }
+
+void LLVMAddInstructionCombiningPass(LLVMPassManagerRef PM) {
+  unwrap(PM)->add(createInstructionCombiningPass());
+}
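
The final hunk exports the pass through the LLVM-C bindings, which is why the new llvm-c/Initialization.h and llvm-c/Transforms/InstCombine.h includes appear at the top of the file. Below is a minimal sketch of how a C client could drive the new entry point; it assumes the caller already has an LLVMModuleRef from elsewhere, and the helper name runInstCombine is illustrative only, not part of the patch.

    #include "llvm-c/Core.h"
    #include "llvm-c/Transforms/InstCombine.h"

    /* Run the newly exported instruction-combining pass over an existing module M. */
    static void runInstCombine(LLVMModuleRef M) {
      LLVMPassManagerRef PM = LLVMCreatePassManager();
      LLVMAddInstructionCombiningPass(PM);  /* binding added by this change */
      LLVMRunPassManager(PM, M);            /* nonzero if any pass modified the module */
      LLVMDisposePassManager(PM);
    }

Internally the binding just unwraps the pass manager and adds the pass returned by createInstructionCombiningPass(), as the last added lines of the diff show.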