Diffstat (limited to 'lib/Transforms/InstCombine/InstructionCombining.cpp')
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 348
1 file changed, 222 insertions(+), 126 deletions(-)
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index be7d43bbcf2c..385f4926b845 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -1,9 +1,8 @@
 //===- InstructionCombining.cpp - Combine multiple instructions -----------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
@@ -47,14 +46,17 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetFolder.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
@@ -221,6 +223,11 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {
   return !Overflow;
 }
 
+static bool hasNoUnsignedWrap(BinaryOperator &I) {
+  OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I);
+  return OBO && OBO->hasNoUnsignedWrap();
+}
+
 /// Conservatively clears subclassOptionalData after a reassociation or
 /// commutation. We preserve fast-math flags when applicable as they can be
 /// preserved.
@@ -327,14 +334,19 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
       I.setOperand(1, V);
       // Conservatively clear the optional flags, since they may not be
       // preserved by the reassociation.
-      if (MaintainNoSignedWrap(I, B, C) &&
+      bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0);
+      bool IsNSW = MaintainNoSignedWrap(I, B, C);
+
+      ClearSubclassDataAfterReassociation(I);
+
+      if (IsNUW)
+        I.setHasNoUnsignedWrap(true);
+
+      if (IsNSW &&
           (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {
         // Note: this is only valid because SimplifyBinOp doesn't look at
         // the operands to Op0.
-        I.clearSubclassOptionalData();
         I.setHasNoSignedWrap(true);
-      } else {
-        ClearSubclassDataAfterReassociation(I);
       }
 
       Changed = true;
@@ -419,8 +431,14 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
         Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&
         match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&
         match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) {
-      BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B);
-      if (isa<FPMathOperator>(NewBO)) {
+      bool IsNUW = hasNoUnsignedWrap(I) &&
+                   hasNoUnsignedWrap(*Op0) &&
+                   hasNoUnsignedWrap(*Op1);
+      BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ?
+                                  BinaryOperator::CreateNUW(Opcode, A, B) :
+                                  BinaryOperator::Create(Opcode, A, B);
+
+      if (isa<FPMathOperator>(NewBO)) {
         FastMathFlags Flags = I.getFastMathFlags();
         Flags &= Op0->getFastMathFlags();
         Flags &= Op1->getFastMathFlags();
@@ -433,6 +451,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {
       // Conservatively clear the optional flags, since they may not be
       // preserved by the reassociation.
       ClearSubclassDataAfterReassociation(I);
+      if (IsNUW)
+        I.setHasNoUnsignedWrap(true);
 
       Changed = true;
       continue;
@@ -570,32 +590,44 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,
     ++NumFactor;
     SimplifiedInst->takeName(&I);
 
-    // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag.
-    // TODO: Check for NUW.
+    // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.
     if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {
       if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {
         bool HasNSW = false;
-        if (isa<OverflowingBinaryOperator>(&I))
+        bool HasNUW = false;
+        if (isa<OverflowingBinaryOperator>(&I)) {
          HasNSW = I.hasNoSignedWrap();
+          HasNUW = I.hasNoUnsignedWrap();
+        }
 
-        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS))
+        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {
           HasNSW &= LOBO->hasNoSignedWrap();
+          HasNUW &= LOBO->hasNoUnsignedWrap();
+        }
 
-        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS))
+        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {
           HasNSW &= ROBO->hasNoSignedWrap();
+          HasNUW &= ROBO->hasNoUnsignedWrap();
+        }
 
-        // We can propagate 'nsw' if we know that
-        //  %Y = mul nsw i16 %X, C
-        //  %Z = add nsw i16 %Y, %X
-        // =>
-        //  %Z = mul nsw i16 %X, C+1
-        //
-        // iff C+1 isn't INT_MIN
         const APInt *CInt;
         if (TopLevelOpcode == Instruction::Add &&
-            InnerOpcode == Instruction::Mul)
-          if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue())
-            BO->setHasNoSignedWrap(HasNSW);
+            InnerOpcode == Instruction::Mul) {
+          // We can propagate 'nsw' if we know that
+          //  %Y = mul nsw i16 %X, C
+          //  %Z = add nsw i16 %Y, %X
+          // =>
+          //  %Z = mul nsw i16 %X, C+1
+          //
+          // iff C+1 isn't INT_MIN
+          if (match(V, m_APInt(CInt))) {
+            if (!CInt->isMinSignedValue())
+              BO->setHasNoSignedWrap(HasNSW);
+          }
+
+          // nuw can be propagated with any constant or nuw value.
+          BO->setHasNoUnsignedWrap(HasNUW);
+        }
       }
     }
   }
@@ -922,8 +954,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {
 
     // If the InVal is an invoke at the end of the pred block, then we can't
    // insert a computation after it without breaking the edge.
-    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal))
-      if (II->getParent() == NonConstBB)
+    if (isa<InvokeInst>(InVal))
+      if (cast<Instruction>(InVal)->getParent() == NonConstBB)
         return nullptr;
 
    // If the incoming non-constant value is in I's block, we will remove one
@@ -1376,7 +1408,8 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
   if (match(LHS, m_ShuffleVector(m_Value(L0), m_Value(L1), m_Constant(Mask))) &&
       match(RHS, m_ShuffleVector(m_Value(R0), m_Value(R1), m_Specific(Mask))) &&
       LHS->hasOneUse() && RHS->hasOneUse() &&
-      cast<ShuffleVectorInst>(LHS)->isConcat()) {
+      cast<ShuffleVectorInst>(LHS)->isConcat() &&
+      cast<ShuffleVectorInst>(RHS)->isConcat()) {
     // This transform does not have the speculative execution constraint as
     // below because the shuffle is a concatenation. The new binops are
     // operating on exactly the same elements as the existing binop.
@@ -1415,6 +1448,30 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {
     return createBinOpShuffle(V1, V2, Mask);
   }
 
+  // If both arguments of a commutative binop are select-shuffles that use the
+  // same mask with commuted operands, the shuffles are unnecessary.
+  if (Inst.isCommutative() &&
+      match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) &&
+      match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1),
+                                 m_Specific(Mask)))) {
+    auto *LShuf = cast<ShuffleVectorInst>(LHS);
+    auto *RShuf = cast<ShuffleVectorInst>(RHS);
+    // TODO: Allow shuffles that contain undefs in the mask?
+    //       That is legal, but it reduces undef knowledge.
+    // TODO: Allow arbitrary shuffles by shuffling after binop?
+    //       That might be legal, but we have to deal with poison.
+    if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() &&
+        RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) {
+      // Example:
+      // LHS = shuffle V1, V2, <0, 5, 6, 3>
+      // RHS = shuffle V2, V1, <0, 5, 6, 3>
+      // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2
+      Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2);
+      NewBO->copyIRFlags(&Inst);
+      return NewBO;
+    }
+  }
+
   // If one argument is a shuffle within one vector and the other is a constant,
   // try moving the shuffle after the binary operation. This canonicalization
   // intends to move shuffles closer to other shuffles and binops closer to
@@ -1557,6 +1614,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
   if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))
     return replaceInstUsesWith(GEP, V);
 
+  // For vector geps, use the generic demanded vector support.
+  if (GEP.getType()->isVectorTy()) {
+    auto VWidth = GEP.getType()->getVectorNumElements();
+    APInt UndefElts(VWidth, 0);
+    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+    if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask,
+                                              UndefElts)) {
+      if (V != &GEP)
+        return replaceInstUsesWith(GEP, V);
+      return &GEP;
+    }
+
+    // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if
+    // possible (decide on canonical form for pointer broadcast), 3) exploit
+    // undef elements to decrease demanded bits
+  }
+
   Value *PtrOp = GEP.getOperand(0);
 
   // Eliminate unneeded casts for indices, and replace indices which displace
@@ -1755,9 +1829,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       // put NewSrc at same location as %src
       Builder.SetInsertPoint(cast<Instruction>(PtrOp));
       auto *NewSrc = cast<GetElementPtrInst>(
-          Builder.CreateGEP(SO0, GO1, Src->getName()));
+          Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));
       NewSrc->setIsInBounds(Src->isInBounds());
-      auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1});
+      auto *NewGEP = GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});
       NewGEP->setIsInBounds(GEP.isInBounds());
       return NewGEP;
     }
@@ -1881,6 +1955,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
 
   if (StrippedPtr != PtrOp) {
     bool HasZeroPointerIndex = false;
+    Type *StrippedPtrEltTy = StrippedPtrTy->getElementType();
+
     if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))
       HasZeroPointerIndex = C->isZero();
 
@@ -1894,11 +1970,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     if (HasZeroPointerIndex) {
       if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {
         // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ?
-        if (CATy->getElementType() == StrippedPtrTy->getElementType()) {
+        if (CATy->getElementType() == StrippedPtrEltTy) {
          // -> GEP i8* X, ...
           SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());
           GetElementPtrInst *Res = GetElementPtrInst::Create(
-              StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName());
+              StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());
           Res->setIsInBounds(GEP.isInBounds());
           if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())
             return Res;
@@ -1911,7 +1987,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);
         }
 
-        if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())) {
+        if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {
          // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?
           if (CATy->getElementType() == XATy->getElementType()) {
            // -> GEP [10 x i8]* X, i32 0, ...
@@ -1934,11 +2010,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
             // %0 = GEP [10 x i8] addrspace(1)* X, ...
             // addrspacecast i8 addrspace(1)* %0 to i8*
             SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end());
-            Value *NewGEP = GEP.isInBounds()
-                                ? Builder.CreateInBoundsGEP(
-                                      nullptr, StrippedPtr, Idx, GEP.getName())
-                                : Builder.CreateGEP(nullptr, StrippedPtr, Idx,
-                                                    GEP.getName());
+            Value *NewGEP =
+                GEP.isInBounds()
+                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+                                                Idx, GEP.getName())
+                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+                                        GEP.getName());
             return new AddrSpaceCastInst(NewGEP, GEPType);
           }
         }
@@ -1947,17 +2024,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     // Transform things like:
     // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V
    // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
-    Type *SrcEltTy = StrippedPtrTy->getElementType();
-    if (SrcEltTy->isArrayTy() &&
-        DL.getTypeAllocSize(SrcEltTy->getArrayElementType()) ==
+    if (StrippedPtrEltTy->isArrayTy() &&
+        DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==
            DL.getTypeAllocSize(GEPEltType)) {
       Type *IdxType = DL.getIndexType(GEPType);
       Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };
       Value *NewGEP =
           GEP.isInBounds()
-              ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx,
+              ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,
                                           GEP.getName())
-              : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName());
+              : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx,
+                                  GEP.getName());
 
       // V and GEP are both pointer types --> BitCast
       return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
@@ -1967,11 +2044,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     // %V = mul i64 %N, 4
    // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V
    // into:  %t1 = getelementptr i32* %arr, i32 %N; bitcast
-    if (GEPEltType->isSized() && SrcEltTy->isSized()) {
+    if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {
      // Check that changing the type amounts to dividing the index by a scale
      // factor.
       uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
-      uint64_t SrcSize = DL.getTypeAllocSize(SrcEltTy);
+      uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);
       if (ResSize && SrcSize % ResSize == 0) {
         Value *Idx = GEP.getOperand(1);
         unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -1990,9 +2067,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
         // GEP may not be "inbounds".
         Value *NewGEP =
             GEP.isInBounds() && NSW
-                ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx,
-                                            GEP.getName())
-                : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx,
+                ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+                                            NewIdx, GEP.getName())
+                : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,
                                     GEP.getName());
 
         // The NewGEP must be pointer typed, so must the old one -> BitCast
@@ -2006,13 +2083,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
    // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp
    //   (where tmp = 8*tmp2) into:
    // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
-    if (GEPEltType->isSized() && SrcEltTy->isSized() &&
-        SrcEltTy->isArrayTy()) {
+    if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() &&
+        StrippedPtrEltTy->isArrayTy()) {
      // Check that changing to the array element type amounts to dividing the
      // index by a scale factor.
       uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);
       uint64_t ArrayEltSize =
-          DL.getTypeAllocSize(SrcEltTy->getArrayElementType());
+          DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());
       if (ResSize && ArrayEltSize % ResSize == 0) {
         Value *Idx = GEP.getOperand(1);
         unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits();
@@ -2032,11 +2109,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
           Type *IndTy = DL.getIndexType(GEPType);
           Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx};
 
-          Value *NewGEP = GEP.isInBounds() && NSW
-                              ? Builder.CreateInBoundsGEP(
-                                    SrcEltTy, StrippedPtr, Off, GEP.getName())
-                              : Builder.CreateGEP(SrcEltTy, StrippedPtr, Off,
-                                                  GEP.getName());
+          Value *NewGEP =
+              GEP.isInBounds() && NSW
+                  ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr,
+                                              Off, GEP.getName())
+                  : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off,
+                                      GEP.getName());
 
           // The NewGEP must be pointer typed, so must the old one -> BitCast
           return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType);
@@ -2084,8 +2162,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     // constructing an AddrSpaceCastInst
     Value *NGEP =
         GEP.isInBounds()
-            ? Builder.CreateInBoundsGEP(nullptr, SrcOp, {Ops[1], Ops[2]})
-            : Builder.CreateGEP(nullptr, SrcOp, {Ops[1], Ops[2]});
+            ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]})
+            : Builder.CreateGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]});
     NGEP->takeName(&GEP);
 
     // Preserve GEP address space to satisfy users
@@ -2132,8 +2210,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {
         Value *NGEP =
             GEP.isInBounds()
-                ? Builder.CreateInBoundsGEP(nullptr, SrcOp, NewIndices)
-                : Builder.CreateGEP(nullptr, SrcOp, NewIndices);
+                ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices)
+                : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);
 
         if (NGEP->getType() == GEPType)
           return replaceInstUsesWith(GEP, NGEP);
@@ -2159,7 +2237,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
       APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
       if (BasePtrOffset.ule(AllocSize)) {
         return GetElementPtrInst::CreateInBounds(
-            PtrOp, makeArrayRef(Ops).slice(1), GEP.getName());
+            GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1),
+            GEP.getName());
       }
     }
   }
@@ -2296,8 +2375,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
 
       if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
         if (II->getIntrinsicID() == Intrinsic::objectsize) {
-          ConstantInt *Result = lowerObjectSizeCall(II, DL, &TLI,
-                                                    /*MustSucceed=*/true);
+          Value *Result =
+              lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);
           replaceInstUsesWith(*I, Result);
           eraseInstFromFunction(*I);
           Users[i] = nullptr; // Skip examining in the next loop.
@@ -2426,9 +2505,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {
 
   // free undef -> unreachable.
   if (isa<UndefValue>(Op)) {
-    // Insert a new store to null because we cannot modify the CFG here.
-    Builder.CreateStore(ConstantInt::getTrue(FI.getContext()),
-                        UndefValue::get(Type::getInt1PtrTy(FI.getContext())));
+    // Leave a marker since we can't modify the CFG here.
+    CreateNonTerminatorUnreachable(&FI);
     return eraseInstFromFunction(FI);
   }
 
@@ -2618,53 +2696,28 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
      return ExtractValueInst::Create(IV->getInsertedValueOperand(),
                                      makeArrayRef(exti, exte));
   }
-  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
-    // We're extracting from an intrinsic, see if we're the only user, which
-    // allows us to simplify multiple result intrinsics to simpler things that
-    // just get one value.
-    if (II->hasOneUse()) {
-      // Check if we're grabbing the overflow bit or the result of a 'with
-      // overflow' intrinsic. If it's the latter we can remove the intrinsic
+  if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) {
+    // We're extracting from an overflow intrinsic, see if we're the only user,
+    // which allows us to simplify multiple result intrinsics to simpler
+    // things that just get one value.
+    if (WO->hasOneUse()) {
+      // Check if we're grabbing only the result of a 'with overflow' intrinsic
      // and replace it with a traditional binary instruction.
-      switch (II->getIntrinsicID()) {
-      case Intrinsic::uadd_with_overflow:
-      case Intrinsic::sadd_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-          eraseInstFromFunction(*II);
-          return BinaryOperator::CreateAdd(LHS, RHS);
-        }
-
-        // If the normal result of the add is dead, and the RHS is a constant,
-        // we can transform this into a range comparison.
-        // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3
-        if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow)
-          if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1)))
-            return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0),
-                                ConstantExpr::getNot(CI));
-        break;
-      case Intrinsic::usub_with_overflow:
-      case Intrinsic::ssub_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-          eraseInstFromFunction(*II);
-          return BinaryOperator::CreateSub(LHS, RHS);
-        }
-        break;
-      case Intrinsic::umul_with_overflow:
-      case Intrinsic::smul_with_overflow:
-        if (*EV.idx_begin() == 0) {  // Normal result.
-          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
-          replaceInstUsesWith(*II, UndefValue::get(II->getType()));
-          eraseInstFromFunction(*II);
-          return BinaryOperator::CreateMul(LHS, RHS);
-        }
-        break;
-      default:
-        break;
+      if (*EV.idx_begin() == 0) {
+        Instruction::BinaryOps BinOp = WO->getBinaryOp();
+        Value *LHS = WO->getLHS(), *RHS = WO->getRHS();
+        replaceInstUsesWith(*WO, UndefValue::get(WO->getType()));
+        eraseInstFromFunction(*WO);
+        return BinaryOperator::Create(BinOp, LHS, RHS);
       }
+
+      // If the normal result of the add is dead, and the RHS is a constant,
+      // we can transform this into a range comparison.
+      // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3
+      if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow)
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS()))
+          return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(),
                               ConstantExpr::getNot(CI));
     }
   }
  if (LoadInst *L = dyn_cast<LoadInst>(Agg))
@@ -2687,7 +2740,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
     Builder.SetInsertPoint(L);
     Value *GEP = Builder.CreateInBoundsGEP(L->getType(),
                                            L->getPointerOperand(), Indices);
-    Instruction *NL = Builder.CreateLoad(GEP);
+    Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);
     // Whatever aliasing information we had for the orignal load must also
     // hold for the smaller load, so propagate the annotations.
     AAMDNodes Nodes;
@@ -3065,9 +3118,11 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
       I->isTerminator())
     return false;
 
-  // Do not sink alloca instructions out of the entry block.
-  if (isa<AllocaInst>(I) && I->getParent() ==
-        &DestBlock->getParent()->getEntryBlock())
+  // Do not sink static or dynamic alloca instructions. Static allocas must
+  // remain in the entry block, and dynamic allocas must not be sunk in between
+  // a stacksave / stackrestore pair, which would incorrectly shorten its
+  // lifetime.
+  if (isa<AllocaInst>(I))
     return false;
 
   // Do not sink into catchswitch blocks.
@@ -3093,13 +3148,35 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {
   ++NumSunkInst;
 
   // Also sink all related debug uses from the source basic block. Otherwise we
-  // get debug use before the def.
-  SmallVector<DbgVariableIntrinsic *, 1> DbgUsers;
+  // get debug use before the def. Attempt to salvage debug uses first, to
+  // maximise the range variables have location for. If we cannot salvage, then
+  // mark the location undef: we know it was supposed to receive a new location
+  // here, but that computation has been sunk.
+  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;
   findDbgUsers(DbgUsers, I);
-  for (auto *DII : DbgUsers) {
+  for (auto *DII : reverse(DbgUsers)) {
     if (DII->getParent() == SrcBlock) {
-      DII->moveBefore(&*InsertPos);
-      LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+      // dbg.value is in the same basic block as the sunk inst, see if we can
+      // salvage it. Clone a new copy of the instruction: on success we need
+      // both salvaged and unsalvaged copies.
+      SmallVector<DbgVariableIntrinsic *, 1> TmpUser{
+          cast<DbgVariableIntrinsic>(DII->clone())};
+
+      if (!salvageDebugInfoForDbgValues(*I, TmpUser)) {
+        // We are unable to salvage: sink the cloned dbg.value, and mark the
+        // original as undef, terminating any earlier variable location.
+        LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n');
+        TmpUser[0]->insertBefore(&*InsertPos);
+        Value *Undef = UndefValue::get(I->getType());
+        DII->setOperand(0, MetadataAsValue::get(DII->getContext(),
+                                                ValueAsMetadata::get(Undef)));
+      } else {
+        // We successfully salvaged: place the salvaged dbg.value in the
+        // original location, and move the unmodified dbg.value to sink with
+        // the sunk inst.
+        TmpUser[0]->insertBefore(DII);
+        DII->moveBefore(&*InsertPos);
+      }
     }
   }
   return true;
@@ -3294,7 +3371,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
       if (isInstructionTriviallyDead(Inst, TLI)) {
         ++NumDeadInst;
         LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n');
-        salvageDebugInfo(*Inst);
+        if (!salvageDebugInfo(*Inst))
+          replaceDbgUsesWithUndef(Inst);
         Inst->eraseFromParent();
         MadeIRChange = true;
         continue;
@@ -3407,7 +3485,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,
 static bool combineInstructionsOverFunction(
     Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,
     AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT,
-    OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true,
+    OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI,
+    ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,
     LoopInfo *LI = nullptr) {
   auto &DL = F.getParent()->getDataLayout();
   ExpensiveCombines |= EnableExpensiveCombines;
@@ -3437,8 +3516,8 @@
     MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
 
-    InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA,
-                    AC, TLI, DT, ORE, DL, LI);
+    InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA,
+                    AC, TLI, DT, ORE, BFI, PSI, DL, LI);
     IC.MaxArraySizeForCombine = MaxArraySize;
 
     if (!IC.run())
       break;
@@ -3458,8 +3537,15 @@ PreservedAnalyses InstCombinePass::run(Function &F,
   auto *LI = AM.getCachedResult<LoopAnalysis>(F);
 
   auto *AA = &AM.getResult<AAManager>(F);
+  const ModuleAnalysisManager &MAM =
+      AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
+  ProfileSummaryInfo *PSI =
+      MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
+  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
+      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr;
+
   if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
-                                       ExpensiveCombines, LI))
+                                       BFI, PSI, ExpensiveCombines, LI))
     // No changes, all analyses are preserved.
     return PreservedAnalyses::all();
 
@@ -3483,6 +3569,8 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addPreserved<AAResultsWrapperPass>();
   AU.addPreserved<BasicAAWrapperPass>();
   AU.addPreserved<GlobalsAAWrapperPass>();
+  AU.addRequired<ProfileSummaryInfoWrapperPass>();
+  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);
 }
 
 bool InstructionCombiningPass::runOnFunction(Function &F) {
@@ -3499,9 +3587,15 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {
   // Optional analyses.
   auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
   auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr;
+  ProfileSummaryInfo *PSI =
+      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  BlockFrequencyInfo *BFI =
+      (PSI && PSI->hasProfileSummary()) ?
+      &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
+      nullptr;
 
   return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE,
-                                         ExpensiveCombines, LI);
+                                         BFI, PSI, ExpensiveCombines, LI);
 }
 
 char InstructionCombiningPass::ID = 0;
@@ -3514,6 +3608,8 @@
 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
 INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",
                     "Combine redundant instructions", false, false)
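
Editor's note: the following IR examples are not part of the commit; they are an illustrative sketch of two of the folds above. Function and value names are hypothetical, and the "expected" lines assume running opt -instcombine at this revision.

; Reassociation can now keep the nuw flag: both adds are nuw, so
; (%x +nuw 4) +nuw 12 cannot wrap either and may fold to a single add nuw.
define i8 @reassoc_nuw(i8 %x) {
  %a = add nuw i8 %x, 4
  %b = add nuw i8 %a, 12
  ; expected: %b = add nuw i8 %x, 16
  ret i8 %b
}

; Commuted select-shuffles with the same undef-free mask cancel around a
; commutative binop, per the new fold in foldVectorBinop.
define <4 x i32> @commuted_select_shuffles(<4 x i32> %v1, <4 x i32> %v2) {
  %lhs = shufflevector <4 x i32> %v1, <4 x i32> %v2, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  %rhs = shufflevector <4 x i32> %v2, <4 x i32> %v1, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
  %r = add <4 x i32> %lhs, %rhs
  ; expected: %r = add <4 x i32> %v1, %v2
  ret <4 x i32> %r
}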