diff options
Diffstat (limited to 'contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp')
| -rw-r--r-- | contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 337 | 
1 files changed, 215 insertions, 122 deletions
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index fef051aa1b7c..385f4926b845 100644 --- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1,9 +1,8 @@  //===- InstructionCombining.cpp - Combine multiple instructions -----------===//  // -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception  //  //===----------------------------------------------------------------------===//  // @@ -47,14 +46,17 @@  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h"  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Analysis/EHPersonalities.h"  #include "llvm/Analysis/GlobalsModRef.h"  #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LazyBlockFrequencyInfo.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/MemoryBuiltins.h"  #include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/TargetFolder.h"  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/ValueTracking.h" @@ -221,6 +223,11 @@ static bool MaintainNoSignedWrap(BinaryOperator &I, Value *B, Value *C) {    return !Overflow;  } +static bool hasNoUnsignedWrap(BinaryOperator &I) { +  OverflowingBinaryOperator *OBO = dyn_cast<OverflowingBinaryOperator>(&I); +  return OBO && OBO->hasNoUnsignedWrap(); +} +  /// Conservatively clears subclassOptionalData after a reassociation or  /// commutation. We preserve fast-math flags when applicable as they can be  /// preserved. @@ -327,14 +334,19 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {            I.setOperand(1, V);            // Conservatively clear the optional flags, since they may not be            // preserved by the reassociation. -          if (MaintainNoSignedWrap(I, B, C) && +          bool IsNUW = hasNoUnsignedWrap(I) && hasNoUnsignedWrap(*Op0); +          bool IsNSW = MaintainNoSignedWrap(I, B, C); + +          ClearSubclassDataAfterReassociation(I); + +          if (IsNUW) +            I.setHasNoUnsignedWrap(true); + +          if (IsNSW &&                (!Op0 || (isa<BinaryOperator>(Op0) && Op0->hasNoSignedWrap()))) {              // Note: this is only valid because SimplifyBinOp doesn't look at              // the operands to Op0. -            I.clearSubclassOptionalData();              I.setHasNoSignedWrap(true); -          } else { -            ClearSubclassDataAfterReassociation(I);            }            Changed = true; @@ -419,8 +431,14 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {            Op0->getOpcode() == Opcode && Op1->getOpcode() == Opcode &&            match(Op0, m_OneUse(m_BinOp(m_Value(A), m_Constant(C1)))) &&            match(Op1, m_OneUse(m_BinOp(m_Value(B), m_Constant(C2))))) { -        BinaryOperator *NewBO = BinaryOperator::Create(Opcode, A, B); -        if (isa<FPMathOperator>(NewBO)) { +        bool IsNUW = hasNoUnsignedWrap(I) && +           hasNoUnsignedWrap(*Op0) && +           hasNoUnsignedWrap(*Op1); +         BinaryOperator *NewBO = (IsNUW && Opcode == Instruction::Add) ? +           BinaryOperator::CreateNUW(Opcode, A, B) : +           BinaryOperator::Create(Opcode, A, B); + +         if (isa<FPMathOperator>(NewBO)) {            FastMathFlags Flags = I.getFastMathFlags();            Flags &= Op0->getFastMathFlags();            Flags &= Op1->getFastMathFlags(); @@ -433,6 +451,8 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {          // Conservatively clear the optional flags, since they may not be          // preserved by the reassociation.          ClearSubclassDataAfterReassociation(I); +        if (IsNUW) +          I.setHasNoUnsignedWrap(true);          Changed = true;          continue; @@ -570,32 +590,44 @@ Value *InstCombiner::tryFactorization(BinaryOperator &I,      ++NumFactor;      SimplifiedInst->takeName(&I); -    // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag. -    // TODO: Check for NUW. +    // Check if we can add NSW/NUW flags to SimplifiedInst. If so, set them.      if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) {        if (isa<OverflowingBinaryOperator>(SimplifiedInst)) {          bool HasNSW = false; -        if (isa<OverflowingBinaryOperator>(&I)) +        bool HasNUW = false; +        if (isa<OverflowingBinaryOperator>(&I)) {            HasNSW = I.hasNoSignedWrap(); +          HasNUW = I.hasNoUnsignedWrap(); +        } -        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) +        if (auto *LOBO = dyn_cast<OverflowingBinaryOperator>(LHS)) {            HasNSW &= LOBO->hasNoSignedWrap(); +          HasNUW &= LOBO->hasNoUnsignedWrap(); +        } -        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) +        if (auto *ROBO = dyn_cast<OverflowingBinaryOperator>(RHS)) {            HasNSW &= ROBO->hasNoSignedWrap(); +          HasNUW &= ROBO->hasNoUnsignedWrap(); +        } -        // We can propagate 'nsw' if we know that -        //  %Y = mul nsw i16 %X, C -        //  %Z = add nsw i16 %Y, %X -        // => -        //  %Z = mul nsw i16 %X, C+1 -        // -        // iff C+1 isn't INT_MIN          const APInt *CInt;          if (TopLevelOpcode == Instruction::Add && -            InnerOpcode == Instruction::Mul) -          if (match(V, m_APInt(CInt)) && !CInt->isMinSignedValue()) -            BO->setHasNoSignedWrap(HasNSW); +            InnerOpcode == Instruction::Mul) { +          // We can propagate 'nsw' if we know that +          //  %Y = mul nsw i16 %X, C +          //  %Z = add nsw i16 %Y, %X +          // => +          //  %Z = mul nsw i16 %X, C+1 +          // +          // iff C+1 isn't INT_MIN +          if (match(V, m_APInt(CInt))) { +            if (!CInt->isMinSignedValue()) +              BO->setHasNoSignedWrap(HasNSW); +          } + +          // nuw can be propagated with any constant or nuw value. +          BO->setHasNoUnsignedWrap(HasNUW); +        }        }      }    } @@ -922,8 +954,8 @@ Instruction *InstCombiner::foldOpIntoPhi(Instruction &I, PHINode *PN) {      // If the InVal is an invoke at the end of the pred block, then we can't      // insert a computation after it without breaking the edge. -    if (InvokeInst *II = dyn_cast<InvokeInst>(InVal)) -      if (II->getParent() == NonConstBB) +    if (isa<InvokeInst>(InVal)) +      if (cast<Instruction>(InVal)->getParent() == NonConstBB)          return nullptr;      // If the incoming non-constant value is in I's block, we will remove one @@ -1416,6 +1448,30 @@ Instruction *InstCombiner::foldVectorBinop(BinaryOperator &Inst) {      return createBinOpShuffle(V1, V2, Mask);    } +  // If both arguments of a commutative binop are select-shuffles that use the +  // same mask with commuted operands, the shuffles are unnecessary. +  if (Inst.isCommutative() && +      match(LHS, m_ShuffleVector(m_Value(V1), m_Value(V2), m_Constant(Mask))) && +      match(RHS, m_ShuffleVector(m_Specific(V2), m_Specific(V1), +                                 m_Specific(Mask)))) { +    auto *LShuf = cast<ShuffleVectorInst>(LHS); +    auto *RShuf = cast<ShuffleVectorInst>(RHS); +    // TODO: Allow shuffles that contain undefs in the mask? +    //       That is legal, but it reduces undef knowledge. +    // TODO: Allow arbitrary shuffles by shuffling after binop? +    //       That might be legal, but we have to deal with poison. +    if (LShuf->isSelect() && !LShuf->getMask()->containsUndefElement() && +        RShuf->isSelect() && !RShuf->getMask()->containsUndefElement()) { +      // Example: +      // LHS = shuffle V1, V2, <0, 5, 6, 3> +      // RHS = shuffle V2, V1, <0, 5, 6, 3> +      // LHS + RHS --> (V10+V20, V21+V11, V22+V12, V13+V23) --> V1 + V2 +      Instruction *NewBO = BinaryOperator::Create(Opcode, V1, V2); +      NewBO->copyIRFlags(&Inst); +      return NewBO; +    } +  } +    // If one argument is a shuffle within one vector and the other is a constant,    // try moving the shuffle after the binary operation. This canonicalization    // intends to move shuffles closer to other shuffles and binops closer to @@ -1558,6 +1614,23 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {    if (Value *V = SimplifyGEPInst(GEPEltType, Ops, SQ.getWithInstruction(&GEP)))      return replaceInstUsesWith(GEP, V); +  // For vector geps, use the generic demanded vector support. +  if (GEP.getType()->isVectorTy()) { +    auto VWidth = GEP.getType()->getVectorNumElements(); +    APInt UndefElts(VWidth, 0); +    APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); +    if (Value *V = SimplifyDemandedVectorElts(&GEP, AllOnesEltMask, +                                              UndefElts)) { +      if (V != &GEP) +        return replaceInstUsesWith(GEP, V); +      return &GEP; +    } + +    // TODO: 1) Scalarize splat operands, 2) scalarize entire instruction if +    // possible (decide on canonical form for pointer broadcast), 3) exploit +    // undef elements to decrease demanded bits   +  } +    Value *PtrOp = GEP.getOperand(0);    // Eliminate unneeded casts for indices, and replace indices which displace @@ -1756,9 +1829,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {              // put NewSrc at same location as %src              Builder.SetInsertPoint(cast<Instruction>(PtrOp));              auto *NewSrc = cast<GetElementPtrInst>( -                Builder.CreateGEP(SO0, GO1, Src->getName())); +                Builder.CreateGEP(GEPEltType, SO0, GO1, Src->getName()));              NewSrc->setIsInBounds(Src->isInBounds()); -            auto *NewGEP = GetElementPtrInst::Create(nullptr, NewSrc, {SO1}); +            auto *NewGEP = GetElementPtrInst::Create(GEPEltType, NewSrc, {SO1});              NewGEP->setIsInBounds(GEP.isInBounds());              return NewGEP;            } @@ -1882,6 +1955,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {    if (StrippedPtr != PtrOp) {      bool HasZeroPointerIndex = false; +    Type *StrippedPtrEltTy = StrippedPtrTy->getElementType(); +      if (auto *C = dyn_cast<ConstantInt>(GEP.getOperand(1)))        HasZeroPointerIndex = C->isZero(); @@ -1895,11 +1970,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {      if (HasZeroPointerIndex) {        if (auto *CATy = dyn_cast<ArrayType>(GEPEltType)) {          // GEP (bitcast i8* X to [0 x i8]*), i32 0, ... ? -        if (CATy->getElementType() == StrippedPtrTy->getElementType()) { +        if (CATy->getElementType() == StrippedPtrEltTy) {            // -> GEP i8* X, ...            SmallVector<Value*, 8> Idx(GEP.idx_begin()+1, GEP.idx_end());            GetElementPtrInst *Res = GetElementPtrInst::Create( -              StrippedPtrTy->getElementType(), StrippedPtr, Idx, GEP.getName()); +              StrippedPtrEltTy, StrippedPtr, Idx, GEP.getName());            Res->setIsInBounds(GEP.isInBounds());            if (StrippedPtrTy->getAddressSpace() == GEP.getAddressSpace())              return Res; @@ -1912,7 +1987,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {            return new AddrSpaceCastInst(Builder.Insert(Res), GEPType);          } -        if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrTy->getElementType())) { +        if (auto *XATy = dyn_cast<ArrayType>(StrippedPtrEltTy)) {            // GEP (bitcast [10 x i8]* X to [0 x i8]*), i32 0, ... ?            if (CATy->getElementType() == XATy->getElementType()) {              // -> GEP [10 x i8]* X, i32 0, ... @@ -1935,11 +2010,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {              // %0 = GEP [10 x i8] addrspace(1)* X, ...              // addrspacecast i8 addrspace(1)* %0 to i8*              SmallVector<Value*, 8> Idx(GEP.idx_begin(), GEP.idx_end()); -            Value *NewGEP = GEP.isInBounds() -                                ? Builder.CreateInBoundsGEP( -                                      nullptr, StrippedPtr, Idx, GEP.getName()) -                                : Builder.CreateGEP(nullptr, StrippedPtr, Idx, -                                                    GEP.getName()); +            Value *NewGEP = +                GEP.isInBounds() +                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, +                                                Idx, GEP.getName()) +                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx, +                                        GEP.getName());              return new AddrSpaceCastInst(NewGEP, GEPType);            }          } @@ -1948,17 +2024,17 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        // Transform things like:        // %t = getelementptr i32* bitcast ([2 x i32]* %str to i32*), i32 %V        // into:  %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast -      Type *SrcEltTy = StrippedPtrTy->getElementType(); -      if (SrcEltTy->isArrayTy() && -          DL.getTypeAllocSize(SrcEltTy->getArrayElementType()) == +      if (StrippedPtrEltTy->isArrayTy() && +          DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType()) ==                DL.getTypeAllocSize(GEPEltType)) {          Type *IdxType = DL.getIndexType(GEPType);          Value *Idx[2] = { Constant::getNullValue(IdxType), GEP.getOperand(1) };          Value *NewGEP =              GEP.isInBounds() -                ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, Idx, +                ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, Idx,                                              GEP.getName()) -                : Builder.CreateGEP(nullptr, StrippedPtr, Idx, GEP.getName()); +                : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Idx, +                                    GEP.getName());          // V and GEP are both pointer types --> BitCast          return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP, GEPType); @@ -1968,11 +2044,11 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        // %V = mul i64 %N, 4        // %t = getelementptr i8* bitcast (i32* %arr to i8*), i32 %V        // into:  %t1 = getelementptr i32* %arr, i32 %N; bitcast -      if (GEPEltType->isSized() && SrcEltTy->isSized()) { +      if (GEPEltType->isSized() && StrippedPtrEltTy->isSized()) {          // Check that changing the type amounts to dividing the index by a scale          // factor.          uint64_t ResSize = DL.getTypeAllocSize(GEPEltType); -        uint64_t SrcSize = DL.getTypeAllocSize(SrcEltTy); +        uint64_t SrcSize = DL.getTypeAllocSize(StrippedPtrEltTy);          if (ResSize && SrcSize % ResSize == 0) {            Value *Idx = GEP.getOperand(1);            unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -1991,9 +2067,9 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {              // GEP may not be "inbounds".              Value *NewGEP =                  GEP.isInBounds() && NSW -                    ? Builder.CreateInBoundsGEP(nullptr, StrippedPtr, NewIdx, -                                                GEP.getName()) -                    : Builder.CreateGEP(nullptr, StrippedPtr, NewIdx, +                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, +                                                NewIdx, GEP.getName()) +                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, NewIdx,                                          GEP.getName());              // The NewGEP must be pointer typed, so must the old one -> BitCast @@ -2007,13 +2083,13 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        // getelementptr i8* bitcast ([100 x double]* X to i8*), i32 %tmp        //   (where tmp = 8*tmp2) into:        // getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast -      if (GEPEltType->isSized() && SrcEltTy->isSized() && -          SrcEltTy->isArrayTy()) { +      if (GEPEltType->isSized() && StrippedPtrEltTy->isSized() && +          StrippedPtrEltTy->isArrayTy()) {          // Check that changing to the array element type amounts to dividing the          // index by a scale factor.          uint64_t ResSize = DL.getTypeAllocSize(GEPEltType);          uint64_t ArrayEltSize = -            DL.getTypeAllocSize(SrcEltTy->getArrayElementType()); +            DL.getTypeAllocSize(StrippedPtrEltTy->getArrayElementType());          if (ResSize && ArrayEltSize % ResSize == 0) {            Value *Idx = GEP.getOperand(1);            unsigned BitWidth = Idx->getType()->getPrimitiveSizeInBits(); @@ -2033,11 +2109,12 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {              Type *IndTy = DL.getIndexType(GEPType);              Value *Off[2] = {Constant::getNullValue(IndTy), NewIdx}; -            Value *NewGEP = GEP.isInBounds() && NSW -                                ? Builder.CreateInBoundsGEP( -                                      SrcEltTy, StrippedPtr, Off, GEP.getName()) -                                : Builder.CreateGEP(SrcEltTy, StrippedPtr, Off, -                                                    GEP.getName()); +            Value *NewGEP = +                GEP.isInBounds() && NSW +                    ? Builder.CreateInBoundsGEP(StrippedPtrEltTy, StrippedPtr, +                                                Off, GEP.getName()) +                    : Builder.CreateGEP(StrippedPtrEltTy, StrippedPtr, Off, +                                        GEP.getName());              // The NewGEP must be pointer typed, so must the old one -> BitCast              return CastInst::CreatePointerBitCastOrAddrSpaceCast(NewGEP,                                                                   GEPType); @@ -2085,8 +2162,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        // constructing an AddrSpaceCastInst        Value *NGEP =            GEP.isInBounds() -              ? Builder.CreateInBoundsGEP(nullptr, SrcOp, {Ops[1], Ops[2]}) -              : Builder.CreateGEP(nullptr, SrcOp, {Ops[1], Ops[2]}); +              ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]}) +              : Builder.CreateGEP(SrcEltType, SrcOp, {Ops[1], Ops[2]});        NGEP->takeName(&GEP);        // Preserve GEP address space to satisfy users @@ -2133,8 +2210,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        if (FindElementAtOffset(SrcType, Offset.getSExtValue(), NewIndices)) {          Value *NGEP =              GEP.isInBounds() -                ? Builder.CreateInBoundsGEP(nullptr, SrcOp, NewIndices) -                : Builder.CreateGEP(nullptr, SrcOp, NewIndices); +                ? Builder.CreateInBoundsGEP(SrcEltType, SrcOp, NewIndices) +                : Builder.CreateGEP(SrcEltType, SrcOp, NewIndices);          if (NGEP->getType() == GEPType)            return replaceInstUsesWith(GEP, NGEP); @@ -2160,7 +2237,8 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {          APInt AllocSize(IdxWidth, DL.getTypeAllocSize(AI->getAllocatedType()));          if (BasePtrOffset.ule(AllocSize)) {            return GetElementPtrInst::CreateInBounds( -              PtrOp, makeArrayRef(Ops).slice(1), GEP.getName()); +              GEP.getSourceElementType(), PtrOp, makeArrayRef(Ops).slice(1), +              GEP.getName());          }        }      } @@ -2297,8 +2375,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {          if (II->getIntrinsicID() == Intrinsic::objectsize) { -          ConstantInt *Result = lowerObjectSizeCall(II, DL, &TLI, -                                                    /*MustSucceed=*/true); +          Value *Result = +              lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/true);            replaceInstUsesWith(*I, Result);            eraseInstFromFunction(*I);            Users[i] = nullptr; // Skip examining in the next loop. @@ -2427,9 +2505,8 @@ Instruction *InstCombiner::visitFree(CallInst &FI) {    // free undef -> unreachable.    if (isa<UndefValue>(Op)) { -    // Insert a new store to null because we cannot modify the CFG here. -    Builder.CreateStore(ConstantInt::getTrue(FI.getContext()), -                        UndefValue::get(Type::getInt1PtrTy(FI.getContext()))); +    // Leave a marker since we can't modify the CFG here. +    CreateNonTerminatorUnreachable(&FI);      return eraseInstFromFunction(FI);    } @@ -2619,53 +2696,28 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {        return ExtractValueInst::Create(IV->getInsertedValueOperand(),                                        makeArrayRef(exti, exte));    } -  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) { -    // We're extracting from an intrinsic, see if we're the only user, which -    // allows us to simplify multiple result intrinsics to simpler things that -    // just get one value. -    if (II->hasOneUse()) { -      // Check if we're grabbing the overflow bit or the result of a 'with -      // overflow' intrinsic.  If it's the latter we can remove the intrinsic +  if (WithOverflowInst *WO = dyn_cast<WithOverflowInst>(Agg)) { +    // We're extracting from an overflow intrinsic, see if we're the only user, +    // which allows us to simplify multiple result intrinsics to simpler +    // things that just get one value. +    if (WO->hasOneUse()) { +      // Check if we're grabbing only the result of a 'with overflow' intrinsic        // and replace it with a traditional binary instruction. -      switch (II->getIntrinsicID()) { -      case Intrinsic::uadd_with_overflow: -      case Intrinsic::sadd_with_overflow: -        if (*EV.idx_begin() == 0) {  // Normal result. -          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); -          replaceInstUsesWith(*II, UndefValue::get(II->getType())); -          eraseInstFromFunction(*II); -          return BinaryOperator::CreateAdd(LHS, RHS); -        } - -        // If the normal result of the add is dead, and the RHS is a constant, -        // we can transform this into a range comparison. -        // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3 -        if (II->getIntrinsicID() == Intrinsic::uadd_with_overflow) -          if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getArgOperand(1))) -            return new ICmpInst(ICmpInst::ICMP_UGT, II->getArgOperand(0), -                                ConstantExpr::getNot(CI)); -        break; -      case Intrinsic::usub_with_overflow: -      case Intrinsic::ssub_with_overflow: -        if (*EV.idx_begin() == 0) {  // Normal result. -          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); -          replaceInstUsesWith(*II, UndefValue::get(II->getType())); -          eraseInstFromFunction(*II); -          return BinaryOperator::CreateSub(LHS, RHS); -        } -        break; -      case Intrinsic::umul_with_overflow: -      case Intrinsic::smul_with_overflow: -        if (*EV.idx_begin() == 0) {  // Normal result. -          Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); -          replaceInstUsesWith(*II, UndefValue::get(II->getType())); -          eraseInstFromFunction(*II); -          return BinaryOperator::CreateMul(LHS, RHS); -        } -        break; -      default: -        break; +      if (*EV.idx_begin() == 0) { +        Instruction::BinaryOps BinOp = WO->getBinaryOp(); +        Value *LHS = WO->getLHS(), *RHS = WO->getRHS(); +        replaceInstUsesWith(*WO, UndefValue::get(WO->getType())); +        eraseInstFromFunction(*WO); +        return BinaryOperator::Create(BinOp, LHS, RHS);        } + +      // If the normal result of the add is dead, and the RHS is a constant, +      // we can transform this into a range comparison. +      // overflow = uadd a, -4  -->  overflow = icmp ugt a, 3 +      if (WO->getIntrinsicID() == Intrinsic::uadd_with_overflow) +        if (ConstantInt *CI = dyn_cast<ConstantInt>(WO->getRHS())) +          return new ICmpInst(ICmpInst::ICMP_UGT, WO->getLHS(), +                              ConstantExpr::getNot(CI));      }    }    if (LoadInst *L = dyn_cast<LoadInst>(Agg)) @@ -2688,7 +2740,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {        Builder.SetInsertPoint(L);        Value *GEP = Builder.CreateInBoundsGEP(L->getType(),                                               L->getPointerOperand(), Indices); -      Instruction *NL = Builder.CreateLoad(GEP); +      Instruction *NL = Builder.CreateLoad(EV.getType(), GEP);        // Whatever aliasing information we had for the orignal load must also        // hold for the smaller load, so propagate the annotations.        AAMDNodes Nodes; @@ -3096,13 +3148,35 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) {    ++NumSunkInst;    // Also sink all related debug uses from the source basic block. Otherwise we -  // get debug use before the def. -  SmallVector<DbgVariableIntrinsic *, 1> DbgUsers; +  // get debug use before the def. Attempt to salvage debug uses first, to +  // maximise the range variables have location for. If we cannot salvage, then +  // mark the location undef: we know it was supposed to receive a new location +  // here, but that computation has been sunk. +  SmallVector<DbgVariableIntrinsic *, 2> DbgUsers;    findDbgUsers(DbgUsers, I); -  for (auto *DII : DbgUsers) { +  for (auto *DII : reverse(DbgUsers)) {      if (DII->getParent() == SrcBlock) { -      DII->moveBefore(&*InsertPos); -      LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n'); +      // dbg.value is in the same basic block as the sunk inst, see if we can +      // salvage it. Clone a new copy of the instruction: on success we need +      // both salvaged and unsalvaged copies. +      SmallVector<DbgVariableIntrinsic *, 1> TmpUser{ +          cast<DbgVariableIntrinsic>(DII->clone())}; + +      if (!salvageDebugInfoForDbgValues(*I, TmpUser)) { +        // We are unable to salvage: sink the cloned dbg.value, and mark the +        // original as undef, terminating any earlier variable location. +        LLVM_DEBUG(dbgs() << "SINK: " << *DII << '\n'); +        TmpUser[0]->insertBefore(&*InsertPos); +        Value *Undef = UndefValue::get(I->getType()); +        DII->setOperand(0, MetadataAsValue::get(DII->getContext(), +                                                ValueAsMetadata::get(Undef))); +      } else { +        // We successfully salvaged: place the salvaged dbg.value in the +        // original location, and move the unmodified dbg.value to sink with +        // the sunk inst. +        TmpUser[0]->insertBefore(DII); +        DII->moveBefore(&*InsertPos); +      }      }    }    return true; @@ -3297,7 +3371,8 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,        if (isInstructionTriviallyDead(Inst, TLI)) {          ++NumDeadInst;          LLVM_DEBUG(dbgs() << "IC: DCE: " << *Inst << '\n'); -        salvageDebugInfo(*Inst); +        if (!salvageDebugInfo(*Inst)) +          replaceDbgUsesWithUndef(Inst);          Inst->eraseFromParent();          MadeIRChange = true;          continue; @@ -3410,7 +3485,8 @@ static bool prepareICWorklistFromFunction(Function &F, const DataLayout &DL,  static bool combineInstructionsOverFunction(      Function &F, InstCombineWorklist &Worklist, AliasAnalysis *AA,      AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, -    OptimizationRemarkEmitter &ORE, bool ExpensiveCombines = true, +    OptimizationRemarkEmitter &ORE, BlockFrequencyInfo *BFI, +    ProfileSummaryInfo *PSI, bool ExpensiveCombines = true,      LoopInfo *LI = nullptr) {    auto &DL = F.getParent()->getDataLayout();    ExpensiveCombines |= EnableExpensiveCombines; @@ -3440,8 +3516,8 @@ static bool combineInstructionsOverFunction(      MadeIRChange |= prepareICWorklistFromFunction(F, DL, &TLI, Worklist); -    InstCombiner IC(Worklist, Builder, F.optForMinSize(), ExpensiveCombines, AA, -                    AC, TLI, DT, ORE, DL, LI); +    InstCombiner IC(Worklist, Builder, F.hasMinSize(), ExpensiveCombines, AA, +                    AC, TLI, DT, ORE, BFI, PSI, DL, LI);      IC.MaxArraySizeForCombine = MaxArraySize;      if (!IC.run()) @@ -3461,8 +3537,15 @@ PreservedAnalyses InstCombinePass::run(Function &F,    auto *LI = AM.getCachedResult<LoopAnalysis>(F);    auto *AA = &AM.getResult<AAManager>(F); +  const ModuleAnalysisManager &MAM = +      AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager(); +  ProfileSummaryInfo *PSI = +      MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); +  auto *BFI = (PSI && PSI->hasProfileSummary()) ? +      &AM.getResult<BlockFrequencyAnalysis>(F) : nullptr; +    if (!combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, -                                       ExpensiveCombines, LI)) +                                       BFI, PSI, ExpensiveCombines, LI))      // No changes, all analyses are preserved.      return PreservedAnalyses::all(); @@ -3486,6 +3569,8 @@ void InstructionCombiningPass::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addPreserved<AAResultsWrapperPass>();    AU.addPreserved<BasicAAWrapperPass>();    AU.addPreserved<GlobalsAAWrapperPass>(); +  AU.addRequired<ProfileSummaryInfoWrapperPass>(); +  LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AU);  }  bool InstructionCombiningPass::runOnFunction(Function &F) { @@ -3502,9 +3587,15 @@ bool InstructionCombiningPass::runOnFunction(Function &F) {    // Optional analyses.    auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();    auto *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; +  ProfileSummaryInfo *PSI = +      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); +  BlockFrequencyInfo *BFI = +      (PSI && PSI->hasProfileSummary()) ? +      &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() : +      nullptr;    return combineInstructionsOverFunction(F, Worklist, AA, AC, TLI, DT, ORE, -                                         ExpensiveCombines, LI); +                                         BFI, PSI, ExpensiveCombines, LI);  }  char InstructionCombiningPass::ID = 0; @@ -3517,6 +3608,8 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)  INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)  INITIALIZE_PASS_END(InstructionCombiningPass, "instcombine",                      "Combine redundant instructions", false, false)  | 
