Diffstat (limited to 'contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp')
 -rw-r--r--  contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp | 366
 1 file changed, 254 insertions(+), 112 deletions(-)
| diff --git a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp index e6f2aa9ef930..003db39fe5f9 100644 --- a/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/contrib/llvm-project/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -30,7 +30,6 @@  #include "llvm/Analysis/ProfileSummaryInfo.h"  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/Analysis/VectorUtils.h"  #include "llvm/CodeGen/Analysis.h" @@ -61,6 +60,8 @@  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/IntrinsicsAArch64.h" +#include "llvm/IR/IntrinsicsX86.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/IR/Module.h" @@ -73,6 +74,7 @@  #include "llvm/IR/Value.h"  #include "llvm/IR/ValueHandle.h"  #include "llvm/IR/ValueMap.h" +#include "llvm/InitializePasses.h"  #include "llvm/Pass.h"  #include "llvm/Support/BlockFrequency.h"  #include "llvm/Support/BranchProbability.h" @@ -88,7 +90,9 @@  #include "llvm/Target/TargetOptions.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/BypassSlowDivision.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/SimplifyLibCalls.h" +#include "llvm/Transforms/Utils/SizeOpts.h"  #include <algorithm>  #include <cassert>  #include <cstdint> @@ -222,6 +226,10 @@ static cl::opt<bool>                           cl::init(true),                           cl::desc("Enable splitting large offset of GEP.")); +static cl::opt<bool> EnableICMP_EQToICMP_ST( +    "cgp-icmp-eq2icmp-st", cl::Hidden, cl::init(false), +    cl::desc("Enable ICMP_EQ to ICMP_S(L|G)T conversion.")); +  namespace {  enum ExtType { @@ -251,6 +259,7 @@ class TypePromotionTransaction;      const LoopInfo *LI;      std::unique_ptr<BlockFrequencyInfo> BFI;      std::unique_ptr<BranchProbabilityInfo> BPI; +    ProfileSummaryInfo *PSI;      /// As we scan instructions optimizing them, this is the next instruction      /// to optimize. Transforms that can invalidate this should update it. @@ -293,7 +302,7 @@ class TypePromotionTransaction;      /// Keep track of SExt promoted.      ValueToSExts ValToSExtendedUses; -    /// True if optimizing for size. +    /// True if the function has the OptSize attribute.      bool OptSize;      /// DataLayout for the Function being processed. @@ -344,7 +353,7 @@ class TypePromotionTransaction;      // Get the DominatorTree, building if necessary.      
DominatorTree &getDT(Function &F) {        if (!DT) -        DT = llvm::make_unique<DominatorTree>(F); +        DT = std::make_unique<DominatorTree>(F);        return *DT;      } @@ -370,6 +379,7 @@ class TypePromotionTransaction;      bool optimizeSwitchInst(SwitchInst *SI);      bool optimizeExtractElementInst(Instruction *Inst);      bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT); +    bool fixupDbgValue(Instruction *I);      bool placeDbgValues(Function &F);      bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,                        LoadInst *&LI, Instruction *&Inst, bool HasPromoted); @@ -424,15 +434,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) {      TLI = SubtargetInfo->getTargetLowering();      TRI = SubtargetInfo->getRegisterInfo();    } -  TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); +  TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);    TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();    BPI.reset(new BranchProbabilityInfo(F, *LI));    BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI)); +  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();    OptSize = F.hasOptSize(); - -  ProfileSummaryInfo *PSI = -      &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();    if (ProfileGuidedSectionPrefix) {      if (PSI->isFunctionHotInCallGraph(&F, *BFI))        F.setSectionPrefix(".hot"); @@ -451,7 +459,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {        // bypassSlowDivision may create new BBs, but we don't want to reapply the        // optimization to those blocks.        BasicBlock* Next = BB->getNextNode(); -      EverMadeChange |= bypassSlowDivision(BB, BypassWidths); +      // F.hasOptSize is already checked in the outer if statement. +      if (!llvm::shouldOptimizeForSize(BB, PSI, BFI.get())) +        EverMadeChange |= bypassSlowDivision(BB, BypassWidths);        BB = Next;      }    } @@ -1049,7 +1059,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {        // Collect all the relocate calls associated with a statepoint        AllRelocateCalls.push_back(Relocate); -  // We need atleast one base pointer relocation + one derived pointer +  // We need at least one base pointer relocation + one derived pointer    // relocation to mangle    if (AllRelocateCalls.size() < 2)      return false; @@ -1408,6 +1418,93 @@ static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {    return MadeChange;  } +/// For pattern like: +/// +///   DomCond = icmp sgt/slt CmpOp0, CmpOp1 (might not be in DomBB) +///   ... +/// DomBB: +///   ... +///   br DomCond, TrueBB, CmpBB +/// CmpBB: (with DomBB being the single predecessor) +///   ... +///   Cmp = icmp eq CmpOp0, CmpOp1 +///   ... +/// +/// It would use two comparison on targets that lowering of icmp sgt/slt is +/// different from lowering of icmp eq (PowerPC). This function try to convert +/// 'Cmp = icmp eq CmpOp0, CmpOp1' to ' Cmp = icmp slt/sgt CmpOp0, CmpOp1'. +/// After that, DomCond and Cmp can use the same comparison so reduce one +/// comparison. +/// +/// Return true if any changes are made. 
+static bool foldICmpWithDominatingICmp(CmpInst *Cmp,
+                                       const TargetLowering &TLI) {
+  if (!EnableICMP_EQToICMP_ST && TLI.isEqualityCmpFoldedWithSignedCmp())
+    return false;
+
+  ICmpInst::Predicate Pred = Cmp->getPredicate();
+  if (Pred != ICmpInst::ICMP_EQ)
+    return false;
+
+  // If icmp eq has users other than BranchInst and SelectInst, converting it to
+  // icmp slt/sgt would introduce more redundant LLVM IR.
+  for (User *U : Cmp->users()) {
+    if (isa<BranchInst>(U))
+      continue;
+    if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == Cmp)
+      continue;
+    return false;
+  }
+
+  // This is a cheap/incomplete check for dominance - just match a single
+  // predecessor with a conditional branch.
+  BasicBlock *CmpBB = Cmp->getParent();
+  BasicBlock *DomBB = CmpBB->getSinglePredecessor();
+  if (!DomBB)
+    return false;
+
+  // We want to ensure that the only way control gets to the comparison of
+  // interest is that a less/greater than comparison on the same operands is
+  // false.
+  Value *DomCond;
+  BasicBlock *TrueBB, *FalseBB;
+  if (!match(DomBB->getTerminator(), m_Br(m_Value(DomCond), TrueBB, FalseBB)))
+    return false;
+  if (CmpBB != FalseBB)
+    return false;
+
+  Value *CmpOp0 = Cmp->getOperand(0), *CmpOp1 = Cmp->getOperand(1);
+  ICmpInst::Predicate DomPred;
+  if (!match(DomCond, m_ICmp(DomPred, m_Specific(CmpOp0), m_Specific(CmpOp1))))
+    return false;
+  if (DomPred != ICmpInst::ICMP_SGT && DomPred != ICmpInst::ICMP_SLT)
+    return false;
+
+  // Convert the equality comparison to the opposite of the dominating
+  // comparison and swap the direction for all branch/select users.
+  // We have conceptually converted:
+  // Res = (a < b) ? <LT_RES> : (a == b) ? <EQ_RES> : <GT_RES>;
+  // to
+  // Res = (a < b) ? <LT_RES> : (a > b)  ? <GT_RES> : <EQ_RES>;
+  // And similarly for branches.
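Before the user-updating loop that follows, a concrete source-level sketch of the shape this function rewrites may help. This is illustrative only and not part of the patch; the function name is invented.

```cpp
// With the dominating "a < b" test known false on entry to the second
// compare, "a == b" and "a > b" select the same outcomes once the branch
// successors are swapped, so both tests can share one signed comparison on
// targets (e.g. PowerPC) where equality and signed compares lower differently.
int three_way_compare(int a, int b) {
  if (a < b)     // DomCond: icmp slt %a, %b; its false edge leads to CmpBB
    return -1;
  if (a == b)    // Cmp: icmp eq %a, %b -> rewritten to icmp sgt %a, %b,
    return 0;    //   with the branch successors swapped to preserve behavior
  return 1;
}
```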
+  for (User *U : Cmp->users()) { +    if (auto *BI = dyn_cast<BranchInst>(U)) { +      assert(BI->isConditional() && "Must be conditional"); +      BI->swapSuccessors(); +      continue; +    } +    if (auto *SI = dyn_cast<SelectInst>(U)) { +      // Swap operands +      SI->swapValues(); +      SI->swapProfMetadata(); +      continue; +    } +    llvm_unreachable("Must be a branch or a select"); +  } +  Cmp->setPredicate(CmpInst::getSwappedPredicate(DomPred)); +  return true; +} +  bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {    if (sinkCmpExpression(Cmp, *TLI))      return true; @@ -1418,6 +1515,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {    if (combineToUSubWithOverflow(Cmp, ModifiedDT))      return true; +  if (foldICmpWithDominatingICmp(Cmp, *TLI)) +    return true; +    return false;  } @@ -1524,7 +1624,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,                       const TargetLowering &TLI, const DataLayout &DL) {    BasicBlock *UserBB = User->getParent();    DenseMap<BasicBlock *, CastInst *> InsertedTruncs; -  TruncInst *TruncI = dyn_cast<TruncInst>(User); +  auto *TruncI = cast<TruncInst>(User);    bool MadeChange = false;    for (Value::user_iterator TruncUI = TruncI->user_begin(), @@ -1812,7 +1912,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {        AllocaInst *AI;        if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&            DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2) -        AI->setAlignment(PrefAlign); +        AI->setAlignment(MaybeAlign(PrefAlign));        // Global variables can only be aligned if they are defined in this        // object (i.e. they are uniquely initialized in this object), and        // over-aligning global variables that have an explicit section is @@ -1822,7 +1922,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {            GV->getPointerAlignment(*DL) < PrefAlign &&            DL->getTypeAllocSize(GV->getValueType()) >=                MinSize + Offset2) -        GV->setAlignment(PrefAlign); +        GV->setAlignment(MaybeAlign(PrefAlign));      }      // If this is a memcpy (or similar) then we may be able to improve the      // alignment @@ -1842,7 +1942,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {    // cold block.  This interacts with our handling for loads and stores to    // ensure that we can fold all uses of a potential addressing computation    // into their uses.  TODO: generalize this to work over profiling data -  if (!OptSize && CI->hasFnAttr(Attribute::Cold)) +  bool OptForSize = OptSize || llvm::shouldOptimizeForSize(BB, PSI, BFI.get()); +  if (!OptForSize && CI->hasFnAttr(Attribute::Cold))      for (auto &Arg : CI->arg_operands()) {        if (!Arg->getType()->isPointerTy())          continue; @@ -1868,24 +1969,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {        });        return true;      } -    case Intrinsic::objectsize: { -      // Lower all uses of llvm.objectsize.* -      Value *RetVal = -          lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true); - -      resetIteratorIfInvalidatedWhileCalling(BB, [&]() { -        replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); -      }); -      return true; -    } -    case Intrinsic::is_constant: { -      // If is_constant hasn't folded away yet, lower it to false now. 
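The two intrinsics whose manual lowering is removed in this hunk are now asserted to be unreachable because they are expected to have been folded or lowered earlier in the pipeline. As a hedged illustration of the source constructs that produce them (function names are invented for the example):

```cpp
#include <cstring>

// __builtin_object_size compiles to llvm.objectsize.* when it cannot be
// folded immediately, and __builtin_constant_p compiles to llvm.is.constant.*.
// By the time CodeGenPrepare runs, both should already be gone, hence the new
// llvm_unreachable cases in this hunk.
void checked_copy(char *dst, const char *src) {
  if (__builtin_object_size(dst, 0) >= strlen(src) + 1)
    strcpy(dst, src);
}

bool known_at_compile_time(int x) {
  return __builtin_constant_p(x);
}
```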
-      Constant *RetVal = ConstantInt::get(II->getType(), 0); -      resetIteratorIfInvalidatedWhileCalling(BB, [&]() { -        replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr); -      }); -      return true; -    } +    case Intrinsic::objectsize: +      llvm_unreachable("llvm.objectsize.* should have been lowered already"); +    case Intrinsic::is_constant: +      llvm_unreachable("llvm.is.constant.* should have been lowered already");      case Intrinsic::aarch64_stlxr:      case Intrinsic::aarch64_stxr: {        ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0)); @@ -1921,6 +2008,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {      case Intrinsic::ctlz:        // If counting zeros is expensive, try to avoid it.        return despeculateCountZeros(II, TLI, DL, ModifiedDT); +    case Intrinsic::dbg_value: +      return fixupDbgValue(II);      }      if (TLI) { @@ -2025,17 +2114,18 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT    /// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail    /// call.    const Function *F = BB->getParent(); -  SmallVector<CallInst*, 4> TailCalls; +  SmallVector<BasicBlock*, 4> TailCallBBs;    if (PN) {      for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {        // Look through bitcasts.        Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();        CallInst *CI = dyn_cast<CallInst>(IncomingVal); +      BasicBlock *PredBB = PN->getIncomingBlock(I);        // Make sure the phi value is indeed produced by the tail call. -      if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) && +      if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&            TLI->mayBeEmittedAsTailCall(CI) &&            attributesPermitTailCall(F, CI, RetI, *TLI)) -        TailCalls.push_back(CI); +        TailCallBBs.push_back(PredBB);      }    } else {      SmallPtrSet<BasicBlock*, 4> VisitedBBs; @@ -2053,24 +2143,20 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT        CallInst *CI = dyn_cast<CallInst>(&*RI);        if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&            attributesPermitTailCall(F, CI, RetI, *TLI)) -        TailCalls.push_back(CI); +        TailCallBBs.push_back(*PI);      }    }    bool Changed = false; -  for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) { -    CallInst *CI = TailCalls[i]; -    CallSite CS(CI); - +  for (auto const &TailCallBB : TailCallBBs) {      // Make sure the call instruction is followed by an unconditional branch to      // the return block. -    BasicBlock *CallBB = CI->getParent(); -    BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator()); +    BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());      if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)        continue; -    // Duplicate the return into CallBB. -    (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB); +    // Duplicate the return into TailCallBB. 
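The dupRetToEnableTailCallOpts changes in this hunk track the predecessor blocks that contain the candidate calls rather than the calls themselves. As a rough source-level sketch of what the return duplication enables (names invented, not taken from the patch):

```cpp
int callee_a(int);
int callee_b(int);

// Both predecessors of the return block end in a call whose only use feeds
// the common return. Duplicating "return r;" into each predecessor turns the
// calls into "return callee_a(x);" / "return callee_b(x);", which the backend
// can then emit as tail calls.
int dispatch(int x) {
  int r;
  if (x & 1)
    r = callee_a(x);
  else
    r = callee_b(x);
  return r;
}
```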
+    (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);      ModifiedDT = Changed = true;      ++NumRetsDup;    } @@ -2684,26 +2770,26 @@ private:  void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,                                            Value *NewVal) { -  Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>( +  Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(        Inst, Idx, NewVal));  }  void TypePromotionTransaction::eraseInstruction(Instruction *Inst,                                                  Value *NewVal) {    Actions.push_back( -      llvm::make_unique<TypePromotionTransaction::InstructionRemover>( +      std::make_unique<TypePromotionTransaction::InstructionRemover>(            Inst, RemovedInsts, NewVal));  }  void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,                                                    Value *New) {    Actions.push_back( -      llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New)); +      std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));  }  void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {    Actions.push_back( -      llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy)); +      std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));  }  Value *TypePromotionTransaction::createTrunc(Instruction *Opnd, @@ -2733,7 +2819,7 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst,  void TypePromotionTransaction::moveBefore(Instruction *Inst,                                            Instruction *Before) {    Actions.push_back( -      llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>( +      std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(            Inst, Before));  } @@ -2794,16 +2880,24 @@ class AddressingModeMatcher {    /// When true, IsProfitableToFoldIntoAddressingMode always returns true.    bool IgnoreProfitability; +  /// True if we are optimizing for size. 
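The TypePromotionTransaction hunks above only migrate from llvm::make_unique to the C++14 std::make_unique, but the pattern those calls feed is worth spelling out: every speculative IR change is recorded as an action object so the whole sequence can be rolled back if promotion turns out not to be profitable. A minimal, self-contained sketch of that undo-log idea (these are not LLVM's actual classes):

```cpp
#include <iostream>
#include <memory>
#include <vector>

struct UndoableAction {
  virtual ~UndoableAction() = default;
  virtual void undo() = 0;
};

// Remembers the old value so an integer assignment can be reverted.
struct SetInt : UndoableAction {
  int &Slot;
  int OldValue;
  SetInt(int &Slot, int NewValue) : Slot(Slot), OldValue(Slot) { Slot = NewValue; }
  void undo() override { Slot = OldValue; }
};

class Transaction {
  std::vector<std::unique_ptr<UndoableAction>> Actions;

public:
  template <typename ActionT, typename... ArgTs> void record(ArgTs &&...Args) {
    Actions.push_back(std::make_unique<ActionT>(std::forward<ArgTs>(Args)...));
  }
  void rollback() {
    for (auto It = Actions.rbegin(); It != Actions.rend(); ++It)
      (*It)->undo(); // revert in reverse order, newest change first
    Actions.clear();
  }
};

int main() {
  int X = 1;
  Transaction TPT;
  TPT.record<SetInt>(X, 2);
  TPT.record<SetInt>(X, 3);
  TPT.rollback();
  std::cout << X << "\n"; // prints 1: all changes undone
}
```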
+  bool OptSize; + +  ProfileSummaryInfo *PSI; +  BlockFrequencyInfo *BFI; +    AddressingModeMatcher(        SmallVectorImpl<Instruction *> &AMI, const TargetLowering &TLI,        const TargetRegisterInfo &TRI, Type *AT, unsigned AS, Instruction *MI,        ExtAddrMode &AM, const SetOfInstrs &InsertedInsts,        InstrToOrigTy &PromotedInsts, TypePromotionTransaction &TPT, -      std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) +      std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, +      bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI)        : AddrModeInsts(AMI), TLI(TLI), TRI(TRI),          DL(MI->getModule()->getDataLayout()), AccessTy(AT), AddrSpace(AS),          MemoryInst(MI), AddrMode(AM), InsertedInsts(InsertedInsts), -        PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP) { +        PromotedInsts(PromotedInsts), TPT(TPT), LargeOffsetGEP(LargeOffsetGEP), +        OptSize(OptSize), PSI(PSI), BFI(BFI) {      IgnoreProfitability = false;    } @@ -2821,12 +2915,14 @@ public:          const TargetLowering &TLI, const TargetRegisterInfo &TRI,          const SetOfInstrs &InsertedInsts, InstrToOrigTy &PromotedInsts,          TypePromotionTransaction &TPT, -        std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP) { +        std::pair<AssertingVH<GetElementPtrInst>, int64_t> &LargeOffsetGEP, +        bool OptSize, ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI) {      ExtAddrMode Result;      bool Success = AddressingModeMatcher(AddrModeInsts, TLI, TRI, AccessTy, AS,                                           MemoryInst, Result, InsertedInsts, -                                         PromotedInsts, TPT, LargeOffsetGEP) +                                         PromotedInsts, TPT, LargeOffsetGEP, +                                         OptSize, PSI, BFI)                         .matchAddr(V, 0);      (void)Success; assert(Success && "Couldn't select *anything*?");      return Result; @@ -3049,7 +3145,7 @@ public:        To = dyn_cast<PHINode>(OldReplacement);        OldReplacement = Get(From);      } -    assert(Get(To) == To && "Replacement PHI node is already replaced."); +    assert(To && Get(To) == To && "Replacement PHI node is already replaced.");      Put(From, To);      From->replaceAllUsesWith(To);      AllPhiNodes.erase(From); @@ -3335,7 +3431,7 @@ private:          // So the values are different and does not match. So we need them to          // match. (But we register no more than one match per PHI node, so that          // we won't later try to replace them twice.) -        if (!MatchedPHIs.insert(FirstPhi).second) +        if (MatchedPHIs.insert(FirstPhi).second)            Matcher.insert({ FirstPhi, SecondPhi });          // But me must check it.          WorkList.push_back({ FirstPhi, SecondPhi }); @@ -3413,11 +3509,10 @@ private:          Select->setFalseValue(ST.Get(Map[FalseValue]));        } else {          // Must be a Phi node then. -        PHINode *PHI = cast<PHINode>(V); -        auto *CurrentPhi = dyn_cast<PHINode>(Current); +        auto *PHI = cast<PHINode>(V);          // Fill the Phi node with values from predecessors.          
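One subtle change above is in the PHI-matching code: the guard flips from !MatchedPHIs.insert(FirstPhi).second to MatchedPHIs.insert(FirstPhi).second, so a match is registered only the first time a PHI node is seen. A tiny sketch of the insert-once idiom involved (std::set is used so the snippet is self-contained; llvm::SmallPtrSet::insert returns the same kind of pair):

```cpp
#include <iostream>
#include <set>

int main() {
  std::set<int> MatchedPHIs;
  bool FirstTime = MatchedPHIs.insert(42).second;  // true: newly inserted
  bool SecondTime = MatchedPHIs.insert(42).second; // false: already present
  std::cout << FirstTime << " " << SecondTime << "\n"; // prints "1 0"
  return 0;
}
```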
for (auto B : predecessors(PHI->getParent())) { -          Value *PV = CurrentPhi->getIncomingValueForBlock(B); +          Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);            assert(Map.find(PV) != Map.end() && "No predecessor Value!");            PHI->addIncoming(ST.Get(Map[PV]), B);          } @@ -3786,13 +3881,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,    //          poisoned value                    regular value    // It should be OK since undef covers valid value.    if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) { -    const Instruction *ExtInst = -        dyn_cast<const Instruction>(*Inst->user_begin()); +    const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());      if (ExtInst->hasOneUse()) { -      const Instruction *AndInst = -          dyn_cast<const Instruction>(*ExtInst->user_begin()); +      const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());        if (AndInst && AndInst->getOpcode() == Instruction::And) { -        const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1)); +        const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));          if (Cst &&              Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))            return true; @@ -4440,7 +4533,8 @@ static bool FindAllMemoryUses(      Instruction *I,      SmallVectorImpl<std::pair<Instruction *, unsigned>> &MemoryUses,      SmallPtrSetImpl<Instruction *> &ConsideredInsts, const TargetLowering &TLI, -    const TargetRegisterInfo &TRI, int SeenInsts = 0) { +    const TargetRegisterInfo &TRI, bool OptSize, ProfileSummaryInfo *PSI, +    BlockFrequencyInfo *BFI, int SeenInsts = 0) {    // If we already considered this instruction, we're done.    if (!ConsideredInsts.insert(I).second)      return false; @@ -4449,8 +4543,6 @@ static bool FindAllMemoryUses(    if (!MightBeFoldableInst(I))      return true; -  const bool OptSize = I->getFunction()->hasOptSize(); -    // Loop over all the uses, recursively processing them.    for (Use &U : I->uses()) {      // Conservatively return true if we're seeing a large number or a deep chain @@ -4491,7 +4583,9 @@ static bool FindAllMemoryUses(      if (CallInst *CI = dyn_cast<CallInst>(UserI)) {        // If this is a cold call, we can sink the addressing calculation into        // the cold path.  See optimizeCallInst -      if (!OptSize && CI->hasFnAttr(Attribute::Cold)) +      bool OptForSize = OptSize || +          llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); +      if (!OptForSize && CI->hasFnAttr(Attribute::Cold))          continue;        InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue()); @@ -4503,8 +4597,8 @@ static bool FindAllMemoryUses(        continue;      } -    if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, -                          SeenInsts)) +    if (FindAllMemoryUses(UserI, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, +                          PSI, BFI, SeenInsts))        return true;    } @@ -4592,7 +4686,8 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,    // the use is just a particularly nice way of sinking it.    
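For readers less familiar with what FindAllMemoryUses and isProfitableToFoldIntoAddressingMode are weighing, here is a hedged source-level illustration; the function names are invented and the x86-64 encoding is only an example of an addressing mode the backend can fold.

```cpp
__attribute__((cold)) void log_bad_entry(const int *slot); // rarely executed

int read_checked(const int *table, long i) {
  // The address "table + i + 10" can fold into a single x86-64 addressing
  // mode for each load (e.g. movl 40(%rdi,%rsi,4), %eax), so rematerializing
  // it next to every memory use is essentially free.
  if (table[i + 10] < 0) {
    // A pointer argument of a cold call is treated much like a memory use:
    // the address computation can be sunk into this cold block, now gated by
    // the profile-aware OptForSize / shouldOptimizeForSize check as well.
    log_bad_entry(table + i + 10);
    return -1;
  }
  return table[i + 10];
}
```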
SmallVector<std::pair<Instruction*,unsigned>, 16> MemoryUses;    SmallPtrSet<Instruction*, 16> ConsideredInsts; -  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI)) +  if (FindAllMemoryUses(I, MemoryUses, ConsideredInsts, TLI, TRI, OptSize, +                        PSI, BFI))      return false;  // Has a non-memory, non-foldable use!    // Now that we know that all uses of this instruction are part of a chain of @@ -4628,7 +4723,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,          TPT.getRestorationPoint();      AddressingModeMatcher Matcher(          MatchedAddrModeInsts, TLI, TRI, AddressAccessTy, AS, MemoryInst, Result, -        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); +        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, BFI);      Matcher.IgnoreProfitability = true;      bool Success = Matcher.matchAddr(Address, 0);      (void)Success; assert(Success && "Couldn't select *anything*?"); @@ -4734,7 +4829,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,                                                                        0);      ExtAddrMode NewAddrMode = AddressingModeMatcher::Match(          V, AccessTy, AddrSpace, MemoryInst, AddrModeInsts, *TLI, *TRI, -        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP); +        InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP, OptSize, PSI, +        BFI.get());      GetElementPtrInst *GEP = LargeOffsetGEP.first;      if (GEP && !NewGEPBases.count(GEP)) { @@ -4794,8 +4890,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,                        << " for " << *MemoryInst << "\n");      if (SunkAddr->getType() != Addr->getType())        SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType()); -  } else if (AddrSinkUsingGEPs || -             (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) { +  } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() && +                                   TM && SubtargetInfo->addrSinkUsingGEPs())) {      // By default, we use the GEP-based method when AA is used later. This      // prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.      LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode @@ -5817,7 +5913,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {      return false;    IRBuilder<> Builder(Load->getNextNode()); -  auto *NewAnd = dyn_cast<Instruction>( +  auto *NewAnd = cast<Instruction>(        Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));    // Mark this instruction as "inserted by CGP", so that other    // optimizations don't touch it. @@ -5952,7 +6048,9 @@ bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {  /// turn it into a branch.  bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {    // If branch conversion isn't desirable, exit early. -  if (DisableSelectToBranch || OptSize || !TLI) +  if (DisableSelectToBranch || +      OptSize || llvm::shouldOptimizeForSize(SI->getParent(), PSI, BFI.get()) || +      !TLI)      return false;    // Find all consecutive select instructions that share the same condition. 
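The optimizeSelectInst early-exit above now also consults block-level profile information (shouldOptimizeForSize) alongside the OptSize attribute, because turning a select into a branch trades extra blocks and code size for the chance to skip work. A small illustrative example of that trade-off (names invented):

```cpp
// If this reaches the backend as a select/cmov, both operands are evaluated
// on every call, including the potentially slow load. Converted to a branch,
// a predictable condition lets the expensive side be skipped entirely; the
// extra basic blocks are why the conversion is avoided when optimizing for
// size.
int pick(bool use_cached, int cached, const int *slow_ptr) {
  return use_cached ? cached : *slow_ptr;
}
```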
@@ -6024,6 +6122,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {    BasicBlock *StartBlock = SI->getParent();    BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));    BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); +  BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock).getFrequency());    // Delete the unconditional branch that was just created by the split.    StartBlock->getTerminator()->eraseFromParent(); @@ -6194,35 +6293,49 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {    // OpsToSink can contain multiple uses in a use chain (e.g.    // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating -  // uses must come first, which means they are sunk first, temporarily creating -  // invalid IR. This will be fixed once their dominated users are sunk and -  // updated. +  // uses must come first, so we process the ops in reverse order so as to not +  // create invalid IR.    BasicBlock *TargetBB = I->getParent();    bool Changed = false;    SmallVector<Use *, 4> ToReplace; -  for (Use *U : OpsToSink) { +  for (Use *U : reverse(OpsToSink)) {      auto *UI = cast<Instruction>(U->get());      if (UI->getParent() == TargetBB || isa<PHINode>(UI))        continue;      ToReplace.push_back(U);    } -  SmallPtrSet<Instruction *, 4> MaybeDead; +  SetVector<Instruction *> MaybeDead; +  DenseMap<Instruction *, Instruction *> NewInstructions; +  Instruction *InsertPoint = I;    for (Use *U : ToReplace) {      auto *UI = cast<Instruction>(U->get());      Instruction *NI = UI->clone(); +    NewInstructions[UI] = NI;      MaybeDead.insert(UI);      LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n"); -    NI->insertBefore(I); +    NI->insertBefore(InsertPoint); +    InsertPoint = NI;      InsertedInsts.insert(NI); -    U->set(NI); + +    // Update the use for the new instruction, making sure that we update the +    // sunk instruction uses, if it is part of a chain that has already been +    // sunk. +    Instruction *OldI = cast<Instruction>(U->getUser()); +    if (NewInstructions.count(OldI)) +      NewInstructions[OldI]->setOperand(U->getOperandNo(), NI); +    else +      U->set(NI);      Changed = true;    }    // Remove instructions that are dead after sinking. -  for (auto *I : MaybeDead) -    if (!I->hasNUsesOrMore(1)) +  for (auto *I : MaybeDead) { +    if (!I->hasNUsesOrMore(1)) { +      LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");        I->eraseFromParent(); +    } +  }    return Changed;  } @@ -7107,7 +7220,6 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {      for (auto &I : reverse(BB)) {        if (makeBitReverse(I, *DL, *TLI)) {          MadeBitReverse = MadeChange = true; -        ModifiedDT = true;          break;        }      } @@ -7117,42 +7229,68 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {    return MadeChange;  } -// llvm.dbg.value is far away from the value then iSel may not be able -// handle it properly. iSel will drop llvm.dbg.value if it can not -// find a node corresponding to the value. +// Some CGP optimizations may move or alter what's computed in a block. Check +// whether a dbg.value intrinsic could be pointed at a more appropriate operand. +bool CodeGenPrepare::fixupDbgValue(Instruction *I) { +  assert(isa<DbgValueInst>(I)); +  DbgValueInst &DVI = *cast<DbgValueInst>(I); + +  // Does this dbg.value refer to a sunk address calculation? 
+  Value *Location = DVI.getVariableLocation(); +  WeakTrackingVH SunkAddrVH = SunkAddrs[Location]; +  Value *SunkAddr = SunkAddrVH.pointsToAliveValue() ? SunkAddrVH : nullptr; +  if (SunkAddr) { +    // Point dbg.value at locally computed address, which should give the best +    // opportunity to be accurately lowered. This update may change the type of +    // pointer being referred to; however this makes no difference to debugging +    // information, and we can't generate bitcasts that may affect codegen. +    DVI.setOperand(0, MetadataAsValue::get(DVI.getContext(), +                                           ValueAsMetadata::get(SunkAddr))); +    return true; +  } +  return false; +} + +// A llvm.dbg.value may be using a value before its definition, due to +// optimizations in this pass and others. Scan for such dbg.values, and rescue +// them by moving the dbg.value to immediately after the value definition. +// FIXME: Ideally this should never be necessary, and this has the potential +// to re-order dbg.value intrinsics.  bool CodeGenPrepare::placeDbgValues(Function &F) {    bool MadeChange = false; +  DominatorTree DT(F); +    for (BasicBlock &BB : F) { -    Instruction *PrevNonDbgInst = nullptr;      for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {        Instruction *Insn = &*BI++;        DbgValueInst *DVI = dyn_cast<DbgValueInst>(Insn); -      // Leave dbg.values that refer to an alloca alone. These -      // intrinsics describe the address of a variable (= the alloca) -      // being taken.  They should not be moved next to the alloca -      // (and to the beginning of the scope), but rather stay close to -      // where said address is used. -      if (!DVI || (DVI->getValue() && isa<AllocaInst>(DVI->getValue()))) { -        PrevNonDbgInst = Insn; +      if (!DVI)          continue; -      }        Instruction *VI = dyn_cast_or_null<Instruction>(DVI->getValue()); -      if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { -        // If VI is a phi in a block with an EHPad terminator, we can't insert -        // after it. -        if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) -          continue; -        LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" -                          << *DVI << ' ' << *VI); -        DVI->removeFromParent(); -        if (isa<PHINode>(VI)) -          DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); -        else -          DVI->insertAfter(VI); -        MadeChange = true; -        ++NumDbgValueMoved; -      } + +      if (!VI || VI->isTerminator()) +        continue; + +      // If VI is a phi in a block with an EHPad terminator, we can't insert +      // after it. +      if (isa<PHINode>(VI) && VI->getParent()->getTerminator()->isEHPad()) +        continue; + +      // If the defining instruction dominates the dbg.value, we do not need +      // to move the dbg.value. 
+      if (DT.dominates(VI, DVI)) +        continue; + +      LLVM_DEBUG(dbgs() << "Moving Debug Value before :\n" +                        << *DVI << ' ' << *VI); +      DVI->removeFromParent(); +      if (isa<PHINode>(VI)) +        DVI->insertBefore(&*VI->getParent()->getFirstInsertionPt()); +      else +        DVI->insertAfter(VI); +      MadeChange = true; +      ++NumDbgValueMoved;      }    }    return MadeChange; @@ -7208,6 +7346,10 @@ bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {      if (Br1->getMetadata(LLVMContext::MD_unpredictable))        continue; +    // The merging of mostly empty BB can cause a degenerate branch. +    if (TBB == FBB) +      continue; +      unsigned Opc;      Value *Cond1, *Cond2;      if (match(LogicOp, m_And(m_OneUse(m_Value(Cond1)), | 
