Diffstat (limited to 'lib/Transforms')
61 files changed, 1708 insertions, 609 deletions
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index a2c8a32dfe866..25db0eff8848e 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -106,9 +106,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,    AttributeList PAL = F->getAttributes();    // First, determine the new argument list -  unsigned ArgIndex = 0; +  unsigned ArgNo = 0;    for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; -       ++I, ++ArgIndex) { +       ++I, ++ArgNo) {      if (ByValArgsToTransform.count(&*I)) {        // Simple byval argument? Just add all the struct element types.        Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -120,7 +120,7 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,      } else if (!ArgsToPromote.count(&*I)) {        // Unchanged argument        Params.push_back(I->getType()); -      ArgAttrVec.push_back(PAL.getParamAttributes(ArgIndex)); +      ArgAttrVec.push_back(PAL.getParamAttributes(ArgNo));      } else if (I->use_empty()) {        // Dead argument (these are always marked as promotable)        ++NumArgumentsDead; @@ -214,12 +214,12 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,      // Loop over the operands, inserting GEP and loads in the caller as      // appropriate.      CallSite::arg_iterator AI = CS.arg_begin(); -    ArgIndex = 1; +    ArgNo = 0;      for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; -         ++I, ++AI, ++ArgIndex) +         ++I, ++AI, ++ArgNo)        if (!ArgsToPromote.count(&*I) && !ByValArgsToTransform.count(&*I)) {          Args.push_back(*AI); // Unmodified argument -        ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex)); +        ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));        } else if (ByValArgsToTransform.count(&*I)) {          // Emit a GEP and load for each element of the struct.          Type *AgTy = cast<PointerType>(I->getType())->getElementType(); @@ -280,9 +280,9 @@ doPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,        }      // Push any varargs arguments on the list. -    for (; AI != CS.arg_end(); ++AI, ++ArgIndex) { +    for (; AI != CS.arg_end(); ++AI, ++ArgNo) {        Args.push_back(*AI); -      ArgAttrVec.push_back(CallPAL.getAttributes(ArgIndex)); +      ArgAttrVec.push_back(CallPAL.getParamAttributes(ArgNo));      }      SmallVector<OperandBundleDef, 1> OpBundles; @@ -839,17 +839,12 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,      // avoiding a register copy.
if (PtrArg->hasStructRetAttr()) {        unsigned ArgNo = PtrArg->getArgNo(); -      F->setAttributes( -          F->getAttributes() -              .removeAttribute(F->getContext(), ArgNo + 1, Attribute::StructRet) -              .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); +      F->removeAttribute(ArgNo + 1, Attribute::StructRet); +      F->addAttribute(ArgNo + 1, Attribute::NoAlias);        for (Use &U : F->uses()) {          CallSite CS(U.getUser()); -        CS.setAttributes( -            CS.getAttributes() -                .removeAttribute(F->getContext(), ArgNo + 1, -                                 Attribute::StructRet) -                .addAttribute(F->getContext(), ArgNo + 1, Attribute::NoAlias)); +        CS.removeAttribute(ArgNo + 1, Attribute::StructRet); +        CS.addAttribute(ArgNo + 1, Attribute::NoAlias);        }      } diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index 9648883b7f275..031c3d8a9ebed 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -855,10 +855,11 @@ static bool addNoAliasAttrs(const SCCNodeSet &SCCNodes) {    bool MadeChange = false;    for (Function *F : SCCNodes) { -    if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy()) +    if (F->doesNotAlias(AttributeList::ReturnIndex) || +        !F->getReturnType()->isPointerTy())        continue; -    F->setDoesNotAlias(0); +    F->setDoesNotAlias(AttributeList::ReturnIndex);      ++NumNoAlias;      MadeChange = true;    } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index d66411f04cc4a..c7ef2494e3b8b 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -17,6 +17,7 @@  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/StringSet.h"  #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitcodeReader.h"  #include "llvm/IR/AutoUpgrade.h"  #include "llvm/IR/DiagnosticPrinter.h"  #include "llvm/IR/IntrinsicInst.h" @@ -25,7 +26,6 @@  #include "llvm/IRReader/IRReader.h"  #include "llvm/Linker/Linker.h"  #include "llvm/Object/IRObjectFile.h" -#include "llvm/Object/ModuleSummaryIndexObjectFile.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/SourceMgr.h" diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ae9d4ce11e0db..f277a51ae659a 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -239,7 +239,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,    // we delete a constant array, we may also be holding a pointer to one of its    // elements (or an element of one of its elements if we're dealing with an    // array of arrays) in the worklist. -  SmallVector<WeakVH, 8> WorkList(V->user_begin(), V->user_end()); +  SmallVector<WeakTrackingVH, 8> WorkList(V->user_begin(), V->user_end());    while (!WorkList.empty()) {      Value *UV = WorkList.pop_back_val();      if (!UV) @@ -1792,7 +1792,9 @@ static void makeAllConstantUsesInstructions(Constant *C) {        NewU->insertBefore(UI);        UI->replaceUsesOfWith(U, NewU);      } -    U->dropAllReferences(); +    // We've replaced all the uses, so destroy the constant. (destroyConstant +    // will update value handles and metadata.)
+  U->destroyConstant();    }  } diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index 9c83f88b2210c..a8b0f32fd785e 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,4 +20,4 @@ type = Library  name = IPO  parent = Transforms  library_name = ipo -required_libraries = Analysis BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation +required_libraries = Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index 771770ddc0600..0e478ba607be2 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -207,11 +207,13 @@ private:    /// A work queue of functions that may have been modified and should be    /// analyzed again. -  std::vector<WeakVH> Deferred; +  std::vector<WeakTrackingVH> Deferred;    /// Checks the rules of the order relation introduced among the set of functions.    /// Returns true if the sanity check passed, and false if it failed. -  bool doSanityCheck(std::vector<WeakVH> &Worklist); +#ifndef NDEBUG +  bool doSanityCheck(std::vector<WeakTrackingVH> &Worklist); +#endif    /// Insert a ComparableFunction into the FnTree, or merge it away if it's    /// equal to one that's already present. @@ -283,7 +285,8 @@ ModulePass *llvm::createMergeFunctionsPass() {    return new MergeFunctions();  } -bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { +#ifndef NDEBUG +bool MergeFunctions::doSanityCheck(std::vector<WeakTrackingVH> &Worklist) {    if (const unsigned Max = NumFunctionsForSanityCheck) {      unsigned TripleNumber = 0;      bool Valid = true; @@ -291,10 +294,12 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {      dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n";      unsigned i = 0; -    for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); +    for (std::vector<WeakTrackingVH>::iterator I = Worklist.begin(), +                                               E = Worklist.end();           I != E && i < Max; ++I, ++i) {        unsigned j = i; -      for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { +      for (std::vector<WeakTrackingVH>::iterator J = I; J != E && j < Max; +           ++J, ++j) {          Function *F1 = cast<Function>(*I);          Function *F2 = cast<Function>(*J);          int Res1 = FunctionComparator(F1, F2, &GlobalNumbers).compare(); @@ -312,7 +317,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {            continue;          unsigned k = j; -        for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; +        for (std::vector<WeakTrackingVH>::iterator K = J; K != E && k < Max;               ++k, ++K, ++TripleNumber) {            if (K == J)              continue; @@ -351,6 +356,7 @@ bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) {    }    return true;  } +#endif  bool MergeFunctions::runOnModule(Module &M) {    if (skipModule(M))      return false; @@ -381,12 +387,12 @@ bool MergeFunctions::runOnModule(Module &M) {      // consider merging it. Otherwise it is dropped and never considered again.
if ((I != S && std::prev(I)->first == I->first) ||          (std::next(I) != IE && std::next(I)->first == I->first) ) { -      Deferred.push_back(WeakVH(I->second)); +      Deferred.push_back(WeakTrackingVH(I->second));      }    }    do { -    std::vector<WeakVH> Worklist; +    std::vector<WeakTrackingVH> Worklist;      Deferred.swap(Worklist);      DEBUG(doSanityCheck(Worklist)); @@ -395,7 +401,7 @@ bool MergeFunctions::runOnModule(Module &M) {      DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n');      // Insert functions and merge them. -    for (WeakVH &I : Worklist) { +    for (WeakTrackingVH &I : Worklist) {        if (!I)          continue;        Function *F = cast<Function>(I); diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp index 78e71c18fe290..1bb9d654ec1cc 100644 --- a/lib/Transforms/IPO/PartialInlining.cpp +++ b/lib/Transforms/IPO/PartialInlining.cpp @@ -16,8 +16,12 @@  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/BlockFrequencyInfo.h"  #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/InlineCost.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/OptimizationDiagnosticInfo.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h"  #include "llvm/IR/CFG.h"  #include "llvm/IR/DiagnosticInfo.h"  #include "llvm/IR/Dominators.h" @@ -31,13 +35,18 @@ using namespace llvm;  #define DEBUG_TYPE "partial-inlining" -STATISTIC(NumPartialInlined, "Number of functions partially inlined"); +STATISTIC(NumPartialInlined, +          "Number of call sites that functions were partially inlined into.");  // Command line option to disable partial-inlining. The default is false:  static cl::opt<bool>      DisablePartialInlining("disable-partial-inlining", cl::init(false),                             cl::Hidden, cl::desc("Disable partial inlining")); +static cl::opt<unsigned> MaxNumInlineBlocks( +    "max-num-inline-blocks", cl::init(5), cl::Hidden, +    cl::desc("Max number of blocks to be partially inlined")); +  // Command line option to set the maximum number of partial inlinings allowed  // for the module. The default value of -1 means no limit.  static cl::opt<int> MaxNumPartialInlining( @@ -45,20 +54,52 @@ static cl::opt<int> MaxNumPartialInlining(      cl::desc("Max number of partial inlinings. The default is unlimited"));  namespace { + +struct FunctionOutliningInfo { +  FunctionOutliningInfo() +      : Entries(), ReturnBlock(nullptr), NonReturnBlock(nullptr), +        ReturnBlockPreds() {} +  // Returns the number of blocks to be inlined, including all blocks +  // in Entries and one return block. +  unsigned GetNumInlinedBlocks() const { return Entries.size() + 1; } + +  // A set of blocks, including the function entry, that guards +  // the region to be outlined. +  SmallVector<BasicBlock *, 4> Entries; +  // The return block that is not included in the outlined region. +  BasicBlock *ReturnBlock; +  // The dominating block of the region to be outlined.
+  BasicBlock *NonReturnBlock; +  // The set of blocks in Entries that are predecessors to ReturnBlock +  SmallVector<BasicBlock *, 4> ReturnBlockPreds; +}; +  struct PartialInlinerImpl { -  PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(std::move(IFI)) {} +  PartialInlinerImpl( +      std::function<AssumptionCache &(Function &)> *GetAC, +      std::function<TargetTransformInfo &(Function &)> *GTTI, +      Optional<function_ref<BlockFrequencyInfo &(Function &)>> GBFI, +      ProfileSummaryInfo *ProfSI) +      : GetAssumptionCache(GetAC), GetTTI(GTTI), GetBFI(GBFI), PSI(ProfSI) {}    bool run(Module &M);    Function *unswitchFunction(Function *F); +  std::unique_ptr<FunctionOutliningInfo> computeOutliningInfo(Function *F); +  private: -  InlineFunctionInfo IFI;    int NumPartialInlining = 0; +  std::function<AssumptionCache &(Function &)> *GetAssumptionCache; +  std::function<TargetTransformInfo &(Function &)> *GetTTI; +  Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI; +  ProfileSummaryInfo *PSI; +  bool shouldPartialInline(CallSite CS, OptimizationRemarkEmitter &ORE);    bool IsLimitReached() {      return (MaxNumPartialInlining != -1 &&              NumPartialInlining >= MaxNumPartialInlining);    }  }; +  struct PartialInlinerLegacyPass : public ModulePass {    static char ID; // Pass identification, replacement for typeid    PartialInlinerLegacyPass() : ModulePass(ID) { @@ -67,91 +108,319 @@ struct PartialInlinerLegacyPass : public ModulePass {    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.addRequired<AssumptionCacheTracker>(); +    AU.addRequired<ProfileSummaryInfoWrapperPass>(); +    AU.addRequired<TargetTransformInfoWrapperPass>();    }    bool runOnModule(Module &M) override {      if (skipModule(M))        return false;      AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>(); +    TargetTransformInfoWrapperPass *TTIWP = +        &getAnalysis<TargetTransformInfoWrapperPass>(); +    ProfileSummaryInfo *PSI = +        getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); +      std::function<AssumptionCache &(Function &)> GetAssumptionCache =          [&ACT](Function &F) -> AssumptionCache & {        return ACT->getAssumptionCache(F);      }; -    InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); -    return PartialInlinerImpl(IFI).run(M); + +    std::function<TargetTransformInfo &(Function &)> GetTTI = +        [&TTIWP](Function &F) -> TargetTransformInfo & { +      return TTIWP->getTTI(F); +    }; + +    return PartialInlinerImpl(&GetAssumptionCache, &GetTTI, None, PSI).run(M);    }  };  } -Function *PartialInlinerImpl::unswitchFunction(Function *F) { -  // First, verify that this function is an unswitching candidate...
-  if (F->hasAddressTaken()) -    return nullptr; - +std::unique_ptr<FunctionOutliningInfo> +PartialInlinerImpl::computeOutliningInfo(Function *F) {    BasicBlock *EntryBlock = &F->front();    BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());    if (!BR || BR->isUnconditional()) -    return nullptr; +    return std::unique_ptr<FunctionOutliningInfo>(); + +  // Returns true if Succ is BB's successor +  auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) { +    return is_contained(successors(BB), Succ); +  }; + +  auto SuccSize = [](BasicBlock *BB) { +    return std::distance(succ_begin(BB), succ_end(BB)); +  }; + +  auto IsReturnBlock = [](BasicBlock *BB) { +    TerminatorInst *TI = BB->getTerminator(); +    return isa<ReturnInst>(TI); +  }; + +  auto GetReturnBlock = [=](BasicBlock *Succ1, BasicBlock *Succ2) { +    if (IsReturnBlock(Succ1)) +      return std::make_tuple(Succ1, Succ2); +    if (IsReturnBlock(Succ2)) +      return std::make_tuple(Succ2, Succ1); + +    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); +  }; + +  // Detect a triangular shape: +  auto GetCommonSucc = [=](BasicBlock *Succ1, BasicBlock *Succ2) { +    if (IsSuccessor(Succ1, Succ2)) +      return std::make_tuple(Succ1, Succ2); +    if (IsSuccessor(Succ2, Succ1)) +      return std::make_tuple(Succ2, Succ1); + +    return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr); +  }; + +  std::unique_ptr<FunctionOutliningInfo> OutliningInfo = +      llvm::make_unique<FunctionOutliningInfo>(); + +  BasicBlock *CurrEntry = EntryBlock; +  bool CandidateFound = false; +  do { +    // The number of blocks to be inlined has already reached +    // the limit. When MaxNumInlineBlocks is set to 0 or 1, this +    // disables partial inlining for the function. +    if (OutliningInfo->GetNumInlinedBlocks() >= MaxNumInlineBlocks) +      break; + +    if (SuccSize(CurrEntry) != 2) +      break; + +    BasicBlock *Succ1 = *succ_begin(CurrEntry); +    BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1); + +    BasicBlock *ReturnBlock, *NonReturnBlock; +    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); + +    if (ReturnBlock) { +      OutliningInfo->Entries.push_back(CurrEntry); +      OutliningInfo->ReturnBlock = ReturnBlock; +      OutliningInfo->NonReturnBlock = NonReturnBlock; +      CandidateFound = true; +      break; +    } + +    BasicBlock *CommSucc; +    BasicBlock *OtherSucc; +    std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2); + +    if (!CommSucc) +      break; + +    OutliningInfo->Entries.push_back(CurrEntry); +    CurrEntry = OtherSucc; + +  } while (true); + +  if (!CandidateFound) +    return std::unique_ptr<FunctionOutliningInfo>(); + +  // Do a sanity check of the entries: there should not +  // be any successors (not in the entry set) other than +  // {ReturnBlock, NonReturnBlock} +  assert(OutliningInfo->Entries[0] == &F->front()); +  DenseSet<BasicBlock *> Entries; +  for (BasicBlock *E : OutliningInfo->Entries) +    Entries.insert(E); + +  // Returns true if BB has a predecessor which is not +  // in the Entries set.
+  auto HasNonEntryPred = [Entries](BasicBlock *BB) { +    for (auto Pred : predecessors(BB)) { +      if (!Entries.count(Pred)) +        return true; +    } +    return false; +  }; +  auto CheckAndNormalizeCandidate = +      [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) { +        for (BasicBlock *E : OutliningInfo->Entries) { +          for (auto Succ : successors(E)) { +            if (Entries.count(Succ)) +              continue; +            if (Succ == OutliningInfo->ReturnBlock) +              OutliningInfo->ReturnBlockPreds.push_back(E); +            else if (Succ != OutliningInfo->NonReturnBlock) +              return false; +          } +          // There should not be any outside incoming edges either: +          if (HasNonEntryPred(E)) +            return false; +        } +        return true; +      }; + +  if (!CheckAndNormalizeCandidate(OutliningInfo.get())) +    return std::unique_ptr<FunctionOutliningInfo>(); -  BasicBlock *ReturnBlock = nullptr; -  BasicBlock *NonReturnBlock = nullptr; -  unsigned ReturnCount = 0; -  for (BasicBlock *BB : successors(EntryBlock)) { -    if (isa<ReturnInst>(BB->getTerminator())) { -      ReturnBlock = BB; -      ReturnCount++; -    } else -      NonReturnBlock = BB; +  // Now further grow the candidate's inlining region by +  // peeling off dominating blocks from the outlining region: +  while (OutliningInfo->GetNumInlinedBlocks() < MaxNumInlineBlocks) { +    BasicBlock *Cand = OutliningInfo->NonReturnBlock; +    if (SuccSize(Cand) != 2) +      break; + +    if (HasNonEntryPred(Cand)) +      break; + +    BasicBlock *Succ1 = *succ_begin(Cand); +    BasicBlock *Succ2 = *(succ_begin(Cand) + 1); + +    BasicBlock *ReturnBlock, *NonReturnBlock; +    std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2); +    if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock) +      break; + +    if (NonReturnBlock->getSinglePredecessor() != Cand) +      break; + +    // Now grow and update OutliningInfo: +    OutliningInfo->Entries.push_back(Cand); +    OutliningInfo->NonReturnBlock = NonReturnBlock; +    OutliningInfo->ReturnBlockPreds.push_back(Cand); +    Entries.insert(Cand);    } -  if (ReturnCount != 1) +  return OutliningInfo; +} + +bool PartialInlinerImpl::shouldPartialInline(CallSite CS, +                                             OptimizationRemarkEmitter &ORE) { +  // TODO: more sharing with shouldInline in Inliner.cpp +  using namespace ore; +  Instruction *Call = CS.getInstruction(); +  Function *Callee = CS.getCalledFunction(); +  Function *Caller = CS.getCaller(); +  auto &CalleeTTI = (*GetTTI)(*Callee); +  InlineCost IC = getInlineCost(CS, getInlineParams(), CalleeTTI, +                                *GetAssumptionCache, GetBFI, PSI); + +  if (IC.isAlways()) { +    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call) +             << NV("Callee", Callee) +             << " should always be fully inlined, not partially"); +    return false; +  } + +  if (IC.isNever()) { +    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NeverInline", Call) +             << NV("Callee", Callee) << " not partially inlined into " +             << NV("Caller", Caller) +             << " because it should never be inlined (cost=never)"); +    return false; +  } + +  if (!IC) { +    ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "TooCostly", Call) +             << NV("Callee", Callee) << " not partially inlined into " +             << NV("Caller", Caller) << " because too costly to inline (cost="
+             << NV("Cost", IC.getCost()) << ", threshold=" +             << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); +    return false; +  } + +  ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBePartiallyInlined", Call) +           << NV("Callee", Callee) << " can be partially inlined into " +           << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost()) +           << " (threshold=" +           << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")"); +  return true; +} + +Function *PartialInlinerImpl::unswitchFunction(Function *F) { + +  if (F->hasAddressTaken()) +    return nullptr; + +  std::unique_ptr<FunctionOutliningInfo> OutliningInfo = +      computeOutliningInfo(F); + +  if (!OutliningInfo)      return nullptr;    // Clone the function, so that we can hack away on it.    ValueToValueMapTy VMap;    Function *DuplicateFunction = CloneFunction(F, VMap); -  DuplicateFunction->setLinkage(GlobalValue::InternalLinkage); -  BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]); -  BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]); -  BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]); +  BasicBlock *NewReturnBlock = +      cast<BasicBlock>(VMap[OutliningInfo->ReturnBlock]); +  BasicBlock *NewNonReturnBlock = +      cast<BasicBlock>(VMap[OutliningInfo->NonReturnBlock]); +  DenseSet<BasicBlock *> NewEntries; +  for (BasicBlock *BB : OutliningInfo->Entries) { +    NewEntries.insert(cast<BasicBlock>(VMap[BB])); +  }    // Go ahead and update all uses to the duplicate, so that we can just    // use the inliner functionality when we're done hacking.    F->replaceAllUsesWith(DuplicateFunction); +  auto getFirstPHI = [](BasicBlock *BB) { +    BasicBlock::iterator I = BB->begin(); +    PHINode *FirstPhi = nullptr; +    while (I != BB->end()) { +      PHINode *Phi = dyn_cast<PHINode>(I); +      if (!Phi) +        break; +      if (!FirstPhi) { +        FirstPhi = Phi; +        break; +      } +    } +    return FirstPhi; +  };    // Special hackery is needed with PHI nodes that have inputs from more than    // one extracted block.  For simplicity, just split the PHIs into a two-level    // sequence of PHIs, some of which will go in the extracted region, and some    // of which will go outside.    
BasicBlock *PreReturn = NewReturnBlock; -  NewReturnBlock = NewReturnBlock->splitBasicBlock( -      NewReturnBlock->getFirstNonPHI()->getIterator()); -  BasicBlock::iterator I = PreReturn->begin(); -  Instruction *Ins = &NewReturnBlock->front(); -  while (I != PreReturn->end()) { -    PHINode *OldPhi = dyn_cast<PHINode>(I); -    if (!OldPhi) -      break; - -    PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins); -    OldPhi->replaceAllUsesWith(RetPhi); -    Ins = NewReturnBlock->getFirstNonPHI(); - -    RetPhi->addIncoming(&*I, PreReturn); -    RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock), -                        NewEntryBlock); -    OldPhi->removeIncomingValue(NewEntryBlock); - -    ++I; +  // Only split the block when necessary: +  PHINode *FirstPhi = getFirstPHI(PreReturn); +  unsigned NumPredsFromEntries = OutliningInfo->ReturnBlockPreds.size(); +  if (FirstPhi && FirstPhi->getNumIncomingValues() > NumPredsFromEntries + 1) { + +    NewReturnBlock = NewReturnBlock->splitBasicBlock( +        NewReturnBlock->getFirstNonPHI()->getIterator()); +    BasicBlock::iterator I = PreReturn->begin(); +    Instruction *Ins = &NewReturnBlock->front(); +    while (I != PreReturn->end()) { +      PHINode *OldPhi = dyn_cast<PHINode>(I); +      if (!OldPhi) +        break; + +      PHINode *RetPhi = +          PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins); +      OldPhi->replaceAllUsesWith(RetPhi); +      Ins = NewReturnBlock->getFirstNonPHI(); + +      RetPhi->addIncoming(&*I, PreReturn); +      for (BasicBlock *E : OutliningInfo->ReturnBlockPreds) { +        BasicBlock *NewE = cast<BasicBlock>(VMap[E]); +        RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewE), NewE); +        OldPhi->removeIncomingValue(NewE); +      } +      ++I; +    } +    for (auto E : OutliningInfo->ReturnBlockPreds) { +      BasicBlock *NewE = cast<BasicBlock>(VMap[E]); +      NewE->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); +    }    } -  NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock); +  // Returns true if the block is to be partially inlined into the caller +  // (i.e. not to be extracted to the out-of-line function) +  auto ToBeInlined = [=](BasicBlock *BB) { +    return BB == NewReturnBlock || NewEntries.count(BB); +  };    // Gather up the blocks that we're going to extract.    std::vector<BasicBlock *> ToExtract;    ToExtract.push_back(NewNonReturnBlock);    for (BasicBlock &BB : *DuplicateFunction)     if (!ToBeInlined(&BB) && &BB != NewNonReturnBlock)        ToExtract.push_back(&BB);    // The CodeExtractor needs a dominator tree.
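For reference, a minimal standalone sketch of the simplest CFG shape the new computeOutliningInfo accepts: an entry block ending in a two-way conditional branch with exactly one returning successor. The helper name findSimpleReturnSucc is hypothetical and for illustration only; the real pass, as the hunks above show, additionally grows a multi-block entry region, peels off dominating blocks, and rejects candidates with outside incoming edges.

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical illustration: return the entry block's returning successor if
// F starts with the minimal outlining-candidate shape, otherwise nullptr.
static BasicBlock *findSimpleReturnSucc(Function &F) {
  auto *BR = dyn_cast<BranchInst>(F.front().getTerminator());
  if (!BR || BR->isUnconditional())
    return nullptr;
  BasicBlock *S0 = BR->getSuccessor(0), *S1 = BR->getSuccessor(1);
  bool R0 = isa<ReturnInst>(S0->getTerminator());
  bool R1 = isa<ReturnInst>(S1->getTerminator());
  if (R0 == R1) // need exactly one returning successor
    return nullptr;
  // The returning successor stays inlined with the caller; the other
  // successor roots the region handed to the CodeExtractor.
  return R0 ? S0 : S1;
}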
@@ -183,16 +452,22 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {      if (IsLimitReached())        continue; -    NumPartialInlining++;      OptimizationRemarkEmitter ORE(CS.getCaller()); +    if (!shouldPartialInline(CS, ORE)) +      continue; +      DebugLoc DLoc = CS.getInstruction()->getDebugLoc();      BasicBlock *Block = CS.getParent();      ORE.emit(OptimizationRemark(DEBUG_TYPE, "PartiallyInlined", DLoc, Block)               << ore::NV("Callee", F) << " partially inlined into "               << ore::NV("Caller", CS.getCaller())); +    InlineFunctionInfo IFI(nullptr, GetAssumptionCache);      InlineFunction(CS, IFI); +    NumPartialInlining++; +    // update stats +    NumPartialInlined++;    }    // Ditch the duplicate, since we're done with it, and rewrite all remaining @@ -200,7 +475,6 @@ Function *PartialInlinerImpl::unswitchFunction(Function *F) {    DuplicateFunction->replaceAllUsesWith(F);    DuplicateFunction->eraseFromParent(); -  ++NumPartialInlined;    return ExtractedFunction;  } @@ -246,6 +520,8 @@ char PartialInlinerLegacyPass::ID = 0;  INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",                        "Partial Inliner", false, false)  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)  INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",                      "Partial Inliner", false, false) @@ -256,12 +532,25 @@ ModulePass *llvm::createPartialInliningPass() {  PreservedAnalyses PartialInlinerPass::run(Module &M,                                            ModuleAnalysisManager &AM) {    auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); +    std::function<AssumptionCache &(Function &)> GetAssumptionCache =        [&FAM](Function &F) -> AssumptionCache & {      return FAM.getResult<AssumptionAnalysis>(F);    }; -  InlineFunctionInfo IFI(nullptr, &GetAssumptionCache); -  if (PartialInlinerImpl(IFI).run(M)) + +  std::function<BlockFrequencyInfo &(Function &)> GetBFI = +      [&FAM](Function &F) -> BlockFrequencyInfo & { +    return FAM.getResult<BlockFrequencyAnalysis>(F); +  }; + +  std::function<TargetTransformInfo &(Function &)> GetTTI = +      [&FAM](Function &F) -> TargetTransformInfo & { +    return FAM.getResult<TargetIRAnalysis>(F); +  }; + +  ProfileSummaryInfo *PSI = &AM.getResult<ProfileSummaryAnalysis>(M); + +  if (PartialInlinerImpl(&GetAssumptionCache, &GetTTI, {GetBFI}, PSI).run(M))      return PreservedAnalyses::none();    return PreservedAnalyses::all();  } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 0d5910ebbfcc9..203594572618a 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -38,6 +38,7 @@  #include "llvm/Transforms/Instrumentation.h"  #include "llvm/Transforms/Scalar.h"  #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"  #include "llvm/Transforms/Vectorize.h"  using namespace llvm; @@ -137,14 +138,19 @@ static cl::opt<int> PreInlineThreshold(               "(default = 75)"));  static cl::opt<bool> EnableGVNHoist( -    "enable-gvn-hoist", cl::init(true), cl::Hidden, -    cl::desc("Enable the GVN hoisting pass (default = on)")); +    "enable-gvn-hoist", cl::init(false), cl::Hidden, +    cl::desc("Enable the GVN hoisting pass (default = off)"));  static cl::opt<bool>      
DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),                                cl::Hidden,                                cl::desc("Disable shrink-wrap library calls")); +static cl::opt<bool> +    EnableSimpleLoopUnswitch("enable-simple-loop-unswitch", cl::init(false), +                             cl::Hidden, +                             cl::desc("Enable the simple loop unswitch pass.")); +  PassManagerBuilder::PassManagerBuilder() {      OptLevel = 2;      SizeLevel = 0; @@ -318,7 +324,10 @@ void PassManagerBuilder::addFunctionSimplificationPasses(    // Rotate Loop - disable header duplication at -Oz    MPM.add(createLoopRotatePass(SizeLevel == 2 ? 0 : -1));    MPM.add(createLICMPass());                  // Hoist loop invariants -  MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget)); +  if (EnableSimpleLoopUnswitch) +    MPM.add(createSimpleLoopUnswitchLegacyPass()); +  else +    MPM.add(createLoopUnswitchPass(SizeLevel || OptLevel < 3, DivergentTarget));    MPM.add(createCFGSimplificationPass());    addInstructionCombiningPass(MPM);    MPM.add(createIndVarSimplifyPass());        // Canonicalize indvars diff --git a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 9801a0a614165..d3a3c24ce7b44 100644 --- a/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -30,42 +30,11 @@  #include "llvm/Transforms/IPO.h"  #include "llvm/Transforms/IPO/FunctionAttrs.h"  #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/ModuleUtils.h"  using namespace llvm;  namespace { -// Produce a unique identifier for this module by taking the MD5 sum of the -// names of the module's strong external symbols. This identifier is -// normally guaranteed to be unique, or the program would fail to link due to -// multiply defined symbols. -// -// If the module has no strong external symbols (such a module may still have a -// semantic effect if it performs global initialization), we cannot produce a -// unique identifier for this module, so we return the empty string, which -// causes the entire module to be written as a regular LTO module. -std::string getModuleId(Module *M) { -  MD5 Md5; -  bool ExportsSymbols = false; -  for (auto &GV : M->global_values()) { -    if (GV.isDeclaration() || GV.getName().startswith("llvm.") || -        !GV.hasExternalLinkage()) -      continue; -    ExportsSymbols = true; -    Md5.update(GV.getName()); -    Md5.update(ArrayRef<uint8_t>{0}); -  } - -  if (!ExportsSymbols) -    return ""; - -  MD5::MD5Result R; -  Md5.final(R); - -  SmallString<32> Str; -  MD5::stringifyResult(R, Str); -  return ("$" + Str).str(); -} -  // Promote each local-linkage entity defined by ExportM and used by ImportM by  // changing visibility and appending the given ModuleId.  void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) { @@ -251,7 +220,7 @@ void forEachVirtualFunction(Constant *C, function_ref<void(Function *)> Fn) {  void splitAndWriteThinLTOBitcode(      raw_ostream &OS, raw_ostream *ThinLinkOS,      function_ref<AAResults &(Function &)> AARGetter, Module &M) { -  std::string ModuleId = getModuleId(&M); +  std::string ModuleId = getUniqueModuleId(&M);    if (ModuleId.empty()) {      // We couldn't generate a module ID for this module, just write it out as a      // regular LTO module. 
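The getModuleId helper deleted above is replaced by getUniqueModuleId, which the new ModuleUtils.h include suggests now lives in Transforms/Utils. A condensed sketch of the computation, reconstructed from the deleted body (only the algorithm is taken from the hunk; the function name here is ours): hash the names of the module's strong external symbols, which must be unique program-wide or the program would have failed to link.

#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MD5.h"
using namespace llvm;

// Sketch: derive a stable per-module ID from strong exported symbol names.
// Returns "" if nothing strong is exported; the caller then falls back to
// writing a regular LTO module, as above.
static std::string computeModuleIdSketch(Module &M) {
  MD5 Md5;
  bool ExportsSymbols = false;
  for (auto &GV : M.global_values()) {
    if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
        !GV.hasExternalLinkage())
      continue;
    ExportsSymbols = true;
    Md5.update(GV.getName());
    Md5.update(ArrayRef<uint8_t>{0}); // separator between names
  }
  if (!ExportsSymbols)
    return "";
  MD5::MD5Result R;
  Md5.final(R);
  SmallString<32> Str;
  MD5::stringifyResult(R, Str);
  return ("$" + Str).str();
}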
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index 030461004f568..4f1f194997683 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -861,7 +861,7 @@ static bool checkRippleForAdd(const APInt &Op0KnownZero,    // Find the most significant known 0 other than the sign bit.    int BitWidth = Op0KnownZero.getBitWidth();    APInt Op0KnownZeroTemp(Op0KnownZero); -  Op0KnownZeroTemp.clearBit(BitWidth - 1); +  Op0KnownZeroTemp.clearSignBit();    int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1;    int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1; @@ -1037,7 +1037,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {      return replaceInstUsesWith(I, V);    if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), -                                 I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) +                                 I.hasNoUnsignedWrap(), SQ))      return replaceInstUsesWith(I, V);     // (A*B)+(A*C) -> A*(B+C) etc @@ -1358,8 +1358,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = -          SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), SQ))      return replaceInstUsesWith(I, V);    if (isa<Constant>(RHS)) @@ -1550,7 +1549,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {      return replaceInstUsesWith(I, V);    if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), -                                 I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) +                                 I.hasNoUnsignedWrap(), SQ))      return replaceInstUsesWith(I, V);    // (A*B)-(A*C) -> A*(B-C) etc @@ -1756,8 +1755,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = -          SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), SQ))      return replaceInstUsesWith(I, V);    // fsub nsz 0, X ==> fsub nsz -0.0, X diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index a97b5a9ec0bb8..c7092bf3a398b 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1213,7 +1213,7 @@ static Instruction *foldAndToXor(BinaryOperator &I,    // (~B | A) & (~A | B) --> ~(A ^ B)    // (~B | A) & (B | ~A) --> ~(A ^ B)    if (match(Op0, m_c_Or(m_Value(A), m_Not(m_Value(B)))) && -      match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Value(B)))) +      match(Op1, m_c_Or(m_Not(m_Specific(A)), m_Specific(B))))      return BinaryOperator::CreateNot(Builder.CreateXor(A, B));    return nullptr; @@ -1254,7 +1254,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyAndInst(Op0, Op1, SQ))      return replaceInstUsesWith(I, V);    // See if we can simplify any instructions used by the instruction whose sole @@ -2039,7 +2039,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, 
V); -  if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyOrInst(Op0, Op1, SQ))      return replaceInstUsesWith(I, V);    // See if we can simplify any instructions used by the instruction whose sole @@ -2415,7 +2415,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyXorInst(Op0, Op1, SQ))      return replaceInstUsesWith(I, V);    if (Instruction *NewXor = foldXorToXor(I)) @@ -2433,25 +2433,32 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {    if (Value *V = SimplifyBSwap(I))      return replaceInstUsesWith(I, V); +  // Apply DeMorgan's Law for 'nand' / 'nor' logic with an inverted operand. +  Value *X, *Y; + +  // We must eliminate the and/or (one-use) for these transforms to not increase +  // the instruction count. +  // ~(~X & Y) --> (X | ~Y) +  // ~(Y & ~X) --> (X | ~Y) +  if (match(&I, m_Not(m_OneUse(m_c_And(m_Not(m_Value(X)), m_Value(Y)))))) { +    Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); +    return BinaryOperator::CreateOr(X, NotY); +  } +  // ~(~X | Y) --> (X & ~Y) +  // ~(Y | ~X) --> (X & ~Y) +  if (match(&I, m_Not(m_OneUse(m_c_Or(m_Not(m_Value(X)), m_Value(Y)))))) { +    Value *NotY = Builder->CreateNot(Y, Y->getName() + ".not"); +    return BinaryOperator::CreateAnd(X, NotY); +  } +    // Is this a 'not' (~) fed by a binary operator?    BinaryOperator *NotOp;    if (match(&I, m_Not(m_BinOp(NotOp)))) {      if (NotOp->getOpcode() == Instruction::And ||          NotOp->getOpcode() == Instruction::Or) { -      // ~(~X & Y) --> (X | ~Y) - De Morgan's Law -      // ~(~X | Y) === (X & ~Y) - De Morgan's Law -      if (dyn_castNotVal(NotOp->getOperand(1))) -        NotOp->swapOperands(); -      if (Value *Op0NotVal = dyn_castNotVal(NotOp->getOperand(0))) { -        Value *NotY = Builder->CreateNot( -            NotOp->getOperand(1), NotOp->getOperand(1)->getName() + ".not"); -        if (NotOp->getOpcode() == Instruction::And) -          return BinaryOperator::CreateOr(Op0NotVal, NotY); -        return BinaryOperator::CreateAnd(Op0NotVal, NotY); -      } - -      // ~(X & Y) --> (~X | ~Y) - De Morgan's Law -      // ~(X | Y) === (~X & ~Y) - De Morgan's Law +      // Apply DeMorgan's Law when inverts are free: +      // ~(X & Y) --> (~X | ~Y) +      // ~(X | Y) --> (~X & ~Y)        if (IsFreeToInvert(NotOp->getOperand(0),                           NotOp->getOperand(0)->hasOneUse()) &&            IsFreeToInvert(NotOp->getOperand(1), diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index 313ab13b9e2b7..e9286b1bf1750 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -379,7 +379,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,      for (unsigned i = 0; i != NumSubElts; ++i) {        unsigned SubEltIdx = (NumSubElts - 1) - i;        auto SubElt = cast<ConstantInt>(CDV->getElementAsConstant(SubEltIdx)); -      Count = Count.shl(BitWidth); +      Count <<= BitWidth;        Count |= SubElt->getValue().zextOrTrunc(64);      }    } @@ -1384,17 +1384,17 @@ static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {    // Create a mask for bits above (ctlz) or below (cttz) the first known one.    bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz; -  unsigned NumMaskBits = IsTZ ? 
Known.One.countTrailingZeros() -                              : Known.One.countLeadingZeros(); -  APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits) -                    : APInt::getHighBitsSet(BitWidth, NumMaskBits); +  unsigned PossibleZeros = IsTZ ? Known.One.countTrailingZeros() +                                : Known.One.countLeadingZeros(); +  unsigned DefiniteZeros = IsTZ ? Known.Zero.countTrailingOnes() +                                : Known.Zero.countLeadingOnes();    // If all bits above (ctlz) or below (cttz) the first known one are known    // zero, this value is constant.    // FIXME: This should be in InstSimplify because we're replacing an    // instruction with a constant. -  if (Mask.isSubsetOf(Known.Zero)) { -    auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits)); +  if (PossibleZeros == DefiniteZeros) { +    auto *C = ConstantInt::get(IT, DefiniteZeros);      return IC.replaceInstUsesWith(II, C);    } @@ -1818,8 +1818,8 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {  /// lifting.  Instruction *InstCombiner::visitCallInst(CallInst &CI) {    auto Args = CI.arg_operands(); -  if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL, -                              &TLI, &DT, &AC)) +  if (Value *V = +          SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), SQ))      return replaceInstUsesWith(CI, V);    if (isFreeCall(&CI, &TLI)) diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index d846a631b96f6..60970775de63b 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -190,8 +190,8 @@ static void computeSignedMinMaxValuesFromKnownBits(const KnownBits &Known,    Max = Known.One|UnknownBits;    if (UnknownBits.isNegative()) { // Sign bit is unknown -    Min.setBit(Min.getBitWidth()-1); -    Max.clearBit(Max.getBitWidth()-1); +    Min.setSignBit(); +    Max.clearSignBit();    }  } @@ -4269,8 +4269,8 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {      Changed = true;    } -  if (Value *V = -          SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I)) +  if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, +                                  SQ.getWithInstruction(&I)))      return replaceInstUsesWith(I, V);    // comparing -val or val with non-zero is the same as just comparing val @@ -4778,8 +4778,9 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); -  if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, -                                  I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I)) +  if (Value *V = +          SimplifyFCmpInst(I.getPredicate(), Op0, Op1, I.getFastMathFlags(), +                           SQ.getWithInstruction(&I)))      return replaceInstUsesWith(I, V);    // Simplify 'fcmp pred X, X' diff --git a/lib/Transforms/InstCombine/InstCombineInternal.h b/lib/Transforms/InstCombine/InstCombineInternal.h index 776686d3d1171..3be6419a129a4 100644 --- a/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/lib/Transforms/InstCombine/InstCombineInternal.h @@ -17,9 +17,11 @@  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/TargetFolder.h"  #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DIBuilder.h"  
#include "llvm/IR/Dominators.h"  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/InstVisitor.h" @@ -27,10 +29,9 @@  #include "llvm/IR/Operator.h"  #include "llvm/IR/PatternMatch.h"  #include "llvm/Pass.h" +#include "llvm/Support/Dwarf.h"  #include "llvm/Transforms/InstCombine/InstCombineWorklist.h"  #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/IR/DIBuilder.h"  #define DEBUG_TYPE "instcombine" @@ -193,7 +194,7 @@ private:    TargetLibraryInfo &TLI;    DominatorTree &DT;    const DataLayout &DL; - +  const SimplifyQuery SQ;    // Optional analyses. When non-null, these can both be used to do better    // combining and will be updated to reflect any changes.    LoopInfo *LI; @@ -203,11 +204,11 @@ private:  public:    InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,                 bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA, -               AssumptionCache &AC, TargetLibraryInfo &TLI, -               DominatorTree &DT, const DataLayout &DL, LoopInfo *LI) +               AssumptionCache &AC, TargetLibraryInfo &TLI, DominatorTree &DT, +               const DataLayout &DL, LoopInfo *LI)        : Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),          ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT), -        DL(DL), LI(LI), MadeIRChange(false) {} +        DL(DL), SQ(DL, &TLI, &DT, &AC), LI(LI), MadeIRChange(false) {}    /// \brief Run the combiner over the entire worklist until it is empty.    /// @@ -533,6 +534,12 @@ private:    /// value, or null if it didn't simplify.    Value *SimplifyUsingDistributiveLaws(BinaryOperator &I); +  /// This tries to simplify binary operations by factorizing out common terms +  /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). +  Value *tryFactorization(InstCombiner::BuilderTy *, BinaryOperator &, +                          Instruction::BinaryOps, Value *, Value *, Value *, +                          Value *); +    /// \brief Attempts to replace V with a simpler value based on the demanded    /// bits.    
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask, KnownBits &Known, diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index ce66581a491a0..face9d9237ae1 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -179,7 +179,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyMulInst(Op0, Op1, SQ))      return replaceInstUsesWith(I, V);    if (Value *V = SimplifyUsingDistributiveLaws(I)) @@ -606,8 +606,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {    if (isa<Constant>(Op0))      std::swap(Op0, Op1); -  if (Value *V = -          SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), SQ))      return replaceInstUsesWith(I, V);    bool AllowReassociate = I.hasUnsafeAlgebra(); @@ -1111,7 +1110,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyUDivInst(Op0, Op1, SQ))      return replaceInstUsesWith(I, V);    // Handle the integer div common cases @@ -1184,7 +1183,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifySDivInst(Op0, Op1, SQ))      return replaceInstUsesWith(I, V);    // Handle the integer div common cases @@ -1296,8 +1295,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), -                                  DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(), SQ))      return replaceInstUsesWith(I, V);    if (isa<Constant>(Op0)) @@ -1481,7 +1479,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyURemInst(Op0, Op1, SQ))      return replaceInstUsesWith(I, V);    if (Instruction *common = commonIRemTransforms(I)) @@ -1524,7 +1522,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifySRemInst(Op0, Op1, SQ))      return replaceInstUsesWith(I, V);    // Handle the integer rem common cases @@ -1597,8 +1595,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {    if (Value *V = SimplifyVectorOp(I))      return replaceInstUsesWith(I, V); -  if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), -                                  DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(), SQ))      return replaceInstUsesWith(I, V);    // Handle cases involving: rem X, (select Cond, Y, Z) diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp index 85e5b6ba2dc2f..1117c11f4f51d 100644 
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp +++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp @@ -880,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {  // PHINode simplification  //  Instruction *InstCombiner::visitPHINode(PHINode &PN) { -  if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyInstruction(&PN, SQ))      return replaceInstUsesWith(PN, V);    if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN)) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 76829c5e457bf..7afb8814fe52d 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1121,8 +1121,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {    Value *FalseVal = SI.getFalseValue();    Type *SelType = SI.getType(); -  if (Value *V = -          SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifySelectInst(CondVal, TrueVal, FalseVal, SQ))      return replaceInstUsesWith(SI, V);    if (Instruction *I = canonicalizeSelectToShuffle(SI)) diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index f77d713b9b071..219effce7ba56 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -520,7 +520,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);    if (Value *V = SimplifyShlInst(Op0, Op1, I.hasNoSignedWrap(), -                                 I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC)) +                                 I.hasNoUnsignedWrap(), SQ))      return replaceInstUsesWith(I, V);    if (Instruction *V = commonShiftTransforms(I)) @@ -618,7 +618,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {      return replaceInstUsesWith(I, V);    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); -  if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyLShrInst(Op0, Op1, I.isExact(), SQ))      return replaceInstUsesWith(I, V);    if (Instruction *R = commonShiftTransforms(I)) @@ -702,7 +702,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {      return replaceInstUsesWith(I, V);    Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); -  if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyAShrInst(Op0, Op1, I.isExact(), SQ))      return replaceInstUsesWith(I, V);    if (Instruction *R = commonShiftTransforms(I)) diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 8d0ed8532779e..0195c5e727c9a 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -589,12 +589,12 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,          // If LHS is non-negative or has all low bits zero, then the upper bits          // are all zero. -        if (LHSKnown.Zero.isSignBitSet() || LowBits.isSubsetOf(LHSKnown.Zero)) +        if (LHSKnown.isNonNegative() || LowBits.isSubsetOf(LHSKnown.Zero))            Known.Zero |= ~LowBits;          // If LHS is negative and not all low bits are zero, then the upper bits          // are all one. 
-        if (LHSKnown.One.isSignBitSet() && LowBits.intersects(LHSKnown.One)) +        if (LHSKnown.isNegative() && LowBits.intersects(LHSKnown.One))            Known.One |= ~LowBits;          assert(!(Known.Zero & Known.One) && "Bits known to be one AND zero?"); @@ -607,8 +607,8 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,      if (DemandedMask.isSignBitSet()) {        computeKnownBits(I->getOperand(0), LHSKnown, Depth + 1, CxtI);        // If it's known zero, our sign bit is also zero. -      if (LHSKnown.Zero.isSignBitSet()) -        Known.Zero.setSignBit(); +      if (LHSKnown.isNonNegative()) +        Known.makeNonNegative();      }      break;    case Instruction::URem: { @@ -1537,7 +1537,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,          for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {            APInt LaneElts = OpUndefElts.lshr(InnerVWidthPerLane * Lane);            LaneElts = LaneElts.getLoBits(InnerVWidthPerLane); -          LaneElts = LaneElts.shl(InnerVWidthPerLane * (2 * Lane + OpNum)); +          LaneElts <<= InnerVWidthPerLane * (2 * Lane + OpNum);            UndefElts |= LaneElts;          }        } diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index e89b400a4afc8..7fc6774f1849c 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -144,8 +144,8 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {  }  Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { -  if (Value *V = SimplifyExtractElementInst( -          EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyExtractElementInst(EI.getVectorOperand(), +                                            EI.getIndexOperand(), SQ))      return replaceInstUsesWith(EI, V);    // If vector val is constant with all elements the same, replace EI with @@ -1140,8 +1140,8 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {    SmallVector<int, 16> Mask = SVI.getShuffleMask();    Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); -  if (auto *V = SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), -                                          SVI.getType(), DL, &TLI, &DT, &AC)) +  if (auto *V = +          SimplifyShuffleVectorInst(LHS, RHS, SVI.getMask(), SVI.getType(), SQ))      return replaceInstUsesWith(SVI, V);    bool MadeChange = false; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 4729c79ca4c3d..1eb98b18bfb50 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -256,7 +256,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {          Value *C = I.getOperand(1);          // Does "B op C" simplify? -        if (Value *V = SimplifyBinOp(Opcode, B, C, DL)) { +        if (Value *V = SimplifyBinOp(Opcode, B, C, SQ)) {            // It simplifies to V.  Form "A op V".            I.setOperand(0, A);            I.setOperand(1, V); @@ -285,7 +285,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {          Value *C = Op1->getOperand(1);          // Does "A op B" simplify? 
-        if (Value *V = SimplifyBinOp(Opcode, A, B, DL)) { +        if (Value *V = SimplifyBinOp(Opcode, A, B, SQ)) {            // It simplifies to V.  Form "V op C".            I.setOperand(0, V);            I.setOperand(1, C); @@ -313,7 +313,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {          Value *C = I.getOperand(1);          // Does "C op A" simplify? -        if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { +        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) {            // It simplifies to V.  Form "V op B".            I.setOperand(0, V);            I.setOperand(1, B); @@ -333,7 +333,7 @@ bool InstCombiner::SimplifyAssociativeOrCommutative(BinaryOperator &I) {          Value *C = Op1->getOperand(1);          // Does "C op A" simplify? -        if (Value *V = SimplifyBinOp(Opcode, C, A, DL)) { +        if (Value *V = SimplifyBinOp(Opcode, C, A, SQ)) {            // It simplifies to V.  Form "B op V".            I.setOperand(0, B);            I.setOperand(1, V); @@ -498,10 +498,10 @@ getBinOpsForFactorization(Instruction::BinaryOps TopLevelOpcode,  /// This tries to simplify binary operations by factorizing out common terms  /// (e. g. "(A*B)+(A*C)" -> "A*(B+C)"). -static Value *tryFactorization(InstCombiner::BuilderTy *Builder, -                               const DataLayout &DL, BinaryOperator &I, -                               Instruction::BinaryOps InnerOpcode, Value *A, -                               Value *B, Value *C, Value *D) { +Value *InstCombiner::tryFactorization(InstCombiner::BuilderTy *Builder, +                                      BinaryOperator &I, +                                      Instruction::BinaryOps InnerOpcode, +                                      Value *A, Value *B, Value *C, Value *D) {    assert(A && B && C && D && "All values must be provided");    Value *V = nullptr; @@ -521,7 +521,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,          std::swap(C, D);        // Consider forming "A op' (B op D)".        // If "B op D" simplifies then it can be formed with no cost. -      V = SimplifyBinOp(TopLevelOpcode, B, D, DL); +      V = SimplifyBinOp(TopLevelOpcode, B, D, SQ);        // If "B op D" doesn't simplify then only go on if both of the existing        // operations "A op' B" and "C op' D" will be zapped as no longer used.        if (!V && LHS->hasOneUse() && RHS->hasOneUse()) @@ -540,7 +540,7 @@ static Value *tryFactorization(InstCombiner::BuilderTy *Builder,          std::swap(C, D);        // Consider forming "(A op C) op' B".        // If "A op C" simplifies then it can be formed with no cost. -      V = SimplifyBinOp(TopLevelOpcode, A, C, DL); +      V = SimplifyBinOp(TopLevelOpcode, A, C, SQ);        // If "A op C" doesn't simplify then only go on if both of the existing        // operations "A op' B" and "C op' D" will be zapped as no longer used. @@ -610,23 +610,23 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {      // The instruction has the form "(A op' B) op (C op' D)".  Try to factorize      // a common term.      if (Op0 && Op1 && LHSOpcode == RHSOpcode) -      if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) +      if (Value *V = tryFactorization(Builder, I, LHSOpcode, A, B, C, D))          return V;      // The instruction has the form "(A op' B) op (C)".  Try to factorize common      // term.      
if (Op0)        if (Value *Ident = getIdentityValue(LHSOpcode, RHS)) -        if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, -                                        Ident)) +        if (Value *V = +                tryFactorization(Builder, I, LHSOpcode, A, B, RHS, Ident))            return V;      // The instruction has the form "(B) op (C op' D)".  Try to factorize common      // term.      if (Op1)        if (Value *Ident = getIdentityValue(RHSOpcode, LHS)) -        if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, Ident, -                                        C, D)) +        if (Value *V = +                tryFactorization(Builder, I, RHSOpcode, LHS, Ident, C, D))            return V;    } @@ -638,8 +638,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {      Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op'      // Do "A op C" and "B op C" both simplify? -    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, DL)) -      if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, DL)) { +    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) +      if (Value *R = SimplifyBinOp(TopLevelOpcode, B, C, SQ)) {          // They do! Return "L op' R".          ++NumExpand;          C = Builder->CreateBinOp(InnerOpcode, L, R); @@ -655,8 +655,8 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {      Instruction::BinaryOps InnerOpcode = Op1->getOpcode(); // op'      // Do "A op B" and "A op C" both simplify? -    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, DL)) -      if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, DL)) { +    if (Value *L = SimplifyBinOp(TopLevelOpcode, A, B, SQ)) +      if (Value *R = SimplifyBinOp(TopLevelOpcode, A, C, SQ)) {          // They do! Return "L op' R".          
++NumExpand;          A = Builder->CreateBinOp(InnerOpcode, L, R); @@ -672,14 +672,14 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {        if (SI0->getCondition() == SI1->getCondition()) {          Value *SI = nullptr;          if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(), -                                     SI1->getFalseValue(), DL, &TLI, &DT, &AC)) +                                     SI1->getFalseValue(), SQ))            SI = Builder->CreateSelect(SI0->getCondition(),                                       Builder->CreateBinOp(TopLevelOpcode,                                                            SI0->getTrueValue(),                                                            SI1->getTrueValue()),                                       V);          if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(), -                                     SI1->getTrueValue(), DL, &TLI, &DT, &AC)) +                                     SI1->getTrueValue(), SQ))            SI = Builder->CreateSelect(                SI0->getCondition(), V,                Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(), @@ -1399,8 +1399,7 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {  Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {    SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end()); -  if (Value *V = -          SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, SQ))      return replaceInstUsesWith(GEP, V);    Value *PtrOp = GEP.getOperand(0); @@ -1589,7 +1588,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {        if (SO1->getType() != GO1->getType())          return nullptr; -      Value* Sum = SimplifyAddInst(GO1, SO1, false, false, DL, &TLI, &DT, &AC); +      Value *Sum = SimplifyAddInst(GO1, SO1, false, false, SQ);        // Only do the combine when we are sure the cost after the        // merge is never more than that before the merge.        if (Sum == nullptr) @@ -1949,9 +1948,9 @@ static bool isNeverEqualToUnescapedAlloc(Value *V, const TargetLibraryInfo *TLI,    return isAllocLikeFn(V, TLI) && V != AI;  } -static bool -isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users, -                     const TargetLibraryInfo *TLI) { +static bool isAllocSiteRemovable(Instruction *AI, +                                 SmallVectorImpl<WeakTrackingVH> &Users, +                                 const TargetLibraryInfo *TLI) {    SmallVector<Instruction*, 4> Worklist;    Worklist.push_back(AI); @@ -2035,7 +2034,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {    // If we have a malloc call which is only used in any amount of comparisons    // to null and free calls, delete the calls and replace the comparisons with    // true or false as appropriate. 
-  SmallVector<WeakVH, 64> Users; +  SmallVector<WeakTrackingVH, 64> Users;    if (isAllocSiteRemovable(&MI, Users, &TLI)) {      for (unsigned i = 0, e = Users.size(); i != e; ++i) {        // Lowering all @llvm.objectsize calls first because they may @@ -2304,8 +2303,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {    if (!EV.hasIndices())      return replaceInstUsesWith(EV, Agg); -  if (Value *V = -          SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC)) +  if (Value *V = SimplifyExtractValueInst(Agg, EV.getIndices(), SQ))      return replaceInstUsesWith(EV, V);    if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) { diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index b866958e3c4b8..b034ccc469338 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -101,6 +101,10 @@ static const char *const kAsanRegisterImageGlobalsName =    "__asan_register_image_globals";  static const char *const kAsanUnregisterImageGlobalsName =    "__asan_unregister_image_globals"; +static const char *const kAsanRegisterElfGlobalsName = +  "__asan_register_elf_globals"; +static const char *const kAsanUnregisterElfGlobalsName = +  "__asan_unregister_elf_globals";  static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init";  static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init";  static const char *const kAsanInitName = "__asan_init"; @@ -120,8 +124,11 @@ static const char *const kAsanPoisonStackMemoryName =      "__asan_poison_stack_memory";  static const char *const kAsanUnpoisonStackMemoryName =      "__asan_unpoison_stack_memory"; + +// ASan version script has __asan_* wildcard. Triple underscore prevents a +// linker (gold) warning about attempting to export a local symbol.  static const char *const kAsanGlobalsRegisteredFlagName = -    "__asan_globals_registered"; +    "___asan_globals_registered";  static const char *const kAsanOptionDetectUseAfterReturn =      "__asan_option_detect_stack_use_after_return"; @@ -270,6 +277,13 @@ static cl::opt<bool>                              "code stripping of globals"),                     cl::Hidden, cl::init(true)); +// This is on by default even though there is a bug in gold: +// https://sourceware.org/bugzilla/show_bug.cgi?id=19002 +static cl::opt<bool> +    ClWithComdat("asan-with-comdat", +                 cl::desc("Place ASan constructors in comdat sections"), +                 cl::Hidden, cl::init(true)); +  // Debug flags.  
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,                              cl::init(0)); @@ -607,10 +621,14 @@ public:  private:    void initializeCallbacks(Module &M); -  bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); +  bool InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat);    void InstrumentGlobalsCOFF(IRBuilder<> &IRB, Module &M,                               ArrayRef<GlobalVariable *> ExtendedGlobals,                               ArrayRef<Constant *> MetadataInitializers); +  void InstrumentGlobalsELF(IRBuilder<> &IRB, Module &M, +                            ArrayRef<GlobalVariable *> ExtendedGlobals, +                            ArrayRef<Constant *> MetadataInitializers, +                            const std::string &UniqueModuleId);    void InstrumentGlobalsMachO(IRBuilder<> &IRB, Module &M,                                ArrayRef<GlobalVariable *> ExtendedGlobals,                                ArrayRef<Constant *> MetadataInitializers); @@ -621,7 +639,8 @@ private:    GlobalVariable *CreateMetadataGlobal(Module &M, Constant *Initializer,                                         StringRef OriginalName); -  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata); +  void SetComdatForGlobalMetadata(GlobalVariable *G, GlobalVariable *Metadata, +                                  StringRef InternalSuffix);    IRBuilder<> CreateAsanModuleDtor(Module &M);    bool ShouldInstrumentGlobal(GlobalVariable *G); @@ -647,6 +666,11 @@ private:    Function *AsanUnregisterGlobals;    Function *AsanRegisterImageGlobals;    Function *AsanUnregisterImageGlobals; +  Function *AsanRegisterElfGlobals; +  Function *AsanUnregisterElfGlobals; + +  Function *AsanCtorFunction = nullptr; +  Function *AsanDtorFunction = nullptr;  };  // Stack poisoning does not play well with exception handling. @@ -1431,8 +1455,13 @@ void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit,  void AddressSanitizerModule::createInitializerPoisonCalls(      Module &M, GlobalValue *ModuleName) {    GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); +  if (!GV) +    return; + +  ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer()); +  if (!CA) +    return; -  ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());    for (Use &OP : CA->operands()) {      if (isa<ConstantAggregateZero>(OP)) continue;      ConstantStruct *CS = cast<ConstantStruct>(OP); @@ -1594,12 +1623,22 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) {        checkSanitizerInterfaceFunction(M.getOrInsertFunction(            kAsanUnregisterImageGlobalsName, IRB.getVoidTy(), IntptrTy));    AsanUnregisterImageGlobals->setLinkage(Function::ExternalLinkage); + +  AsanRegisterElfGlobals = checkSanitizerInterfaceFunction( +      M.getOrInsertFunction(kAsanRegisterElfGlobalsName, IRB.getVoidTy(), +                            IntptrTy, IntptrTy, IntptrTy)); +  AsanRegisterElfGlobals->setLinkage(Function::ExternalLinkage); + +  AsanUnregisterElfGlobals = checkSanitizerInterfaceFunction( +      M.getOrInsertFunction(kAsanUnregisterElfGlobalsName, IRB.getVoidTy(), +                            IntptrTy, IntptrTy, IntptrTy)); +  AsanUnregisterElfGlobals->setLinkage(Function::ExternalLinkage);  }  // Put the metadata and the instrumented global in the same group. This ensures  // that the metadata is discarded if the instrumented global is discarded.  
void AddressSanitizerModule::SetComdatForGlobalMetadata( -    GlobalVariable *G, GlobalVariable *Metadata) { +    GlobalVariable *G, GlobalVariable *Metadata, StringRef InternalSuffix) {    Module &M = *G->getParent();    Comdat *C = G->getComdat();    if (!C) { @@ -1609,7 +1648,15 @@ void AddressSanitizerModule::SetComdatForGlobalMetadata(        assert(G->hasLocalLinkage());        G->setName(Twine(kAsanGenPrefix) + "_anon_global");      } -    C = M.getOrInsertComdat(G->getName()); + +    if (!InternalSuffix.empty() && G->hasLocalLinkage()) { +      std::string Name = G->getName(); +      Name += InternalSuffix; +      C = M.getOrInsertComdat(Name); +    } else { +      C = M.getOrInsertComdat(G->getName()); +    } +      // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.      if (TargetTriple.isOSBinFormatCOFF())        C->setSelectionKind(Comdat::NoDuplicates); @@ -1636,11 +1683,10 @@ AddressSanitizerModule::CreateMetadataGlobal(Module &M, Constant *Initializer,  }  IRBuilder<> AddressSanitizerModule::CreateAsanModuleDtor(Module &M) { -  Function *AsanDtorFunction = +  AsanDtorFunction =        Function::Create(FunctionType::get(Type::getVoidTy(*C), false),                         GlobalValue::InternalLinkage, kAsanModuleDtorName, &M);    BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction); -  appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);    return IRBuilder<>(ReturnInst::Create(*C, AsanDtorBB));  } @@ -1665,8 +1711,67 @@ void AddressSanitizerModule::InstrumentGlobalsCOFF(             "global metadata will not be padded appropriately");      Metadata->setAlignment(SizeOfGlobalStruct); -    SetComdatForGlobalMetadata(G, Metadata); +    SetComdatForGlobalMetadata(G, Metadata, ""); +  } +} + +void AddressSanitizerModule::InstrumentGlobalsELF( +    IRBuilder<> &IRB, Module &M, ArrayRef<GlobalVariable *> ExtendedGlobals, +    ArrayRef<Constant *> MetadataInitializers, +    const std::string &UniqueModuleId) { +  assert(ExtendedGlobals.size() == MetadataInitializers.size()); + +  SmallVector<GlobalValue *, 16> MetadataGlobals(ExtendedGlobals.size()); +  for (size_t i = 0; i < ExtendedGlobals.size(); i++) { +    GlobalVariable *G = ExtendedGlobals[i]; +    GlobalVariable *Metadata = +        CreateMetadataGlobal(M, MetadataInitializers[i], G->getName()); +    MDNode *MD = MDNode::get(M.getContext(), ValueAsMetadata::get(G)); +    Metadata->setMetadata(LLVMContext::MD_associated, MD); +    MetadataGlobals[i] = Metadata; + +    SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId);    } + +  // Update llvm.compiler.used, adding the new metadata globals. This is +  // needed so that during LTO these variables stay alive. +  if (!MetadataGlobals.empty()) +    appendToCompilerUsed(M, MetadataGlobals); + +  // RegisteredFlag serves two purposes. First, we can pass it to dladdr() +  // to look up the loaded image that contains it. Second, we can store in it +  // whether registration has already occurred, to prevent duplicate +  // registration. +  // +  // Common linkage ensures that there is only one global per shared library. +  GlobalVariable *RegisteredFlag = new GlobalVariable( +      M, IntptrTy, false, GlobalVariable::CommonLinkage, +      ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); +  RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); + +  // Create start and stop symbols. 
+  GlobalVariable *StartELFMetadata = new GlobalVariable( +      M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, +      "__start_" + getGlobalMetadataSection()); +  StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); +  GlobalVariable *StopELFMetadata = new GlobalVariable( +      M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, +      "__stop_" + getGlobalMetadataSection()); +  StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + +  // Create a call to register the globals with the runtime. +  IRB.CreateCall(AsanRegisterElfGlobals, +                 {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), +                  IRB.CreatePointerCast(StartELFMetadata, IntptrTy), +                  IRB.CreatePointerCast(StopELFMetadata, IntptrTy)}); + +  // We also need to unregister globals at the end, e.g., when a shared library +  // gets closed. +  IRBuilder<> IRB_Dtor = CreateAsanModuleDtor(M); +  IRB_Dtor.CreateCall(AsanUnregisterElfGlobals, +                      {IRB.CreatePointerCast(RegisteredFlag, IntptrTy), +                       IRB.CreatePointerCast(StartELFMetadata, IntptrTy), +                       IRB.CreatePointerCast(StopELFMetadata, IntptrTy)});  }  void AddressSanitizerModule::InstrumentGlobalsMachO( @@ -1756,7 +1861,10 @@ void AddressSanitizerModule::InstrumentGlobalsWithMetadataArray(  // This function replaces all global variables with new variables that have  // trailing redzones. It also creates a function that poisons  // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { +// Sets *CtorComdat to true if the global registration code emitted into the +// asan constructor is comdat-compatible. +bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M, bool *CtorComdat) { +  *CtorComdat = false;    GlobalsMD.init(M);    SmallVector<GlobalVariable *, 16> GlobalsToChange; @@ -1766,7 +1874,10 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {    }    size_t n = GlobalsToChange.size(); -  if (n == 0) return false; +  if (n == 0) { +    *CtorComdat = true; +    return false; +  }    auto &DL = M.getDataLayout(); @@ -1911,7 +2022,14 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {      Initializers[i] = Initializer;    } -  if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) { +  std::string ELFUniqueModuleId = +      (UseGlobalsGC && TargetTriple.isOSBinFormatELF()) ? getUniqueModuleId(&M) +                                                        : ""; + +  if (!ELFUniqueModuleId.empty()) { +    InstrumentGlobalsELF(IRB, M, NewGlobals, Initializers, ELFUniqueModuleId); +    *CtorComdat = true; +  } else if (UseGlobalsGC && TargetTriple.isOSBinFormatCOFF()) {      InstrumentGlobalsCOFF(IRB, M, NewGlobals, Initializers);    } else if (UseGlobalsGC && ShouldUseMachOGlobalsSection()) {      InstrumentGlobalsMachO(IRB, M, NewGlobals, Initializers); @@ -1938,17 +2056,36 @@ bool AddressSanitizerModule::runOnModule(Module &M) {    if (CompileKernel)      return false; -  Function *AsanCtorFunction; +  // Create a module constructor. A destructor is created lazily because not all +  // platforms, and not all modules need it.    
std::tie(AsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(        M, kAsanModuleCtorName, kAsanInitName, /*InitArgTypes=*/{},        /*InitArgs=*/{}, kAsanVersionCheckName); -  appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); +  bool CtorComdat = true;    bool Changed = false;    // TODO(glider): temporarily disabled globals instrumentation for KASan.    if (ClGlobals) {      IRBuilder<> IRB(AsanCtorFunction->getEntryBlock().getTerminator()); -    Changed |= InstrumentGlobals(IRB, M); +    Changed |= InstrumentGlobals(IRB, M, &CtorComdat); +  } + +  // Put the constructor and destructor in comdat if both +  // (1) global instrumentation is not TU-specific +  // (2) target is ELF. +  if (ClWithComdat && TargetTriple.isOSBinFormatELF() && CtorComdat) { +    AsanCtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleCtorName)); +    appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority, +                        AsanCtorFunction); +    if (AsanDtorFunction) { +      AsanDtorFunction->setComdat(M.getOrInsertComdat(kAsanModuleDtorName)); +      appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority, +                          AsanDtorFunction); +    } +  } else { +    appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); +    if (AsanDtorFunction) +      appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority);    }    return Changed; @@ -2586,7 +2723,7 @@ void FunctionStackPoisoner::processStaticAllocas() {      Value *NewAllocaPtr = IRB.CreateIntToPtr(          IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)),          AI->getType()); -    replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, /*Deref=*/false); +    replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB, DIExpression::NoDeref);      AI->replaceAllUsesWith(NewAllocaPtr);    } diff --git a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp index d7eb857cff7e1..493d014586c6a 100644 --- a/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp +++ b/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp @@ -771,7 +771,7 @@ public:        if (perform(MI)) {          Changed = true;          ++NumOfPGOMemOPOpt; -        DEBUG(dbgs() << "MemOP calls: " << MI->getCalledFunction()->getName() +        DEBUG(dbgs() << "MemOP call: " << MI->getCalledFunction()->getName()                       << "is Transformed.\n");        }      } @@ -863,13 +863,23 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {      ActualCount = *BBEdgeCount;    } +  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals); +  DEBUG(dbgs() << "Read one memory intrinsic profile with count " << ActualCount +               << "\n"); +  DEBUG( +      for (auto &VD +           : VDs) { dbgs() << "  (" << VD.Value << "," << VD.Count << ")\n"; }); +    if (ActualCount < MemOPCountThreshold)      return false; +  // Skip if the total value profiled count is 0, in which case we can't +  // scale up the counts properly (and there is no profitable transformation). 
+  if (TotalCount == 0)
+    return false;
-  ArrayRef<InstrProfValueData> VDs(ValueDataArray.get(), NumVals);
   TotalCount = ActualCount;
   if (MemOPScaleCount)
-    DEBUG(dbgs() << "Scale counts: numberator = " << ActualCount
+    DEBUG(dbgs() << "Scale counts: numerator = " << ActualCount
                  << " denominator = " << SavedTotalCount << "\n");
   // Keeping track of the count of the default case:
@@ -915,14 +925,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
     MaxCount = RemainCount;
 
   uint64_t SumForOpt = TotalCount - RemainCount;
-  DEBUG(dbgs() << "Read one memory intrinsic profile: " << SumForOpt << " vs "
-               << TotalCount << "\n");
-  DEBUG(
-      for (auto &VD
-           : VDs) { dbgs() << "  (" << VD.Value << "," << VD.Count << ")\n"; });
   DEBUG(dbgs() << "Optimize one memory intrinsic call to " << Version
-               << " Versions\n");
+               << " Versions (covering " << SumForOpt << " out of "
+               << TotalCount << ")\n");
 
   // mem_op(..., size)
   // ==>
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 190f05db4b0c1..3e480a6df446d 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2643,7 +2643,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                "ByVal argument is not a pointer!");
         Size = DL.getTypeAllocSize(A->getType()->getPointerElementType());
         if (ArgOffset + Size > kParamTLSSize) break;
-        unsigned ParamAlignment = CS.getParamAlignment(i + 1);
+        unsigned ParamAlignment = CS.getParamAlignment(i);
         unsigned Alignment = std::min(ParamAlignment, kShadowTLSAlignment);
         Store = IRB.CreateMemCpy(ArgShadowBase,
                                  getShadowPtr(A, Type::getInt8Ty(*MS.C), IRB),
@@ -3502,7 +3502,7 @@ struct VarArgPowerPC64Helper : public VarArgHelper {
         assert(A->getType()->isPointerTy());
         Type *RealTy = A->getType()->getPointerElementType();
         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
-        uint64_t ArgAlign = CS.getParamAlignment(ArgNo + 1);
+        uint64_t ArgAlign = CS.getParamAlignment(ArgNo);
         if (ArgAlign < 8)
           ArgAlign = 8;
         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
diff --git a/lib/Transforms/ObjCARC/ObjCARC.h b/lib/Transforms/ObjCARC/ObjCARC.h
index f02b75f0b4560..cd9b3d96a14f7 100644
--- a/lib/Transforms/ObjCARC/ObjCARC.h
+++ b/lib/Transforms/ObjCARC/ObjCARC.h
@@ -69,6 +69,19 @@ static inline void EraseInstruction(Instruction *CI) {
     RecursivelyDeleteTriviallyDeadInstructions(OldArg);
 }
 
+/// If Inst is a RetainRV and its operand is a call or invoke, return the
+/// operand. Otherwise return null.
+static inline const Instruction *getreturnRVOperand(const Instruction &Inst, +                                                    ARCInstKind Class) { +  if (Class != ARCInstKind::RetainRV) +    return nullptr; + +  const auto *Opnd = Inst.getOperand(0)->stripPointerCasts(); +  if (const auto *C = dyn_cast<CallInst>(Opnd)) +    return C; +  return dyn_cast<InvokeInst>(Opnd); +} +  } // end namespace objcarc  } // end namespace llvm diff --git a/lib/Transforms/ObjCARC/PtrState.cpp b/lib/Transforms/ObjCARC/PtrState.cpp index c1bbc4e96b16d..d13e941044f14 100644 --- a/lib/Transforms/ObjCARC/PtrState.cpp +++ b/lib/Transforms/ObjCARC/PtrState.cpp @@ -244,6 +244,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,                                            const Value *Ptr,                                            ProvenanceAnalysis &PA,                                            ARCInstKind Class) { +  auto SetSeqAndInsertReverseInsertPt = [&](Sequence NewSeq){ +    assert(!HasReverseInsertPts()); +    SetSeq(NewSeq); +    // If this is an invoke instruction, we're scanning it as part of +    // one of its successor blocks, since we can't insert code after it +    // in its own block, and we don't want to split critical edges. +    if (isa<InvokeInst>(Inst)) +      InsertReverseInsertPt(&*BB->getFirstInsertionPt()); +    else +      InsertReverseInsertPt(&*++Inst->getIterator()); +  }; +    // Check for possible direct uses.    switch (GetSeq()) {    case S_Release: @@ -251,26 +263,18 @@ void BottomUpPtrState::HandlePotentialUse(BasicBlock *BB, Instruction *Inst,      if (CanUse(Inst, Ptr, PA, Class)) {        DEBUG(dbgs() << "            CanUse: Seq: " << GetSeq() << "; " << *Ptr                     << "\n"); -      assert(!HasReverseInsertPts()); -      // If this is an invoke instruction, we're scanning it as part of -      // one of its successor blocks, since we can't insert code after it -      // in its own block, and we don't want to split critical edges. -      if (isa<InvokeInst>(Inst)) -        InsertReverseInsertPt(&*BB->getFirstInsertionPt()); -      else -        InsertReverseInsertPt(&*++Inst->getIterator()); -      SetSeq(S_Use); +      SetSeqAndInsertReverseInsertPt(S_Use);      } else if (Seq == S_Release && IsUser(Class)) {        DEBUG(dbgs() << "            PreciseReleaseUse: Seq: " << GetSeq() << "; "                     << *Ptr << "\n");        // Non-movable releases depend on any possible objc pointer use. -      SetSeq(S_Stop); -      assert(!HasReverseInsertPts()); -      // As above; handle invoke specially. 
-      if (isa<InvokeInst>(Inst)) -        InsertReverseInsertPt(&*BB->getFirstInsertionPt()); -      else -        InsertReverseInsertPt(&*++Inst->getIterator()); +      SetSeqAndInsertReverseInsertPt(S_Stop); +    } else if (const auto *Call = getreturnRVOperand(*Inst, Class)) { +      if (CanUse(Call, Ptr, PA, GetBasicARCInstKind(Call))) { +        DEBUG(dbgs() << "            ReleaseUse: Seq: " << GetSeq() << "; " +                     << *Ptr << "\n"); +        SetSeqAndInsertReverseInsertPt(S_Stop); +      }      }      break;    case S_Stop: diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index b323ab3bd443c..523390758769a 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -55,6 +55,7 @@ add_llvm_library(LLVMScalarOpts    Scalar.cpp    Scalarizer.cpp    SeparateConstOffsetFromGEP.cpp +  SimpleLoopUnswitch.cpp    SimplifyCFGPass.cpp    Sink.cpp    SpeculativeExecution.cpp diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index b5a4cc2f39533..dc864f48bf1f8 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -151,7 +151,7 @@ static bool processPHI(PHINode *P, LazyValueInfo *LVI,      Changed = true;    } -  if (Value *V = SimplifyInstruction(P, SQ.getWithInstruction(P))) { +  if (Value *V = SimplifyInstruction(P, SQ)) {      P->replaceAllUsesWith(V);      P->eraseFromParent();      Changed = true; @@ -565,25 +565,14 @@ bool CorrelatedValuePropagation::runOnFunction(Function &F) {      return false;    LazyValueInfo *LVI = &getAnalysis<LazyValueInfoWrapperPass>().getLVI(); -  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); -  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; -  auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); -  auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; -  auto *ACWP = getAnalysisIfAvailable<AssumptionCacheTracker>(); -  auto *AC = ACWP ? &ACWP->getAssumptionCache(F) : nullptr; -  const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); -  return runImpl(F, LVI, SQ); +  return runImpl(F, LVI, getBestSimplifyQuery(*this, F));  }  PreservedAnalyses  CorrelatedValuePropagationPass::run(Function &F, FunctionAnalysisManager &AM) {    LazyValueInfo *LVI = &AM.getResult<LazyValueAnalysis>(F); -  auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F); -  auto *TLI = AM.getCachedResult<TargetLibraryAnalysis>(F); -  auto *AC = AM.getCachedResult<AssumptionAnalysis>(F); -  const SimplifyQuery SQ(F.getParent()->getDataLayout(), TLI, DT, AC); -  bool Changed = runImpl(F, LVI, SQ); +  bool Changed = runImpl(F, LVI, getBestSimplifyQuery(AM, F));    if (!Changed)      return PreservedAnalyses::all(); diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index 04479b6e49ac8..d8f8a58a5fdfa 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -253,6 +253,7 @@ public:    const TargetTransformInfo &TTI;    DominatorTree &DT;    AssumptionCache &AC; +  const SimplifyQuery SQ;    MemorySSA *MSSA;    std::unique_ptr<MemorySSAUpdater> MSSAUpdater;    typedef RecyclingAllocator< @@ -315,9 +316,10 @@ public:    unsigned CurrentGeneration;    /// \brief Set up the EarlyCSE runner for a particular function. 
-  EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI, -           DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA) -      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), +  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI, +           const TargetTransformInfo &TTI, DominatorTree &DT, +           AssumptionCache &AC, MemorySSA *MSSA) +      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),          MSSAUpdater(make_unique<MemorySSAUpdater>(MSSA)), CurrentGeneration(0) {    } @@ -616,8 +618,6 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {    /// stores which can occur in bitfield code among other things.    Instruction *LastStore = nullptr; -  const DataLayout &DL = BB->getModule()->getDataLayout(); -    // See if any instructions in the block can be eliminated.  If so, do it.  If    // not, add them to AvailableValues.    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { @@ -635,10 +635,16 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {      // Skip assume intrinsics, they don't really have side effects (although      // they're marked as such to ensure preservation of control dependencies), -    // and this pass will not disturb any of the assumption's control -    // dependencies. +    // and this pass will not bother with its removal. However, we should mark +    // its condition as true for all dominated blocks.      if (match(Inst, m_Intrinsic<Intrinsic::assume>())) { -      DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n'); +      auto *CondI = +          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0)); +      if (CondI && SimpleValue::canHandle(CondI)) { +        DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst << '\n'); +        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); +      } else +        DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');        continue;      } @@ -658,10 +664,25 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {      if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {        if (auto *CondI =                dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) { -        // The condition we're on guarding here is true for all dominated -        // locations. -        if (SimpleValue::canHandle(CondI)) +        if (SimpleValue::canHandle(CondI)) { +          // Do we already know the actual value of this condition? +          if (auto *KnownCond = AvailableValues.lookup(CondI)) { +            // Is the condition known to be true? +            if (isa<ConstantInt>(KnownCond) && +                cast<ConstantInt>(KnownCond)->isOneValue()) { +              DEBUG(dbgs() << "EarlyCSE removing guard: " << *Inst << '\n'); +              removeMSSA(Inst); +              Inst->eraseFromParent(); +              Changed = true; +              continue; +            } else +              // Use the known value if it wasn't true. +              cast<CallInst>(Inst)->setArgOperand(0, KnownCond); +          } +          // The condition we're on guarding here is true for all dominated +          // locations.            AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext())); +        }        }        // Guard intrinsics read all memory, but don't write any memory. @@ -673,7 +694,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {      // If the instruction can be simplified (e.g. X+0 = X) then replace it with      // its simpler value. 
-    if (Value *V = SimplifyInstruction(Inst, DL, &TLI, &DT, &AC)) { +    if (Value *V = SimplifyInstruction(Inst, SQ)) {        DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V << '\n');        bool Killed = false;        if (!Inst->use_empty()) { @@ -964,7 +985,7 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,    auto *MSSA =        UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr; -  EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); +  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);    if (!CSE.run())      return PreservedAnalyses::all(); @@ -1008,7 +1029,7 @@ public:      auto *MSSA =          UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr; -    EarlyCSE CSE(TLI, TTI, DT, AC, MSSA); +    EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);      return CSE.run();    } diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index be696df548d52..c04646eed49a8 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1687,7 +1687,7 @@ bool GVN::processInstruction(Instruction *I) {    // example if it determines that %y is equal to %x then the instruction    // "%z = and i32 %x, %y" becomes "%z = and i32 %x, %x" which we now simplify.    const DataLayout &DL = I->getModule()->getDataLayout(); -  if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) { +  if (Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC})) {      bool Changed = false;      if (!I->use_empty()) {        I->replaceAllUsesWith(V); diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index dcb2a4a0c6e6b..3953198fe6052 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -97,7 +97,7 @@ class IndVarSimplify {    TargetLibraryInfo *TLI;    const TargetTransformInfo *TTI; -  SmallVector<WeakVH, 16> DeadInsts; +  SmallVector<WeakTrackingVH, 16> DeadInsts;    bool Changed = false;    bool isValidRewrite(Value *FromVal, Value *ToVal); @@ -415,8 +415,8 @@ void IndVarSimplify::handleFloatingPointIV(Loop *L, PHINode *PN) {                                        Compare->getName());    // In the following deletions, PN may become dead and may be deleted. -  // Use a WeakVH to observe whether this happens. -  WeakVH WeakPH = PN; +  // Use a WeakTrackingVH to observe whether this happens. +  WeakTrackingVH WeakPH = PN;    // Delete the old floating point exit comparison.  The branch starts using the    // new comparison. 
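A quick orientation note on the WeakVH to WeakTrackingVH migration running through these hunks: at this revision a WeakTrackingVH nulls itself when its value is deleted and follows replaceAllUsesWith to the replacement value, which is exactly what the WeakPH probe here and the various DeadInsts worklists count on. The following is a minimal illustrative sketch, not code from this diff; observeAcrossCleanup and runCleanup are invented names.

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/IR/ValueHandle.h"

    using namespace llvm;

    void observeAcrossCleanup(PHINode *PN, function_ref<void()> runCleanup) {
      WeakTrackingVH WeakPH = PN; // currently designates PN
      runCleanup();               // may erase PN or RAUW it away
      if (!WeakPH) {
        // PN was deleted; the handle nulled itself instead of dangling.
        return;
      }
      // If PN was RAUW'd, the tracking handle now designates the
      // replacement value rather than the stale pointer.
      Value *Current = WeakPH;
      (void)Current;
    }
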
@@ -451,7 +451,7 @@ void IndVarSimplify::rewriteNonIntegerIVs(Loop *L) {    //    BasicBlock *Header = L->getHeader(); -  SmallVector<WeakVH, 8> PHIs; +  SmallVector<WeakTrackingVH, 8> PHIs;    for (BasicBlock::iterator I = Header->begin();         PHINode *PN = dyn_cast<PHINode>(I); ++I)      PHIs.push_back(PN); @@ -901,7 +901,7 @@ class WidenIV {    PHINode *WidePhi;    Instruction *WideInc;    const SCEV *WideIncExpr; -  SmallVectorImpl<WeakVH> &DeadInsts; +  SmallVectorImpl<WeakTrackingVH> &DeadInsts;    SmallPtrSet<Instruction *,16> Widened;    SmallVector<NarrowIVDefUse, 8> NarrowIVUsers; @@ -941,20 +941,13 @@ class WidenIV {    }  public: -  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, -          ScalarEvolution *SEv, DominatorTree *DTree, -          SmallVectorImpl<WeakVH> &DI, bool HasGuards) : -    OrigPhi(WI.NarrowIV), -    WideType(WI.WidestNativeType), -    LI(LInfo), -    L(LI->getLoopFor(OrigPhi->getParent())), -    SE(SEv), -    DT(DTree), -    HasGuards(HasGuards), -    WidePhi(nullptr), -    WideInc(nullptr), -    WideIncExpr(nullptr), -    DeadInsts(DI) { +  WidenIV(const WideIVInfo &WI, LoopInfo *LInfo, ScalarEvolution *SEv, +          DominatorTree *DTree, SmallVectorImpl<WeakTrackingVH> &DI, +          bool HasGuards) +      : OrigPhi(WI.NarrowIV), WideType(WI.WidestNativeType), LI(LInfo), +        L(LI->getLoopFor(OrigPhi->getParent())), SE(SEv), DT(DTree), +        HasGuards(HasGuards), WidePhi(nullptr), WideInc(nullptr), +        WideIncExpr(nullptr), DeadInsts(DI) {      assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");      ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;    } diff --git a/lib/Transforms/Scalar/InferAddressSpaces.cpp b/lib/Transforms/Scalar/InferAddressSpaces.cpp index 9e2563879da2b..5e116ef2fe75e 100644 --- a/lib/Transforms/Scalar/InferAddressSpaces.cpp +++ b/lib/Transforms/Scalar/InferAddressSpaces.cpp @@ -138,7 +138,7 @@ private:    // Tries to infer the specific address space of each address expression in    // Postorder. -  void inferAddressSpaces(const std::vector<Value *> &Postorder, +  void inferAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,                            ValueToAddrSpaceMapTy *InferredAddrSpace) const;    bool isSafeToCastConstAddrSpace(Constant *C, unsigned NewAS) const; @@ -147,7 +147,7 @@ private:    // address spaces if InferredAddrSpace says so. Postorder is the postorder of    // all flat expressions in the use-def graph of function F.    bool -  rewriteWithNewAddressSpaces(const std::vector<Value *> &Postorder, +  rewriteWithNewAddressSpaces(ArrayRef<WeakTrackingVH> Postorder,                                const ValueToAddrSpaceMapTy &InferredAddrSpace,                                Function *F) const; @@ -162,7 +162,7 @@ private:      std::vector<std::pair<Value *, bool>> &PostorderStack,      DenseSet<Value *> &Visited) const; -  std::vector<Value *> collectFlatAddressExpressions(Function &F) const; +  std::vector<WeakTrackingVH> collectFlatAddressExpressions(Function &F) const;    Value *cloneValueWithNewAddressSpace(      Value *V, unsigned NewAddrSpace, @@ -274,16 +274,36 @@ void InferAddressSpaces::appendsFlatAddressExpressionToPostorderStack(      Value *V, std::vector<std::pair<Value *, bool>> &PostorderStack,      DenseSet<Value *> &Visited) const {    assert(V->getType()->isPointerTy()); + +  // Generic addressing expressions may be hidden in nested constant +  // expressions. 
+  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
+    // TODO: Look in non-address parts, like icmp operands.
+    if (isAddressExpression(*CE) && Visited.insert(CE).second)
+      PostorderStack.push_back(std::make_pair(CE, false));
+
+    return;
+  }
+
   if (isAddressExpression(*V) &&
       V->getType()->getPointerAddressSpace() == FlatAddrSpace) {
-    if (Visited.insert(V).second)
+    if (Visited.insert(V).second) {
       PostorderStack.push_back(std::make_pair(V, false));
+
+      Operator *Op = cast<Operator>(V);
+      for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I) {
+        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Op->getOperand(I))) {
+          if (isAddressExpression(*CE) && Visited.insert(CE).second)
+            PostorderStack.emplace_back(CE, false);
+        }
+      }
+    }
   }
 }
 
 // Returns all flat address expressions in function F. The elements are
 // ordered in postorder.
-std::vector<Value *>
+std::vector<WeakTrackingVH>
 InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
   // This function implements a non-recursive postorder traversal of a partial
   // use-def graph of function F.
@@ -326,21 +346,25 @@ InferAddressSpaces::collectFlatAddressExpressions(Function &F) const {
         PushPtrOperand(Cmp->getOperand(0));
         PushPtrOperand(Cmp->getOperand(1));
       }
+    } else if (auto *ASC = dyn_cast<AddrSpaceCastInst>(&I)) {
+      if (!ASC->getType()->isVectorTy())
+        PushPtrOperand(ASC->getPointerOperand());
     }
   }
 
-  std::vector<Value *> Postorder; // The resultant postorder.
+  std::vector<WeakTrackingVH> Postorder; // The resultant postorder.
   while (!PostorderStack.empty()) {
+    Value *TopVal = PostorderStack.back().first;
     // If the operands of the expression on the top are already explored,
     // adds that expression to the resultant postorder.
     if (PostorderStack.back().second) {
-      Postorder.push_back(PostorderStack.back().first);
+      Postorder.push_back(TopVal);
       PostorderStack.pop_back();
       continue;
     }
 
     // Otherwise, adds its operands to the stack and explores them.
     PostorderStack.back().second = true;
-    for (Value *PtrOperand : getPointerOperands(*PostorderStack.back().first)) {
+    for (Value *PtrOperand : getPointerOperands(*TopVal)) {
       appendsFlatAddressExpressionToPostorderStack(PtrOperand, PostorderStack,
                                                    Visited);
     }
@@ -559,7 +583,7 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
     return false;
 
   // Collects all flat address expressions in postorder.
-  std::vector<Value *> Postorder = collectFlatAddressExpressions(F);
+  std::vector<WeakTrackingVH> Postorder = collectFlatAddressExpressions(F);
 
   // Runs a data-flow analysis to refine the address spaces of every expression
   // in Postorder.
@@ -571,8 +595,10 @@ bool InferAddressSpaces::runOnFunction(Function &F) {
   return rewriteWithNewAddressSpaces(Postorder, InferredAddrSpace, &F);
 }
 
+// Constants need to be tracked through RAUW to handle cases with nested
+// constant expressions, so wrap values in WeakTrackingVH.
 void InferAddressSpaces::inferAddressSpaces(
-    const std::vector<Value *> &Postorder,
+    ArrayRef<WeakTrackingVH> Postorder,
     ValueToAddrSpaceMapTy *InferredAddrSpace) const {
   SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
   // Initially, all expressions are in the uninitialized address space.
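The hunk above ends where the worklist is seeded. For orientation, here is a hedged sketch of the fixed-point loop that inferAddressSpaces then runs; joinAddressSpaces is a hypothetical stand-in for the pass's real meet over operand address spaces, and the uninitialized-address-space bookkeeping is elided.

    // Sketch only, not code from this diff.
    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/SetVector.h"
    #include "llvm/IR/User.h"
    #include "llvm/IR/Value.h"
    #include "llvm/IR/ValueHandle.h"

    using namespace llvm;

    using ValueToAddrSpaceMapTy = DenseMap<const Value *, unsigned>;

    // Hypothetical meet over the address spaces of V's pointer operands.
    unsigned joinAddressSpaces(const Value *V, const ValueToAddrSpaceMapTy &M);

    void inferSketch(ArrayRef<WeakTrackingVH> Postorder,
                     ValueToAddrSpaceMapTy &InferredAddrSpace) {
      SetVector<Value *> Worklist(Postorder.begin(), Postorder.end());
      while (!Worklist.empty()) {
        Value *V = Worklist.pop_back_val();
        unsigned NewAS = joinAddressSpaces(V, InferredAddrSpace);
        if (InferredAddrSpace.lookup(V) == NewAS)
          continue; // no refinement; nothing to propagate
        InferredAddrSpace[V] = NewAS;
        // V's address space got more specific, so users may be inferable now.
        for (User *U : V->users())
          if (InferredAddrSpace.count(U)) // only expressions being tracked
            Worklist.insert(U);
      }
    }
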
@@ -784,8 +810,8 @@ static Value::use_iterator skipToNextUser(Value::use_iterator I,  }  bool InferAddressSpaces::rewriteWithNewAddressSpaces( -  const std::vector<Value *> &Postorder, -  const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const { +    ArrayRef<WeakTrackingVH> Postorder, +    const ValueToAddrSpaceMapTy &InferredAddrSpace, Function *F) const {    // For each address expression to be modified, creates a clone of it with its    // pointer operands converted to the new address space. Since the pointer    // operands are converted, the clone is naturally in the new address space by @@ -812,8 +838,12 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(      NewV->setOperand(OperandNo, ValueWithNewAddrSpace.lookup(UndefUse->get()));    } +  SmallVector<Instruction *, 16> DeadInstructions; +    // Replaces the uses of the old address expressions with the new ones. -  for (Value *V : Postorder) { +  for (const WeakTrackingVH &WVH : Postorder) { +    assert(WVH && "value was unexpectedly deleted"); +    Value *V = WVH;      Value *NewV = ValueWithNewAddrSpace.lookup(V);      if (NewV == nullptr)        continue; @@ -821,6 +851,17 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(      DEBUG(dbgs() << "Replacing the uses of " << *V                   << "\n  with\n  " << *NewV << '\n'); +    if (Constant *C = dyn_cast<Constant>(V)) { +      Constant *Replace = ConstantExpr::getAddrSpaceCast(cast<Constant>(NewV), +                                                         C->getType()); +      if (C != Replace) { +        DEBUG(dbgs() << "Inserting replacement const cast: " +              << Replace << ": " << *Replace << '\n'); +        C->replaceAllUsesWith(Replace); +        V = Replace; +      } +    } +      Value::use_iterator I, E, Next;      for (I = V->use_begin(), E = V->use_end(); I != E; ) {        Use &U = *I; @@ -881,6 +922,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(            }          } +        if (AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(CurUser)) { +          unsigned NewAS = NewV->getType()->getPointerAddressSpace(); +          if (ASC->getDestAddressSpace() == NewAS) { +            ASC->replaceAllUsesWith(NewV); +            DeadInstructions.push_back(ASC); +            continue; +          } +        } +          // Otherwise, replaces the use with flat(NewV).          
        if (Instruction *I = dyn_cast<Instruction>(V)) {
          BasicBlock::iterator InsertPos = std::next(I->getIterator());
@@ -894,10 +944,15 @@ bool InferAddressSpaces::rewriteWithNewAddressSpaces(
       }
     }
 
-    if (V->use_empty())
-      RecursivelyDeleteTriviallyDeadInstructions(V);
+    if (V->use_empty()) {
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        DeadInstructions.push_back(I);
+    }
   }
 
+  for (Instruction *I : DeadInstructions)
+    RecursivelyDeleteTriviallyDeadInstructions(I);
+
   return true;
 }
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index a0da81605a809..7dacaba1193e8 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -557,7 +557,7 @@ bool JumpThreadingPass::ComputeValueKnownInPredecessors(
         Value *LHS = PN->getIncomingValue(i);
         Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
-        Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, DL);
+        Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, {DL});
         if (!Res) {
           if (!isa<Constant>(RHS))
             continue;
@@ -1250,37 +1250,53 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
   BasicBlock *OnlyDest = nullptr;
   BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+  Constant *OnlyVal = nullptr;
+  Constant *MultipleVal = (Constant *)(intptr_t)~0ULL;
+  unsigned PredWithKnownDest = 0;
   for (const auto &PredValue : PredValues) {
     BasicBlock *Pred = PredValue.second;
     if (!SeenPreds.insert(Pred).second)
       continue;  // Duplicate predecessor entry.
 
-    // If the predecessor ends with an indirect goto, we can't change its
-    // destination.
-    if (isa<IndirectBrInst>(Pred->getTerminator()))
-      continue;
-
     Constant *Val = PredValue.first;
 
     BasicBlock *DestBB;
     if (isa<UndefValue>(Val))
       DestBB = nullptr;
-    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+    else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
       DestBB = BI->getSuccessor(cast<ConstantInt>(Val)->isZero());
-    else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
+      assert(isa<ConstantInt>(Val) && "Expecting a constant integer");
       DestBB = SI->findCaseValue(cast<ConstantInt>(Val))->getCaseSuccessor();
     } else {
       assert(isa<IndirectBrInst>(BB->getTerminator())
              && "Unexpected terminator");
+      assert(isa<BlockAddress>(Val) && "Expecting a constant blockaddress");
       DestBB = cast<BlockAddress>(Val)->getBasicBlock();
     }
 
     // If we have exactly one destination, remember it for efficiency below.
-    if (PredToDestList.empty())
+    if (PredToDestList.empty()) {
       OnlyDest = DestBB;
-    else if (OnlyDest != DestBB)
-      OnlyDest = MultipleDestSentinel;
+      OnlyVal = Val;
+    } else {
+      if (OnlyDest != DestBB)
+        OnlyDest = MultipleDestSentinel;
+      // It is possible we have the same destination but a different value,
+      // e.g. the default case in a switchinst.
+      if (Val != OnlyVal)
+        OnlyVal = MultipleVal;
+    }
+
+    // We know where this predecessor is going.
+    ++PredWithKnownDest;
+
+    // If the predecessor ends with an indirect goto, we can't change its
+    // destination.
+    if (isa<IndirectBrInst>(Pred->getTerminator()))
+      continue;
 
     PredToDestList.push_back(std::make_pair(Pred, DestBB));
   }
@@ -1293,7 +1309,7 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
   // not thread. By doing so, we do not need to duplicate the current block and
   // also miss potential opportunities in case we don't/can't duplicate.
   if (OnlyDest && OnlyDest != MultipleDestSentinel) {
-    if (PredToDestList.size() ==
+    if (PredWithKnownDest ==
         (size_t)std::distance(pred_begin(BB), pred_end(BB))) {
       bool SeenFirstBranchToOnlyDest = false;
       for (BasicBlock *SuccBB : successors(BB)) {
@@ -1310,11 +1326,18 @@ bool JumpThreadingPass::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
       // If the condition is now dead due to the removal of the old terminator,
       // erase it.
-      auto *CondInst = dyn_cast<Instruction>(Cond);
-      if (CondInst && CondInst->use_empty())
-        CondInst->eraseFromParent();
-      // FIXME: in case this instruction is defined in the current BB and it
-      // resolves to a single value from all predecessors, we can do RAUW.
+      if (auto *CondInst = dyn_cast<Instruction>(Cond)) {
+        if (CondInst->use_empty() && !CondInst->mayHaveSideEffects())
+          CondInst->eraseFromParent();
+        else if (OnlyVal && OnlyVal != MultipleVal &&
+                 CondInst->getParent() == BB) {
+          // If we just learned Cond is the same value for all uses of the
+          // condition, replace it with a constant value.
+          CondInst->replaceAllUsesWith(OnlyVal);
+          if (!CondInst->mayHaveSideEffects())
+            CondInst->eraseFromParent();
+        }
+      }
       return true;
     }
   }
@@ -1883,8 +1906,9 @@ bool JumpThreadingPass::DuplicateCondBranchOnPHIIntoPred(
     // If this instruction can be simplified after the operands are updated,
     // just use the simplified value instead.  This frequently happens due to
     // phi translation.
-    if (Value *IV =
-            SimplifyInstruction(New, BB->getModule()->getDataLayout())) {
+    if (Value *IV = SimplifyInstruction(
+            New,
+            {BB->getModule()->getDataLayout(), TLI, nullptr, nullptr, New})) {
       ValueMapping[&*BI] = IV;
       if (!New->mayHaveSideEffects()) {
         delete New;
diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 5042fc18d7c49..410fbb03068f4 100644
--- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -499,7 +499,7 @@ bool LoopIdiomRecognize::runOnLoopBlock(
     Instruction *Inst = &*I++;
     // Look for memset instructions, which may be optimized to a larger memset.
     if (MemSetInst *MSI = dyn_cast<MemSetInst>(Inst)) {
-      WeakVH InstPtr(&*I);
+      WeakTrackingVH InstPtr(&*I);
       if (!processLoopMemSet(MSI, BECount))
         continue;
       MadeChange = true;
@@ -856,7 +856,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
 /// If the stored value is a strided load in the same loop with the same stride
 /// this may be transformable into a memcpy.
This kicks in for stuff like -///   for (i) A[i] = B[i]; +/// for (i) A[i] = B[i];  bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,                                                      const SCEV *BECount) {    assert(SI->isSimple() && "Expected only non-volatile stores."); diff --git a/lib/Transforms/Scalar/LoopInstSimplify.cpp b/lib/Transforms/Scalar/LoopInstSimplify.cpp index 28e71ca05436c..af095560cc025 100644 --- a/lib/Transforms/Scalar/LoopInstSimplify.cpp +++ b/lib/Transforms/Scalar/LoopInstSimplify.cpp @@ -77,7 +77,7 @@ static bool SimplifyLoopInst(Loop *L, DominatorTree *DT, LoopInfo *LI,          // Don't bother simplifying unused instructions.          if (!I->use_empty()) { -          Value *V = SimplifyInstruction(I, DL, TLI, DT, AC); +          Value *V = SimplifyInstruction(I, {DL, TLI, DT, AC});            if (V && LI->replacementPreservesLCSSAForm(I, V)) {              // Mark all uses for resimplification next time round the loop.              for (User *U : I->users()) diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 8ce96cf1b7a67..2ba9265566a8c 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -341,7 +341,7 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {      // With the operands remapped, see if the instruction constant folds or is      // otherwise simplifyable.  This commonly occurs because the entry from PHI      // nodes allows icmps and other instructions to fold. -    Value *V = SimplifyInstruction(C, SQ.getWithInstruction(C)); +    Value *V = SimplifyInstruction(C, SQ);      if (V && LI->replacementPreservesLCSSAForm(C, V)) {        // If so, then delete the temporary instruction and stick the folded value        // in the map. @@ -670,8 +670,9 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM,                                        LPMUpdater &) {    int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;    const DataLayout &DL = L.getHeader()->getModule()->getDataLayout(); -  const SimplifyQuery SQ(DL, &AR.TLI, &AR.DT, &AR.AC); -  LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, SQ); +  const SimplifyQuery SQ = getBestSimplifyQuery(AR, DL); +  LoopRotate LR(Threshold, &AR.LI, &AR.TTI, &AR.AC, &AR.DT, &AR.SE, +                SQ);    bool Changed = LR.processLoop(&L);    if (!Changed) @@ -714,10 +715,7 @@ public:      auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;      auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();      auto *SE = SEWP ? &SEWP->getSE() : nullptr; -    auto *TLIWP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); -    auto *TLI = TLIWP ? &TLIWP->getTLI() : nullptr; -    const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); -    const SimplifyQuery SQ(DL, TLI, DT, AC); +    const SimplifyQuery SQ = getBestSimplifyQuery(*this, F);      LoopRotate LR(MaxHeaderSize, LI, TTI, AC, DT, SE, SQ);      return LR.processLoop(L);    } diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index a5a81c33a8ebd..35c05e84fd68d 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -40,7 +40,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) {    bool Changed = false;    // Copy blocks into a temporary array to avoid iterator invalidation issues    // as we remove them. 
-  SmallVector<WeakVH, 16> Blocks(L.blocks()); +  SmallVector<WeakTrackingVH, 16> Blocks(L.blocks());    for (auto &Block : Blocks) {      // Attempt to merge blocks in the trivial case. Don't modify blocks which diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index af137f6faa634..ccedb98d7fa15 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -900,7 +900,7 @@ static bool isHighCostExpansion(const SCEV *S,  /// If any of the instructions in the specified set are trivially dead, delete  /// them and see if this makes any of their operands subsequently dead.  static bool -DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) { +DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakTrackingVH> &DeadInsts) {    bool Changed = false;    while (!DeadInsts.empty()) { @@ -1845,7 +1845,7 @@ class LSRInstance {    void FinalizeChain(IVChain &Chain);    void CollectChains();    void GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, -                       SmallVectorImpl<WeakVH> &DeadInsts); +                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);    void CollectInterestingTypesAndFactors();    void CollectFixupsAndInitialFormulae(); @@ -1920,19 +1920,15 @@ class LSRInstance {                                    const LSRUse &LU,                                    SCEVExpander &Rewriter) const; -  Value *Expand(const LSRUse &LU, const LSRFixup &LF, -                const Formula &F, -                BasicBlock::iterator IP, -                SCEVExpander &Rewriter, -                SmallVectorImpl<WeakVH> &DeadInsts) const; +  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, +                BasicBlock::iterator IP, SCEVExpander &Rewriter, +                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;    void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, -                     const Formula &F, -                     SCEVExpander &Rewriter, -                     SmallVectorImpl<WeakVH> &DeadInsts) const; -  void Rewrite(const LSRUse &LU, const LSRFixup &LF, -               const Formula &F, +                     const Formula &F, SCEVExpander &Rewriter, +                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; +  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,                 SCEVExpander &Rewriter, -               SmallVectorImpl<WeakVH> &DeadInsts) const; +               SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;    void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);  public: @@ -3014,7 +3010,7 @@ static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,  /// Generate an add or subtract for each IVInc in a chain to materialize the IV  /// user's operand from the previous IV user's operand.  void LSRInstance::GenerateIVChain(const IVChain &Chain, SCEVExpander &Rewriter, -                                  SmallVectorImpl<WeakVH> &DeadInsts) { +                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts) {    // Find the new IVOperand for the head of the chain. It may have been replaced    // by LSR.    const IVInc &Head = Chain.Incs[0]; @@ -4759,12 +4755,10 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,  /// Emit instructions for the leading candidate expression for this LSRUse (this  /// is called "expanding"). 
-Value *LSRInstance::Expand(const LSRUse &LU, -                           const LSRFixup &LF, -                           const Formula &F, -                           BasicBlock::iterator IP, +Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, +                           const Formula &F, BasicBlock::iterator IP,                             SCEVExpander &Rewriter, -                           SmallVectorImpl<WeakVH> &DeadInsts) const { +                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {    if (LU.RigidFormula)      return LF.OperandValToReplace; @@ -4939,12 +4933,9 @@ Value *LSRInstance::Expand(const LSRUse &LU,  /// Helper for Rewrite. PHI nodes are special because the use of their operands  /// effectively happens in their predecessor blocks, so the expression may need  /// to be expanded in multiple places. -void LSRInstance::RewriteForPHI(PHINode *PN, -                                const LSRUse &LU, -                                const LSRFixup &LF, -                                const Formula &F, -                                SCEVExpander &Rewriter, -                                SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::RewriteForPHI( +    PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F, +    SCEVExpander &Rewriter, SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {    DenseMap<BasicBlock *, Value *> Inserted;    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)      if (PN->getIncomingValue(i) == LF.OperandValToReplace) { @@ -5016,11 +5007,9 @@ void LSRInstance::RewriteForPHI(PHINode *PN,  /// Emit instructions for the leading candidate expression for this LSRUse (this  /// is called "expanding"), and update the UserInst to reference the newly  /// expanded value. -void LSRInstance::Rewrite(const LSRUse &LU, -                          const LSRFixup &LF, -                          const Formula &F, -                          SCEVExpander &Rewriter, -                          SmallVectorImpl<WeakVH> &DeadInsts) const { +void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, +                          const Formula &F, SCEVExpander &Rewriter, +                          SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {    // First, find an insertion point that dominates UserInst. For PHI nodes,    // find the nearest block which dominates all the relevant uses.    if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { @@ -5058,7 +5047,7 @@ void LSRInstance::ImplementSolution(      const SmallVectorImpl<const Formula *> &Solution) {    // Keep track of instructions we may have made dead, so that    // we can remove them after we are done working. -  SmallVector<WeakVH, 16> DeadInsts; +  SmallVector<WeakTrackingVH, 16> DeadInsts;    SCEVExpander Rewriter(SE, L->getHeader()->getModule()->getDataLayout(),                          "lsr"); @@ -5308,7 +5297,7 @@ static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,    // Remove any extra phis created by processing inner loops.    
Changed |= DeleteDeadPHIs(L->getHeader());    if (EnablePhiElim && L->isLoopSimplifyForm()) { -    SmallVector<WeakVH, 16> DeadInsts; +    SmallVector<WeakTrackingVH, 16> DeadInsts;      const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();      SCEVExpander Rewriter(SE, DL, "lsr");  #ifndef NDEBUG diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 8fa806a7e8bcc..6ef1464e9338e 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -1231,11 +1231,12 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,    LoopProcessWorklist.push_back(NewLoop);    redoLoop = true; -  // Keep a WeakVH holding onto LIC.  If the first call to RewriteLoopBody +  // Keep a WeakTrackingVH holding onto LIC.  If the first call to +  // RewriteLoopBody    // deletes the instruction (for example by simplifying a PHI that feeds into    // the condition that we're unswitching on), we don't rewrite the second    // iteration. -  WeakVH LICHandle(LIC); +  WeakTrackingVH LICHandle(LIC);    // Now we rewrite the original code to know that the condition is true and the    // new code to know that the condition is false. @@ -1262,7 +1263,7 @@ static void RemoveFromWorklist(Instruction *I,  static void ReplaceUsesOfWith(Instruction *I, Value *V,                                std::vector<Instruction*> &Worklist,                                Loop *L, LPPassManager *LPM) { -  DEBUG(dbgs() << "Replace with '" << *V << "': " << *I); +  DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n");    // Add uses to the worklist, which may be dead now.    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) @@ -1275,7 +1276,8 @@ static void ReplaceUsesOfWith(Instruction *I, Value *V,    LPM->deleteSimpleAnalysisValue(I, L);    RemoveFromWorklist(I, Worklist);    I->replaceAllUsesWith(V); -  I->eraseFromParent(); +  if (!I->mayHaveSideEffects()) +    I->eraseFromParent();    ++NumSimplify;  } @@ -1431,7 +1433,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {      // Simple DCE.      if (isInstructionTriviallyDead(I)) { -      DEBUG(dbgs() << "Remove dead instruction '" << *I); +      DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n");        // Add uses to the worklist, which may be dead now.        for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp index a3f3f25c1e0f6..21a632073da7a 100644 --- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1323,7 +1323,7 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) {    // Get the alignment of the byval.  If the call doesn't specify the alignment,    // then it is some target specific value that we can't know. 
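The WeakVH to WeakTrackingVH rename running through this diff is more than cosmetic: as the LoopUnswitch comment above spells out, these call sites rely on the handle both following replaceAllUsesWith and going null when the value is deleted. A small sketch of those semantics; the demo functions are hypothetical, only the WeakTrackingVH behavior is from the API:

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/ValueHandle.h"
    #include <cassert>
    using namespace llvm;

    // A handle initialized to Old follows RAUW to the replacement value.
    static void demoRAUW(Instruction *Old, Value *Replacement) {
      WeakTrackingVH VH(Old);
      Old->replaceAllUsesWith(Replacement);
      assert(VH == Replacement && "handle tracked the RAUW");
      Old->eraseFromParent(); // Old is dead now; VH still designates Replacement.
    }

    // Deleting the tracked value clears the handle instead of dangling, which
    // is why the worklists in this patch test entries for null before use.
    static void demoDelete(Instruction *Dead) {
      assert(Dead->use_empty() && "only erase dead instructions");
      WeakTrackingVH VH(Dead);
      Dead->eraseFromParent();
      assert(VH == nullptr && "handle cleared on deletion");
    }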
-  unsigned ByValAlign = CS.getParamAlignment(ArgNo+1); +  unsigned ByValAlign = CS.getParamAlignment(ArgNo);    if (ByValAlign == 0) return false;    // If it is greater than the memcpy, then we check to see if we can force the diff --git a/lib/Transforms/Scalar/NaryReassociate.cpp b/lib/Transforms/Scalar/NaryReassociate.cpp index c5bf2f28d1852..d0bfe36038973 100644 --- a/lib/Transforms/Scalar/NaryReassociate.cpp +++ b/lib/Transforms/Scalar/NaryReassociate.cpp @@ -211,7 +211,8 @@ bool NaryReassociatePass::doOneIteration(Function &F) {            Changed = true;            SE->forgetValue(&*I);            I->replaceAllUsesWith(NewI); -          // If SeenExprs constains I's WeakVH, that entry will be replaced with +          // If SeenExprs contains I's WeakTrackingVH, that entry will be +          // replaced with            // nullptr.            RecursivelyDeleteTriviallyDeadInstructions(&*I, TLI);            I = NewI->getIterator(); @@ -219,7 +220,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) {          // Add the rewritten instruction to SeenExprs; the original instruction          // is deleted.          const SCEV *NewSCEV = SE->getSCEV(&*I); -        SeenExprs[NewSCEV].push_back(WeakVH(&*I)); +        SeenExprs[NewSCEV].push_back(WeakTrackingVH(&*I));          // Ideally, NewSCEV should equal OldSCEV because tryReassociate(I)          // is equivalent to I. However, ScalarEvolution::getSCEV may          // weaken nsw causing NewSCEV not to equal OldSCEV. For example, suppose @@ -239,7 +240,7 @@ bool NaryReassociatePass::doOneIteration(Function &F) {          //          // This improvement is exercised in @reassociate_gep_nsw in nary-gep.ll.          if (NewSCEV != OldSCEV) -          SeenExprs[OldSCEV].push_back(WeakVH(&*I)); +          SeenExprs[OldSCEV].push_back(WeakTrackingVH(&*I));        }      }    } @@ -494,7 +495,8 @@ NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,    // future instruction either. Therefore, we pop it out of the stack. This    // optimization makes the algorithm O(n).    while (!Candidates.empty()) { -    // Candidates stores WeakVHs, so a candidate can be nullptr if it's removed +    // Candidates stores WeakTrackingVHs, so a candidate can be nullptr if it's +    // removed      // during rewriting.      
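The getParamAlignment change at the top of this hunk, like the getParamAttributes calls in ArgumentPromotion earlier in the diff, reflects an indexing cleanup: parameter accessors now take the zero-based argument number and apply the attribute-slot bias internally, instead of making every caller compute ArgNo + 1 against the raw AttributeList indices (0 is the return value, ~0u the function). A sketch of the hazard being retired; paramAlign is a hypothetical helper, not from the patch:

    #include "llvm/IR/CallSite.h"
    using namespace llvm;

    // Hypothetical helper: alignment of the ArgNo'th parameter, zero-based.
    static unsigned paramAlign(CallSite CS, unsigned ArgNo) {
      // New convention: no manual bias. The old form, getParamAlignment(
      // ArgNo + 1), silently read the wrong slot whenever a caller forgot
      // the +1 or applied it twice.
      return CS.getParamAlignment(ArgNo);
    }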
if (Value *Candidate = Candidates.back()) {        Instruction *CandidateInstruction = cast<Instruction>(Candidate); diff --git a/lib/Transforms/Scalar/NewGVN.cpp b/lib/Transforms/Scalar/NewGVN.cpp index a014ddd9ba0a4..162d91beae764 100644 --- a/lib/Transforms/Scalar/NewGVN.cpp +++ b/lib/Transforms/Scalar/NewGVN.cpp @@ -395,7 +395,6 @@ namespace {  class NewGVN {    Function &F;    DominatorTree *DT; -  AssumptionCache *AC;    const TargetLibraryInfo *TLI;    AliasAnalysis *AA;    MemorySSA *MSSA; @@ -405,6 +404,7 @@ class NewGVN {    BumpPtrAllocator ExpressionAllocator;    ArrayRecycler<Value *> ArgRecycler;    TarjanSCC SCCFinder; +  const SimplifyQuery SQ;    // Number of function arguments, used by ranking    unsigned int NumFuncArgs; @@ -504,8 +504,9 @@ public:    NewGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,           TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA,           const DataLayout &DL) -      : F(F), DT(DT), AC(AC), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), -        PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)) {} +      : F(F), DT(DT), TLI(TLI), AA(AA), MSSA(MSSA), DL(DL), +        PredInfo(make_unique<PredicateInfo>(F, *DT, *AC)), SQ(DL, TLI, DT, AC) { +  }    bool runGVN();  private: @@ -782,8 +783,7 @@ const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,    E->op_push_back(lookupOperandLeader(Arg1));    E->op_push_back(lookupOperandLeader(Arg2)); -  Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), DL, TLI, -                           DT, AC); +  Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), SQ);    if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V))      return SimplifiedE;    return E; @@ -864,8 +864,8 @@ const Expression *NewGVN::createExpression(Instruction *I) {             "Wrong types on cmp instruction");      assert((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&              E->getOperand(1)->getType() == I->getOperand(1)->getType())); -    Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), -                               DL, TLI, DT, AC); +    Value *V = +        SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1), SQ);      if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))        return SimplifiedE;    } else if (isa<SelectInst>(I)) { @@ -874,23 +874,23 @@ const Expression *NewGVN::createExpression(Instruction *I) {        assert(E->getOperand(1)->getType() == I->getOperand(1)->getType() &&               E->getOperand(2)->getType() == I->getOperand(2)->getType());        Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1), -                                    E->getOperand(2), DL, TLI, DT, AC); +                                    E->getOperand(2), SQ);        if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))          return SimplifiedE;      }    } else if (I->isBinaryOp()) { -    Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), -                             DL, TLI, DT, AC); +    Value *V = +        SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1), SQ);      if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))        return SimplifiedE;    } else if (auto *BI = dyn_cast<BitCastInst>(I)) { -    Value *V = SimplifyInstruction(BI, DL, TLI, DT, AC); +    Value *V = +        SimplifyCastInst(BI->getOpcode(), BI->getOperand(0), BI->getType(), SQ);      if (const Expression 
*SimplifiedE = checkSimplificationResults(E, I, V))        return SimplifiedE;    } else if (isa<GetElementPtrInst>(I)) { -    Value *V = SimplifyGEPInst(E->getType(), -                               ArrayRef<Value *>(E->op_begin(), E->op_end()), -                               DL, TLI, DT, AC); +    Value *V = SimplifyGEPInst( +        E->getType(), ArrayRef<Value *>(E->op_begin(), E->op_end()), SQ);      if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))        return SimplifiedE;    } else if (AllConstant) { @@ -1628,15 +1628,15 @@ const Expression *NewGVN::performSymbolicCmpEvaluation(Instruction *I) {          if (PBranch->TrueEdge) {            // If we know the previous predicate is true and we are in the true            // edge then we may be implied true or false. -          if (CmpInst::isImpliedTrueByMatchingCmp(OurPredicate, -                                                  BranchPredicate)) { +          if (CmpInst::isImpliedTrueByMatchingCmp(BranchPredicate, +                                                  OurPredicate)) {              addPredicateUsers(PI, I);              return createConstantExpression(                  ConstantInt::getTrue(CI->getType()));            } -          if (CmpInst::isImpliedFalseByMatchingCmp(OurPredicate, -                                                   BranchPredicate)) { +          if (CmpInst::isImpliedFalseByMatchingCmp(BranchPredicate, +                                                   OurPredicate)) {              addPredicateUsers(PI, I);              return createConstantExpression(                  ConstantInt::getFalse(CI->getType())); diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 3dcab60907896..ef29d4141600a 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -982,7 +982,7 @@ static unsigned FindInOperandList(SmallVectorImpl<ValueEntry> &Ops, unsigned i,  /// Emit a tree of add instructions, summing Ops together  /// and returning the result.  Insert the tree before I.  static Value *EmitAddTreeOfValues(Instruction *I, -                                  SmallVectorImpl<WeakVH> &Ops){ +                                  SmallVectorImpl<WeakTrackingVH> &Ops) {    if (Ops.size() == 1) return Ops.back();    Value *V1 = Ops.back(); @@ -1559,7 +1559,7 @@ Value *ReassociatePass::OptimizeAdd(Instruction *I,              ? BinaryOperator::CreateAdd(MaxOccVal, MaxOccVal)              : BinaryOperator::CreateFAdd(MaxOccVal, MaxOccVal); -    SmallVector<WeakVH, 4> NewMulOps; +    SmallVector<WeakTrackingVH, 4> NewMulOps;      for (unsigned i = 0; i != Ops.size(); ++i) {        // Only try to remove factors from expressions we're allowed to.        BinaryOperator *BOp = diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index f344eb151464a..c11247c06b856 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -1128,39 +1128,23 @@ normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,  // Create new attribute set containing only attributes which can be transferred  // from original call to the safepoint. 
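The rewrite of legalizeCallAttributes just below replaces manual slot iteration with AttrBuilder-based filtering. Condensed, the pattern it adopts looks like this; stripFnAttrs is a hypothetical name, and the sketch assumes, as the patch's comments say, that only function attributes survive:

    #include "llvm/IR/Attributes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;

    // Keep only the function attributes of AL, minus a few that are illegal
    // on a statepoint; parameter and return attributes are dropped wholesale.
    static AttributeList stripFnAttrs(LLVMContext &Ctx, AttributeList AL) {
      AttrBuilder FnAttrs = AL.getFnAttributes();
      FnAttrs.removeAttribute(Attribute::ReadNone); // a safepoint may read...
      FnAttrs.removeAttribute(Attribute::ReadOnly); // ...and write memory
      return AttributeList::get(Ctx, AttributeList::FunctionIndex,
                                AttributeSet::get(Ctx, FnAttrs));
    }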
-static AttributeList legalizeCallAttributes(AttributeList AS) { -  AttributeList Ret; - -  for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { -    unsigned Index = AS.getSlotIndex(Slot); - -    if (Index == AttributeList::ReturnIndex || -        Index == AttributeList::FunctionIndex) { - -      for (Attribute Attr : make_range(AS.begin(Slot), AS.end(Slot))) { - -        // Do not allow certain attributes - just skip them -        // Safepoint can not be read only or read none. -        if (Attr.hasAttribute(Attribute::ReadNone) || -            Attr.hasAttribute(Attribute::ReadOnly)) -          continue; - -        // These attributes control the generation of the gc.statepoint call / -        // invoke itself; and once the gc.statepoint is in place, they're of no -        // use. -        if (isStatepointDirectiveAttr(Attr)) -          continue; - -        Ret = Ret.addAttributes( -            AS.getContext(), Index, -            AttributeList::get(AS.getContext(), Index, AttrBuilder(Attr))); -      } -    } - -    // Just skip parameter attributes for now -  } - -  return Ret; +static AttributeList legalizeCallAttributes(AttributeList AL) { +  if (AL.isEmpty()) +    return AL; + +  // Remove the readonly, readnone, and statepoint function attributes. +  AttrBuilder FnAttrs = AL.getFnAttributes(); +  FnAttrs.removeAttribute(Attribute::ReadNone); +  FnAttrs.removeAttribute(Attribute::ReadOnly); +  for (Attribute A : AL.getFnAttributes()) { +    if (isStatepointDirectiveAttr(A)) +      FnAttrs.remove(A); +  } + +  // Just skip parameter and return attributes for now +  LLVMContext &Ctx = AL.getContext(); +  return AttributeList::get(Ctx, AttributeList::FunctionIndex, +                            AttributeSet::get(Ctx, FnAttrs));  }  /// Helper function to place all gc relocates necessary for the given @@ -1402,13 +1386,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */      Call->setCallingConv(ToReplace->getCallingConv());      // Currently we will fail on parameter attributes and on certain -    // function attributes. -    AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); -    // In case if we can handle this set of attributes - set up function attrs -    // directly on statepoint and return attrs later for gc_result intrinsic. -    Call->setAttributes(AttributeList::get(Call->getContext(), -                                           AttributeList::FunctionIndex, -                                           NewAttrs.getFnAttributes())); +    // function attributes.  If we can handle this set of attributes, set up +    // function attrs directly on the statepoint and return attrs later for the +    // gc_result intrinsic. +    Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));      Token = Call; @@ -1431,13 +1412,10 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */      Invoke->setCallingConv(ToReplace->getCallingConv());      // Currently we will fail on parameter attributes and on certain -    // function attributes. -    AttributeList NewAttrs = legalizeCallAttributes(ToReplace->getAttributes()); -    // In case if we can handle this set of attributes - set up function attrs -    // directly on statepoint and return attrs later for gc_result intrinsic. -    Invoke->setAttributes(AttributeList::get(Invoke->getContext(), -                                             AttributeList::FunctionIndex, -                                             NewAttrs.getFnAttributes())); +    // function attributes. 
 If we can handle this set of attributes, set up +    // function attrs directly on the statepoint and return attrs later for the +    // gc_result intrinsic. +    Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));      Token = Invoke; diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index d01e91a7f2356..1d9beffaf06bf 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -25,6 +25,7 @@  #include "llvm/Transforms/Scalar/SROA.h"  #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/AssumptionCache.h" @@ -2186,8 +2187,8 @@ class llvm::sroa::AllocaSliceRewriter    Instruction *OldPtr;    // Track post-rewrite users which are PHI nodes and Selects. -  SmallPtrSetImpl<PHINode *> &PHIUsers; -  SmallPtrSetImpl<SelectInst *> &SelectUsers; +  SmallSetVector<PHINode *, 8> &PHIUsers; +  SmallSetVector<SelectInst *, 8> &SelectUsers;    // Utility IR builder, whose name prefix is setup for each visited use, and    // the insertion point is set to point to the user. @@ -2199,8 +2200,8 @@ public:                        uint64_t NewAllocaBeginOffset,                        uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,                        VectorType *PromotableVecTy, -                      SmallPtrSetImpl<PHINode *> &PHIUsers, -                      SmallPtrSetImpl<SelectInst *> &SelectUsers) +                      SmallSetVector<PHINode *, 8> &PHIUsers, +                      SmallSetVector<SelectInst *, 8> &SelectUsers)        : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),          NewAllocaBeginOffset(NewAllocaBeginOffset),          NewAllocaEndOffset(NewAllocaEndOffset), @@ -3880,8 +3881,8 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,    // fact scheduled for promotion.    unsigned PPWOldSize = PostPromotionWorklist.size();    unsigned NumUses = 0; -  SmallPtrSet<PHINode *, 8> PHIUsers; -  SmallPtrSet<SelectInst *, 8> SelectUsers; +  SmallSetVector<PHINode *, 8> PHIUsers; +  SmallSetVector<SelectInst *, 8> SelectUsers;    AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),                                 P.endOffset(), IsIntegerPromotable, VecTy, @@ -3902,19 +3903,16 @@ AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,    // Now that we've processed all the slices in the new partition, check if any    // PHIs or Selects would block promotion. 
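A note on the container swap above: SmallSetVector deduplicates like SmallPtrSet but iterates in insertion order, so the speculation checks over PHI and select users (rewritten as range-based loops just below) now visit them deterministically rather than in an order dependent on pointer values. A tiny sketch of the behavior, with a hypothetical demo function:

    #include "llvm/ADT/SetVector.h"
    #include <cassert>

    static void demoSetVector() {
      llvm::SmallSetVector<int, 8> SV;
      SV.insert(3);
      SV.insert(1);
      SV.insert(3); // duplicate: rejected, existing order untouched
      assert(SV.size() == 2);
      assert(SV[0] == 3 && SV[1] == 1 && "iteration follows insertion order");
    }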
-  for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(), -                                            E = PHIUsers.end(); -       I != E; ++I) -    if (!isSafePHIToSpeculate(**I)) { +  for (PHINode *PHI : PHIUsers) +    if (!isSafePHIToSpeculate(*PHI)) {        Promotable = false;        PHIUsers.clear();        SelectUsers.clear();        break;      } -  for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(), -                                               E = SelectUsers.end(); -       I != E; ++I) -    if (!isSafeSelectToSpeculate(**I)) { + +  for (SelectInst *Sel : SelectUsers) +    if (!isSafeSelectToSpeculate(*Sel)) {        Promotable = false;        PHIUsers.clear();        SelectUsers.clear(); diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index 00e3c95f6f06d..52201d8f3e51a 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -21,6 +21,7 @@  #include "llvm/Analysis/ScopedNoAliasAA.h"  #include "llvm/Analysis/TypeBasedAliasAnalysis.h"  #include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Verifier.h"  #include "llvm/InitializePasses.h" @@ -83,6 +84,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {    initializeCFGSimplifyPassPass(Registry);    initializeLateCFGSimplifyPassPass(Registry);    initializeStructurizeCFGPass(Registry); +  initializeSimpleLoopUnswitchLegacyPassPass(Registry);    initializeSinkingLegacyPassPass(Registry);    initializeTailCallElimPass(Registry);    initializeSeparateConstOffsetFromGEPPass(Registry); diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index 4d594532c3651..cde659b9d189f 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -1138,7 +1138,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {    // Add I to DominatingExprs if it's an add/sub that can't sign overflow.    if (match(I, m_NSWAdd(m_Value(LHS), m_Value(RHS))) ||        match(I, m_NSWSub(m_Value(LHS), m_Value(RHS)))) { -    if (isKnownNotFullPoison(I)) { +    if (programUndefinedIfFullPoison(I)) {        const SCEV *Key =            SE->getAddExpr(SE->getUnknown(LHS), SE->getUnknown(RHS));        DominatingExprs[Key].push_back(I); diff --git a/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp new file mode 100644 index 0000000000000..fb1b47c48276c --- /dev/null +++ b/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp @@ -0,0 +1,626 @@ +//===-- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow --------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Utils/LoopUtils.h" + +#define DEBUG_TYPE "simple-loop-unswitch" + +using namespace llvm; + +STATISTIC(NumBranches, "Number of branches unswitched"); +STATISTIC(NumSwitches, "Number of switches unswitched"); +STATISTIC(NumTrivial, "Number of unswitches that are trivial"); + +static void replaceLoopUsesWithConstant(Loop &L, Value &LIC, +                                        Constant &Replacement) { +  assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?"); + +  // Replace uses of LIC in the loop with the given constant. +  for (auto UI = LIC.use_begin(), UE = LIC.use_end(); UI != UE;) { +    // Grab the use and walk past it so we can clobber it in the use list. +    Use *U = &*UI++; +    Instruction *UserI = dyn_cast<Instruction>(U->getUser()); +    if (!UserI || !L.contains(UserI)) +      continue; + +    // Replace this use within the loop body. +    *U = &Replacement; +  } +} + +/// Update the dominator tree after removing one exiting predecessor of a loop +/// exit block. +static void updateLoopExitIDom(BasicBlock *LoopExitBB, Loop &L, +                                            DominatorTree &DT) { +  assert(pred_begin(LoopExitBB) != pred_end(LoopExitBB) && +         "Cannot have empty predecessors of the loop exit block if we split " +         "off a block to unswitch!"); + +  BasicBlock *IDom = *pred_begin(LoopExitBB); +  // Walk all of the other predecessors finding the nearest common dominator +  // until all predecessors are covered or we reach the loop header. The loop +  // header necessarily dominates all loop exit blocks in loop simplified form +  // so we can early-exit the moment we hit that block. +  for (auto PI = std::next(pred_begin(LoopExitBB)), PE = pred_end(LoopExitBB); +       PI != PE && IDom != L.getHeader(); ++PI) +    IDom = DT.findNearestCommonDominator(IDom, *PI); + +  DT.changeImmediateDominator(LoopExitBB, IDom); +} + +/// Update the dominator tree after unswitching a particular former exit block. +/// +/// This handles the full update of the dominator tree after hoisting a block +/// that previously was an exit block (or split off of an exit block) up to be +/// reached from the new immediate dominator of the preheader. +/// +/// The common case is simple -- we just move the unswitched block to have an +/// immediate dominator of the old preheader. But in complex cases, there may +/// be other blocks reachable from the unswitched block that are immediately +/// dominated by some node between the unswitched one and the old preheader. +/// All of these also need to be hoisted in the dominator tree. 
We also want to /// minimize queries to the dominator tree because each step of this /// invalidates any DFS numbers that would make queries fast. +static void updateDTAfterUnswitch(BasicBlock *UnswitchedBB, BasicBlock *OldPH, +                                  DominatorTree &DT) { +  DomTreeNode *OldPHNode = DT[OldPH]; +  DomTreeNode *UnswitchedNode = DT[UnswitchedBB]; +  // If the dominator tree has already been updated for this unswitched node, +  // we're done. This makes it easier to use this routine if there are multiple +  // paths to the same unswitched destination. +  if (UnswitchedNode->getIDom() == OldPHNode) +    return; + +  // First collect the domtree nodes that we are hoisting over. These are the +  // set of nodes which may have children that need to be hoisted as well. +  SmallPtrSet<DomTreeNode *, 4> DomChain; +  for (auto *IDom = UnswitchedNode->getIDom(); IDom != OldPHNode; +       IDom = IDom->getIDom()) +    DomChain.insert(IDom); + +  // The unswitched block ends up immediately dominated by the old preheader -- +  // regardless of whether it is the loop exit block or split off of the loop +  // exit block. +  DT.changeImmediateDominator(UnswitchedNode, OldPHNode); + +  // Blocks reachable from the unswitched block may need to change their IDom +  // as well. +  SmallSetVector<BasicBlock *, 4> Worklist; +  for (auto *SuccBB : successors(UnswitchedBB)) +    Worklist.insert(SuccBB); + +  // Walk the worklist. We grow the list in the loop and so must recompute size. +  for (int i = 0; i < (int)Worklist.size(); ++i) { +    auto *BB = Worklist[i]; + +    DomTreeNode *Node = DT[BB]; +    assert(!DomChain.count(Node) && +           "Cannot be dominated by a block you can reach!"); +    // If this block doesn't have an immediate dominator somewhere in the chain +    // we hoisted over, then its position in the domtree hasn't changed. Either +    // it is above the hoisted region and still valid, or it is below the +    // hoisted block and so was trivially updated. This also applies to +    // everything reachable from this block so we're completely done with +    // it. +    if (!DomChain.count(Node->getIDom())) +      continue; + +    // We need to change the IDom for this node but also walk its successors +    // which could have similar dominance position. +    DT.changeImmediateDominator(Node, OldPHNode); +    for (auto *SuccBB : successors(BB)) +      Worklist.insert(SuccBB); +  } +} + +/// Unswitch a trivial branch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the branch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and one of the successors is a loop exit. This +/// allows us to unswitch without duplicating the loop, making it trivial. +/// +/// If this routine fails to unswitch the branch it returns false. +/// +/// If the branch can be unswitched, this routine splits the preheader and +/// hoists the branch above that split. Preserves loop simplified form +/// (splitting the exit block as necessary). It simplifies the branch within +/// the loop to an unconditional branch but doesn't remove it entirely. Further +/// cleanup can be done with some simplify-cfg like pass. 
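Before the implementation, a source-level picture of the transform may help; this example is illustrative and not taken from the patch:

    extern void body(int);

    // Before: a loop-invariant test guards an early exit on every iteration.
    void beforeUnswitch(bool flag, int n) {
      for (int i = 0; i < n; ++i) {
        if (flag) // invariant; the true edge leaves the loop, no prior effects
          break;
        body(i);
      }
    }

    // After trivial unswitching, conceptually: the test moves to the
    // preheader and the in-loop branch collapses to an unconditional edge.
    void afterUnswitch(bool flag, int n) {
      if (!flag)
        for (int i = 0; i < n; ++i)
          body(i);
    }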
+static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT, +                                  LoopInfo &LI) { +  assert(BI.isConditional() && "Can only unswitch a conditional branch!"); +  DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n"); + +  Value *LoopCond = BI.getCondition(); + +  // Need a trivial loop condition to unswitch. +  if (!L.isLoopInvariant(LoopCond)) +    return false; + +  // FIXME: We should compute this once at the start and update it! +  SmallVector<BasicBlock *, 16> ExitBlocks; +  L.getExitBlocks(ExitBlocks); +  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), +                                             ExitBlocks.end()); + +  // Check to see if a successor of the branch is guaranteed to +  // exit through a unique exit block without having any +  // side-effects.  If so, determine the value of Cond that causes +  // it to do this. +  ConstantInt *CondVal = ConstantInt::getTrue(BI.getContext()); +  ConstantInt *Replacement = ConstantInt::getFalse(BI.getContext()); +  int LoopExitSuccIdx = 0; +  auto *LoopExitBB = BI.getSuccessor(0); +  if (!ExitBlockSet.count(LoopExitBB)) { +    std::swap(CondVal, Replacement); +    LoopExitSuccIdx = 1; +    LoopExitBB = BI.getSuccessor(1); +    if (!ExitBlockSet.count(LoopExitBB)) +      return false; +  } +  auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx); +  assert(L.contains(ContinueBB) && +         "Cannot have both successors exit and still be in the loop!"); + +  // If the loop exit block contains phi nodes, this isn't trivial. +  // FIXME: We should examine the PHI to determine whether or not we can handle +  // it trivially. +  if (isa<PHINode>(LoopExitBB->begin())) +    return false; + +  DEBUG(dbgs() << "    unswitching trivial branch when: " << CondVal +               << " == " << LoopCond << "\n"); + +  // Split the preheader, so that we know that there is a safe place to insert +  // the conditional branch. We will change the preheader to have a conditional +  // branch on LoopCond. +  BasicBlock *OldPH = L.getLoopPreheader(); +  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); + +  // Now that we have a place to insert the conditional branch, create a place +  // to branch to: this is the exit block out of the loop that we are +  // unswitching. We need to split this if there are other loop predecessors. +  // Because the loop is in simplified form, *any* other predecessor is enough. +  BasicBlock *UnswitchedBB; +  if (BasicBlock *PredBB = LoopExitBB->getUniquePredecessor()) { +    (void)PredBB; +    assert(PredBB == BI.getParent() && "A branch's parent isn't a predecessor!"); +    UnswitchedBB = LoopExitBB; +  } else { +    UnswitchedBB = SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI); +  } + +  BasicBlock *ParentBB = BI.getParent(); + +  // Now splice the branch to gate reaching the new preheader and re-point its +  // successors. +  OldPH->getInstList().splice(std::prev(OldPH->end()), +                              BI.getParent()->getInstList(), BI); +  OldPH->getTerminator()->eraseFromParent(); +  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB); +  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH); + +  // Create a new unconditional branch that will continue the loop as a new +  // terminator. +  BranchInst::Create(ContinueBB, ParentBB); + +  // Now we need to update the dominator tree. 
+  updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); +  // But if we split something off of the loop exit block then we also removed +  // one of the predecessors for the loop exit block and may need to update its +  // idom. +  if (UnswitchedBB != LoopExitBB) +    updateLoopExitIDom(LoopExitBB, L, DT); + +  // Since this is an i1 condition we can also trivially replace uses of it +  // within the loop with a constant. +  replaceLoopUsesWithConstant(L, *LoopCond, *Replacement); + +  ++NumTrivial; +  ++NumBranches; +  return true; +} + +/// Unswitch a trivial switch if the condition is loop invariant. +/// +/// This routine should only be called when loop code leading to the switch has +/// been validated as trivial (no side effects). This routine checks if the +/// condition is invariant and that at least one of the successors is a loop +/// exit. This allows us to unswitch without duplicating the loop, making it +/// trivial. +/// +/// If this routine fails to unswitch the switch it returns false. +/// +/// If the switch can be unswitched, this routine splits the preheader and +/// copies the switch above that split. If the default case is one of the +/// exiting cases, it copies the non-exiting cases and points them at the new +/// preheader. If the default case is not exiting, it copies the exiting cases +/// and points the default at the preheader. It preserves loop simplified form +/// (splitting the exit blocks as necessary). It simplifies the switch within +/// the loop by removing now-dead cases. If the default case is one of those +/// unswitched, it replaces its destination with a new basic block containing +/// only unreachable. Such basic blocks, while technically loop exits, are not +/// considered for unswitching so this is a stable transform and the same +/// switch will not be revisited. If after unswitching there is only a single +/// in-loop successor, the switch is further simplified to an unconditional +/// branch. Still more cleanup can be done with some simplify-cfg like pass. +static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT, +                                  LoopInfo &LI) { +  DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n"); +  Value *LoopCond = SI.getCondition(); + +  // If this isn't switching on an invariant condition, we can't unswitch it. +  if (!L.isLoopInvariant(LoopCond)) +    return false; + +  // FIXME: We should compute this once at the start and update it! +  SmallVector<BasicBlock *, 16> ExitBlocks; +  L.getExitBlocks(ExitBlocks); +  SmallPtrSet<BasicBlock *, 16> ExitBlockSet(ExitBlocks.begin(), +                                             ExitBlocks.end()); + +  SmallVector<int, 4> ExitCaseIndices; +  for (auto Case : SI.cases()) { +    auto *SuccBB = Case.getCaseSuccessor(); +    if (ExitBlockSet.count(SuccBB) && !isa<PHINode>(SuccBB->begin())) +      ExitCaseIndices.push_back(Case.getCaseIndex()); +  } +  BasicBlock *DefaultExitBB = nullptr; +  if (ExitBlockSet.count(SI.getDefaultDest()) && +      !isa<PHINode>(SI.getDefaultDest()->begin()) && +      !isa<UnreachableInst>(SI.getDefaultDest()->getTerminator())) +    DefaultExitBB = SI.getDefaultDest(); +  else if (ExitCaseIndices.empty()) +    return false; + +  DEBUG(dbgs() << "    unswitching trivial cases...\n"); + +  SmallVector<std::pair<ConstantInt *, BasicBlock *>, 4> ExitCases; +  ExitCases.reserve(ExitCaseIndices.size()); +  // We walk the case indices backwards so that we remove the last case first +  // and don't disrupt the earlier indices. 
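The backwards walk below is load-bearing: SwitchInst::removeCase compacts the case list, so deleting from the highest saved index first keeps the lower saved indices valid. The same index-stable idiom in miniature, on a plain vector; eraseIndices is illustrative, not from the patch:

    #include <algorithm>
    #include <vector>

    static void eraseIndices(std::vector<int> &V, std::vector<unsigned> Idx) {
      std::sort(Idx.begin(), Idx.end());
      // Erase back-to-front: each erase only shifts elements at higher
      // positions, all of which we have already processed.
      for (auto I = Idx.rbegin(), E = Idx.rend(); I != E; ++I)
        V.erase(V.begin() + *I);
    }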
+  for (unsigned Index : reverse(ExitCaseIndices)) { +    auto CaseI = SI.case_begin() + Index; +    // Save the value of this case. +    ExitCases.push_back({CaseI->getCaseValue(), CaseI->getCaseSuccessor()}); +    // Delete the unswitched cases. +    SI.removeCase(CaseI); +  } + +  // Check if after this all of the remaining cases point at the same +  // successor. +  BasicBlock *CommonSuccBB = nullptr; +  if (SI.getNumCases() > 0 && +      std::all_of(std::next(SI.case_begin()), SI.case_end(), +                  [&SI](const SwitchInst::CaseHandle &Case) { +                    return Case.getCaseSuccessor() == +                           SI.case_begin()->getCaseSuccessor(); +                  })) +    CommonSuccBB = SI.case_begin()->getCaseSuccessor(); + +  if (DefaultExitBB) { +    // We can't remove the default edge so replace it with an edge to either +    // the single common remaining successor (if we have one) or an unreachable +    // block. +    if (CommonSuccBB) { +      SI.setDefaultDest(CommonSuccBB); +    } else { +      BasicBlock *ParentBB = SI.getParent(); +      BasicBlock *UnreachableBB = BasicBlock::Create( +          ParentBB->getContext(), +          Twine(ParentBB->getName()) + ".unreachable_default", +          ParentBB->getParent()); +      new UnreachableInst(ParentBB->getContext(), UnreachableBB); +      SI.setDefaultDest(UnreachableBB); +      DT.addNewBlock(UnreachableBB, ParentBB); +    } +  } else { +    // If we're not unswitching the default, it can only be the common +    // successor if all of the remaining cases also branch to it; and if there +    // are no cases left, the default is trivially the common successor. +    if (SI.getNumCases() == 0) +      CommonSuccBB = SI.getDefaultDest(); +    else if (SI.getDefaultDest() != CommonSuccBB) +      CommonSuccBB = nullptr; +  } + +  // Split the preheader, so that we know that there is a safe place to insert +  // the switch. +  BasicBlock *OldPH = L.getLoopPreheader(); +  BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI); +  OldPH->getTerminator()->eraseFromParent(); + +  // Now add the unswitched switch. +  auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH); + +  // Split any exit blocks with remaining in-loop predecessors. We walk in +  // reverse so that we split in the same order as the cases appeared. This is +  // purely for convenience of reading the resulting IR, but it doesn't cost +  // anything really. +  SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap; +  // Handle the default exit if necessary. +  // FIXME: It'd be great if we could merge this with the loop below but LLVM's +  // ranges aren't quite powerful enough yet. +  if (DefaultExitBB && !pred_empty(DefaultExitBB)) { +    auto *SplitBB = +        SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI); +    updateLoopExitIDom(DefaultExitBB, L, DT); +    DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB; +  } +  // Note that we must use a reference in the for loop so that we update the +  // container. +  for (auto &CasePair : reverse(ExitCases)) { +    // Grab a reference to the exit block in the pair so that we can update it. +    BasicBlock *&ExitBB = CasePair.second; + +    // If this case is the last edge into the exit block, we can simply reuse it +    // as it will no longer be a loop exit. No mapping necessary. +    if (pred_empty(ExitBB)) +      continue; + +    // Otherwise we need to split the exit block so that we retain an exit +    // block from the loop and a target for the unswitched condition. 
+    BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB]; +    if (!SplitExitBB) { +      // If this is the first time we see this, do the split and remember it. +      SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI); +      updateLoopExitIDom(ExitBB, L, DT); +    } +    ExitBB = SplitExitBB; +  } + +  // Now add the unswitched cases. We do this in reverse order as we built them +  // in reverse order. +  for (auto CasePair : reverse(ExitCases)) { +    ConstantInt *CaseVal = CasePair.first; +    BasicBlock *UnswitchedBB = CasePair.second; + +    NewSI->addCase(CaseVal, UnswitchedBB); +    updateDTAfterUnswitch(UnswitchedBB, OldPH, DT); +  } + +  // If the default was unswitched, re-point it and add explicit cases for +  // entering the loop. +  if (DefaultExitBB) { +    NewSI->setDefaultDest(DefaultExitBB); +    updateDTAfterUnswitch(DefaultExitBB, OldPH, DT); + +    // We removed all the exit cases, so we just copy the cases to the +    // unswitched switch. +    for (auto Case : SI.cases()) +      NewSI->addCase(Case.getCaseValue(), NewPH); +  } + +  // If we ended up with a common successor for every path through the switch +  // after unswitching, rewrite it to an unconditional branch to make it easy +  // to recognize. Otherwise we potentially have to recognize the default case +  // pointing at unreachable and other complexity. +  if (CommonSuccBB) { +    BasicBlock *BB = SI.getParent(); +    SI.eraseFromParent(); +    BranchInst::Create(CommonSuccBB, BB); +  } + +  DT.verifyDomTree(); +  ++NumTrivial; +  ++NumSwitches; +  return true; +} + +/// This routine scans the loop to find a branch or switch which occurs before +/// any side effects occur. These can potentially be unswitched without +/// duplicating the loop. If a branch or switch is successfully unswitched the +/// scanning continues to see if subsequent branches or switches have become +/// trivial. Once all trivial candidates have been unswitched, this routine +/// returns. +/// +/// The return value indicates whether anything was unswitched (and therefore +/// changed). +static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT, +                                         LoopInfo &LI) { +  bool Changed = false; + +  // If loop header has only one reachable successor we should keep looking for +  // trivial condition candidates in the successor as well. An alternative is +  // to constant fold conditions and merge successors into loop header (then we +  // only need to check header's terminator). The reason for not doing this in +  // LoopUnswitch pass is that it could potentially break LoopPassManager's +  // invariants. Folding dead branches could either eliminate the current loop +  // or make other loops unreachable. LCSSA form might also not be preserved +  // after deleting branches. The following code keeps traversing loop header's +  // successors until it finds the trivial condition candidate (condition that +  // is not a constant). Since unswitching generates branches with constant +  // conditions, this scenario could be very common in practice. +  BasicBlock *CurrentBB = L.getHeader(); +  SmallPtrSet<BasicBlock *, 8> Visited; +  Visited.insert(CurrentBB); +  do { +    // Check if there are any side-effecting instructions (e.g. stores, calls, +    // volatile loads) in the part of the loop that the code *would* execute +    // without unswitching. 
+    if (llvm::any_of(*CurrentBB, +                     [](Instruction &I) { return I.mayHaveSideEffects(); })) +      return Changed; + +    TerminatorInst *CurrentTerm = CurrentBB->getTerminator(); + +    if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) { +      // Don't bother trying to unswitch past a switch with a constant +      // condition. This should be removed prior to running this pass by +      // simplify-cfg. +      if (isa<Constant>(SI->getCondition())) +        return Changed; + +      if (!unswitchTrivialSwitch(L, *SI, DT, LI)) +        // Couldn't unswitch this one, so we're done. +        return Changed; + +      // Mark that we managed to unswitch something. +      Changed = true; + +      // If unswitching turned the terminator into an unconditional branch then +      // we can continue. The unswitching logic specifically works to fold any +      // cases it can into an unconditional branch to make it easier to +      // recognize here. +      auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator()); +      if (!BI || BI->isConditional()) +        return Changed; + +      CurrentBB = BI->getSuccessor(0); +      continue; +    } + +    auto *BI = dyn_cast<BranchInst>(CurrentTerm); +    if (!BI) +      // We do not understand other terminator instructions. +      return Changed; + +    // Don't bother trying to unswitch past an unconditional branch or a branch +    // with a constant value. These should be removed by simplify-cfg prior to +    // running this pass. +    if (!BI->isConditional() || isa<Constant>(BI->getCondition())) +      return Changed; + +    // Found a trivial condition candidate: non-foldable conditional branch. If +    // we fail to unswitch this, we can't do anything else that is trivial. +    if (!unswitchTrivialBranch(L, *BI, DT, LI)) +      return Changed; + +    // Mark that we managed to unswitch something. +    Changed = true; + +    // We unswitched the branch. This should always leave us with an +    // unconditional branch that we can follow now. +    BI = cast<BranchInst>(CurrentBB->getTerminator()); +    assert(!BI->isConditional() && +           "Cannot form a conditional branch by unswitching!"); +    CurrentBB = BI->getSuccessor(0); + +    // When continuing, if we exit the loop or reach a previously visited block, +    // then we cannot reach any trivial condition candidates (unfoldable +    // branch instructions or switch instructions) and no unswitch can happen. +  } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second); + +  return Changed; +} + +/// Unswitch control flow predicated on loop invariant conditions. +/// +/// This first hoists all branches or switches which are trivial (i.e., do not +/// require duplicating any part of the loop) out of the loop body. It then +/// looks at other loop invariant control flows and tries to unswitch those as +/// well by cloning the loop if the result is small enough. +static bool unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, +                         AssumptionCache &AC) { +  assert(L.isLCSSAForm(DT) && +         "Loops must be in LCSSA form before unswitching."); +  bool Changed = false; + +  // Must be in loop simplified form: we need a preheader and dedicated exits. +  if (!L.isLoopSimplifyForm()) +    return false; + +  // Try trivial unswitching first before looping over other basic blocks in the loop. +  Changed |= unswitchAllTrivialConditions(L, DT, LI); + +  // FIXME: Add support for non-trivial unswitching by cloning the loop. 
+ +  return Changed; +} + +PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM, +                                              LoopStandardAnalysisResults &AR, +                                              LPMUpdater &U) { +  Function &F = *L.getHeader()->getParent(); +  (void)F; + +  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << L << "\n"); + +  if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC)) +    return PreservedAnalyses::all(); + +#ifndef NDEBUG +  // Historically this pass has had issues with the dominator tree so verify it +  // in asserts builds. +  AR.DT.verifyDomTree(); +#endif +  return getLoopPassPreservedAnalyses(); +} + +namespace { +class SimpleLoopUnswitchLegacyPass : public LoopPass { +public: +  static char ID; // Pass ID, replacement for typeid +  explicit SimpleLoopUnswitchLegacyPass() : LoopPass(ID) { +    initializeSimpleLoopUnswitchLegacyPassPass( +        *PassRegistry::getPassRegistry()); +  } + +  bool runOnLoop(Loop *L, LPPassManager &LPM) override; + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.addRequired<AssumptionCacheTracker>(); +    getLoopAnalysisUsage(AU); +  } +}; +} // namespace + +bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) { +  if (skipLoop(L)) +    return false; + +  Function &F = *L->getHeader()->getParent(); + +  DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *L << "\n"); + +  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +  auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); +  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + +  bool Changed = unswitchLoop(*L, DT, LI, AC); + +#ifndef NDEBUG +  // Historically this pass has had issues with the dominator tree so verify it +  // in asserts builds. +  DT.verifyDomTree(); +#endif +  return Changed; +} + +char SimpleLoopUnswitchLegacyPass::ID = 0; +INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", +                      "Simple unswitch loops", false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopPass) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch", +                    "Simple unswitch loops", false, false) + +Pass *llvm::createSimpleLoopUnswitchLegacyPass() { +  return new SimpleLoopUnswitchLegacyPass(); +} diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 22af21d55c019..3d5cbfc93f2e6 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -78,8 +78,8 @@ void llvm::FoldSingleEntryPHINodes(BasicBlock *BB,  bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {    // Recursively deleting a PHI may cause multiple PHIs to be deleted -  // or RAUW'd undef, so use an array of WeakVH for the PHIs to delete. -  SmallVector<WeakVH, 8> PHIs; +  // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete. 
+  SmallVector<WeakTrackingVH, 8> PHIs;    for (BasicBlock::iterator I = BB->begin();         PHINode *PN = dyn_cast<PHINode>(I); ++I)      PHIs.push_back(PN); diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 385c12302e040..d5124ac89016f 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -245,7 +245,7 @@ namespace {  void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,                                         BasicBlock::const_iterator StartingInst,                                         std::vector<const BasicBlock*> &ToClone){ -  WeakVH &BBEntry = VMap[BB]; +  WeakTrackingVH &BBEntry = VMap[BB];    // Have we already cloned this block?    if (BBEntry) return; @@ -547,7 +547,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,    // Make a second pass over the PHINodes now that all of them have been    // remapped into the new function, simplifying the PHINode and performing any    // recursive simplifications exposed. This will transparently update the -  // WeakVH in the VMap. Notably, we rely on that so that if we coalesce +  // WeakTrackingVH in the VMap. Notably, we rely on that so that if we coalesce    // two PHINodes, the iteration over the old PHIs remains valid, and the    // mapping will just map us to the new node (which may not even be a PHI    // node). diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index 5d6fbc3325fff..6d56e08af99f9 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -1640,7 +1640,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,        // modify the struct.        if (CS.isByValArgument(ArgNo)) {          ActualArg = HandleByValArgument(ActualArg, TheCall, CalledFunc, IFI, -                                        CalledFunc->getParamAlignment(ArgNo+1)); +                                        CalledFunc->getParamAlignment(ArgNo));          if (ActualArg != *AI)            ByValInit.push_back(std::make_pair(ActualArg, (Value*) *AI));        } @@ -2302,7 +2302,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      AssumptionCache *AC =          IFI.GetAssumptionCache ? 
&(*IFI.GetAssumptionCache)(*Caller) : nullptr;      auto &DL = Caller->getParent()->getDataLayout(); -    if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) { +    if (Value *V = SimplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {        PHI->replaceAllUsesWith(V);        PHI->eraseFromParent();      } diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index fe93d6927c638..42aca757c2afd 100644 --- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -33,6 +33,7 @@  #include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/IR/CFG.h"  #include "llvm/IR/Constants.h" +#include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/IRBuilder.h"  #include "llvm/IR/InstVisitor.h" @@ -48,16 +49,6 @@ using namespace llvm;  STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted");  STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted"); -static cl::opt<bool> LibCallsShrinkWrapDoDomainError( -    "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden, -    cl::desc("Perform shrink-wrap on lib calls with domain errors")); -static cl::opt<bool> LibCallsShrinkWrapDoRangeError( -    "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden, -    cl::desc("Perform shrink-wrap on lib calls with range errors")); -static cl::opt<bool> LibCallsShrinkWrapDoPoleError( -    "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden, -    cl::desc("Perform shrink-wrap on lib calls with pole errors")); -  namespace {  class LibCallsShrinkWrapLegacyPass : public FunctionPass {  public: @@ -82,10 +73,11 @@ INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",  namespace {  class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> {  public: -  LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){}; -  bool isChanged() const { return Changed; } +  LibCallsShrinkWrap(const TargetLibraryInfo &TLI, DominatorTree *DT) +      : TLI(TLI), DT(DT){};    void visitCallInst(CallInst &CI) { checkCandidate(CI); } -  void perform() { +  bool perform() { +    bool Changed = false;      for (auto &CI : WorkList) {        DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()                     << "\n"); @@ -94,6 +86,7 @@ public:          DEBUG(dbgs() << "Transformed\n");        }      } +    return Changed;    }  private: @@ -134,8 +127,8 @@ private:    }    const TargetLibraryInfo &TLI; +  DominatorTree *DT;    SmallVector<CallInst *, 16> WorkList; -  bool Changed;  };  } // end anonymous namespace @@ -241,8 +234,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,    case LibFunc_atanhf: // Same as atanh    case LibFunc_atanhl: // Same as atanh    { -    if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) -      return false;      ++NumWrappedTwoCond;      Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);      break; @@ -262,8 +253,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,    case LibFunc_logbf:  // Same as log    case LibFunc_logbl:  // Same as log    { -    if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) -      return false;      ++NumWrappedOneCond;      Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);      break; @@ -274,8 +263,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,    case LibFunc_log1pf: // Same as log1p    case LibFunc_log1pl: // Same as log1p    { -    
if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError) -      return false;      ++NumWrappedOneCond;      Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);      break; @@ -285,9 +272,6 @@ bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,                       // RangeError:  overflow or underflow    case LibFunc_powf:    case LibFunc_powl: { -    if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError || -        !LibCallsShrinkWrapDoRangeError) -      return false;      Cond = generateCondForPow(CI, Func);      if (Cond == nullptr)        return false; @@ -346,7 +330,7 @@ Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,      UpperBound = 11356.0f;      break;    default: -    llvm_unreachable("Should be reach here"); +    llvm_unreachable("Unhandled library call!");    }    ++NumWrappedOneCond; @@ -410,7 +394,7 @@ Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,      UpperBound = 11383.0f;      break;    default: -    llvm_unreachable("Should be reach here"); +    llvm_unreachable("Unhandled library call!");    }    ++NumWrappedTwoCond; @@ -499,14 +483,17 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,  // Wrap conditions that can potentially generate errno to the library call.  void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) { -  assert(Cond != nullptr && "hrinkWrapCI is not expecting an empty call inst"); +  assert(Cond != nullptr && "ShrinkWrapCI is not expecting an empty call inst");    MDNode *BranchWeights =        MDBuilder(CI->getContext()).createBranchWeights(1, 2000); +    TerminatorInst *NewInst = -      SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights); +      SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights, DT);    BasicBlock *CallBB = NewInst->getParent();    CallBB->setName("cdce.call"); -  CallBB->getSingleSuccessor()->setName("cdce.end"); +  BasicBlock *SuccBB = CallBB->getSingleSuccessor(); +  assert(SuccBB && "The split block should have a single successor"); +  SuccBB->setName("cdce.end");    CI->removeFromParent();    CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI);    DEBUG(dbgs() << "== Basic Block After =="); @@ -522,32 +509,38 @@ bool LibCallsShrinkWrap::perform(CallInst *CI) {    TLI.getLibFunc(*Callee, Func);    assert(Func && "perform() is not expecting an empty function"); -  if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func)) -    return true; - -  if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func)) +  if (performCallDomainErrorOnly(CI, Func) || performCallRangeErrorOnly(CI, Func))      return true; -    return performCallErrors(CI, Func);  }  void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.addPreserved<DominatorTreeWrapperPass>();    AU.addPreserved<GlobalsAAWrapperPass>();    AU.addRequired<TargetLibraryInfoWrapperPass>();  } -static bool runImpl(Function &F, const TargetLibraryInfo &TLI) { +static bool runImpl(Function &F, const TargetLibraryInfo &TLI, +                    DominatorTree *DT) {    if (F.hasFnAttribute(Attribute::OptimizeForSize))      return false; -  LibCallsShrinkWrap CCDCE(TLI); +  LibCallsShrinkWrap CCDCE(TLI, DT);    CCDCE.visit(F); -  CCDCE.perform(); -  return CCDCE.isChanged(); +  bool Changed = CCDCE.perform(); + +// Verify the dominator after we've updated it locally. 
+#ifndef NDEBUG +  if (DT) +    DT->verifyDomTree(); +#endif +  return Changed;  }  bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {    auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); -  return runImpl(F, TLI); +  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); +  auto *DT = DTWP ? &DTWP->getDomTree() : nullptr; +  return runImpl(F, TLI, DT);  }  namespace llvm { @@ -561,11 +554,12 @@ FunctionPass *createLibCallsShrinkWrapPass() {  PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,                                                FunctionAnalysisManager &FAM) {    auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); -  bool Changed = runImpl(F, TLI); -  if (!Changed) +  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F); +  if (!runImpl(F, TLI, DT))      return PreservedAnalyses::all();    auto PA = PreservedAnalyses();    PA.preserve<GlobalsAA>(); +  PA.preserve<DominatorTreeAnalysis>();    return PA;  }  } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index d3002c5fb7502..ce6b703f3528d 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -562,7 +562,7 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {    // that can be removed.    BB->removePredecessor(Pred, true); -  WeakVH PhiIt = &BB->front(); +  WeakTrackingVH PhiIt = &BB->front();    while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {      PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));      Value *OldPhiIt = PhiIt; @@ -1259,49 +1259,6 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {            DbgValues.push_back(DVI);  } -static void appendOffset(SmallVectorImpl<uint64_t> &Ops, int64_t Offset) { -  if (Offset > 0) { -    Ops.push_back(dwarf::DW_OP_plus); -    Ops.push_back(Offset); -  } else if (Offset < 0) { -    Ops.push_back(dwarf::DW_OP_minus); -    Ops.push_back(-Offset); -  } -} - -enum { WithStackValue = true }; - -/// Prepend \p DIExpr with a deref and offset operation and optionally turn it -/// into a stack value. -static DIExpression *prependDIExpr(DIBuilder &Builder, DIExpression *DIExpr, -                                   bool Deref, int64_t Offset = 0, -                                   bool StackValue = false) { -  if (!Deref && !Offset && !StackValue) -    return DIExpr; - -  SmallVector<uint64_t, 8> Ops; -  appendOffset(Ops, Offset); -  if (Deref) -    Ops.push_back(dwarf::DW_OP_deref); -  if (DIExpr) -    for (auto Op : DIExpr->expr_ops()) { -      // A DW_OP_stack_value comes at the end, but before a DW_OP_LLVM_fragment. 
-      if (StackValue) { -        if (Op.getOp() == dwarf::DW_OP_stack_value) -          StackValue = false; -        else if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) { -          Ops.push_back(dwarf::DW_OP_stack_value); -          StackValue = false; -        } -      } -      Ops.push_back(Op.getOp()); -      for (unsigned I = 0; I < Op.getNumArgs(); ++I) -        Ops.push_back(Op.getArg(I)); -    } -  if (StackValue) -    Ops.push_back(dwarf::DW_OP_stack_value); -  return Builder.createExpression(Ops); -}  bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,                               Instruction *InsertBefore, DIBuilder &Builder, @@ -1313,9 +1270,7 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,    auto *DIVar = DDI->getVariable();    auto *DIExpr = DDI->getExpression();    assert(DIVar && "Missing variable"); - -  DIExpr = prependDIExpr(Builder, DIExpr, Deref, Offset); - +  DIExpr = DIExpression::prepend(DIExpr, Deref, Offset);    // Insert llvm.dbg.declare immediately after the original alloca, and remove    // old llvm.dbg.declare.    Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore); @@ -1348,7 +1303,7 @@ static void replaceOneDbgValueForAlloca(DbgValueInst *DVI, Value *NewAddress,    if (Offset) {      SmallVector<uint64_t, 4> Ops;      Ops.push_back(dwarf::DW_OP_deref); -    appendOffset(Ops, Offset); +    DIExpression::appendOffset(Ops, Offset);      Ops.append(DIExpr->elements_begin() + 1, DIExpr->elements_end());      DIExpr = Builder.createExpression(Ops);    } @@ -1398,8 +1353,9 @@ void llvm::salvageDebugInfo(Instruction &I) {          auto *DIExpr = DVI->getExpression();          DIBuilder DIB(M, /*AllowUnresolved*/ false);          // GEP offsets are i32 and thus always fit into an int64_t. -        DIExpr = prependDIExpr(DIB, DIExpr, NoDeref, Offset.getSExtValue(), -                               WithStackValue); +        DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, +                                       Offset.getSExtValue(), +                                       DIExpression::WithStackValue);          DVI->setOperand(0, MDWrap(I.getOperand(0)));          DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));          DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); @@ -1411,7 +1367,7 @@ void llvm::salvageDebugInfo(Instruction &I) {        // Rewrite the load into DW_OP_deref.        auto *DIExpr = DVI->getExpression();        DIBuilder DIB(M, /*AllowUnresolved*/ false); -      DIExpr = prependDIExpr(DIB, DIExpr, WithDeref); +      DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref);        DVI->setOperand(0, MDWrap(I.getOperand(0)));        DVI->setOperand(3, MetadataAsValue::get(I.getContext(), DIExpr));        DEBUG(dbgs() << "SALVAGE:  " << *DVI << '\n'); @@ -1520,7 +1476,7 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,    II->setAttributes(CI->getAttributes());    // Make sure that anything using the call now uses the invoke!  This also -  // updates the CallGraph if present, because it uses a WeakVH. +  // updates the CallGraph if present, because it uses a WeakTrackingVH.    
CI->replaceAllUsesWith(II);    // Delete the original call diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index e7ba19665d591..72c06aef80370 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -210,7 +210,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,    for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {      PHINode *PN = cast<PHINode>(I);      ++I; -    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { +    if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {        // This is a degenerate PHI already, don't modify it!        PN->replaceAllUsesWith(V);        PN->eraseFromParent(); @@ -628,7 +628,7 @@ ReprocessLoop:    PHINode *PN;    for (BasicBlock::iterator I = L->getHeader()->begin();         (PN = dyn_cast<PHINode>(I++)); ) -    if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) { +    if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {        if (SE) SE->forgetValue(PN);        if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {          PN->replaceAllUsesWith(V); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index 43ab725b07691..4ab4d7949d233 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -757,7 +757,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,    // Simplify any new induction variables in the partially unrolled loop.    if (SE && !CompletelyUnroll && Count > 1) { -    SmallVector<WeakVH, 16> DeadInsts; +    SmallVector<WeakTrackingVH, 16> DeadInsts;      simplifyLoopIVs(L, SE, DT, LI, DeadInsts);      // Aggressively clean up dead instructions that simplifyLoopIVs already @@ -777,7 +777,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,      for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) {        Instruction *Inst = &*I++; -      if (Value *V = SimplifyInstruction(Inst, DL)) +      if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC}))          if (LI->replacementPreservesLCSSAForm(Inst, V))            Inst->replaceAllUsesWith(V);        if (isInstructionTriviallyDead(Inst)) diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index dbe42c201dd4f..29d334f2968f1 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -237,3 +237,35 @@ void llvm::filterDeadComdatFunctions(             ComdatEntriesCovered.end();    });  } + +std::string llvm::getUniqueModuleId(Module *M) { +  MD5 Md5; +  bool ExportsSymbols = false; +  auto AddGlobal = [&](GlobalValue &GV) { +    if (GV.isDeclaration() || GV.getName().startswith("llvm.") || +        !GV.hasExternalLinkage()) +      return; +    ExportsSymbols = true; +    Md5.update(GV.getName()); +    Md5.update(ArrayRef<uint8_t>{0}); +  }; + +  for (auto &F : *M) +    AddGlobal(F); +  for (auto &GV : M->globals()) +    AddGlobal(GV); +  for (auto &GA : M->aliases()) +    AddGlobal(GA); +  for (auto &IF : M->ifuncs()) +    AddGlobal(IF); + +  if (!ExportsSymbols) +    return ""; + +  MD5::MD5Result R; +  Md5.final(R); + +  SmallString<32> Str; +  MD5::stringifyResult(R, Str); +  return ("$" + Str).str(); +} diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index a33b85c4ee69a..cdba982e6641f 100644 --- 
a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -225,10 +225,10 @@ struct PromoteMem2Reg {
   std::vector<AllocaInst *> Allocas;
   DominatorTree &DT;
   DIBuilder DIB;
-
   /// A cache of @llvm.assume intrinsics used by SimplifyInstruction.
   AssumptionCache *AC;
 
+  const SimplifyQuery SQ;
   /// Reverse mapping of Allocas.
   DenseMap<AllocaInst *, unsigned> AllocaLookup;
@@ -270,7 +270,8 @@ public:
                  AssumptionCache *AC)
       : Allocas(Allocas.begin(), Allocas.end()), DT(DT),
         DIB(*DT.getRoot()->getParent()->getParent(), /*AllowUnresolved*/ false),
-        AC(AC) {}
+        AC(AC), SQ(DT.getRoot()->getParent()->getParent()->getDataLayout(),
+                   nullptr, &DT, AC) {}
 
   void run();
@@ -673,8 +674,6 @@ void PromoteMem2Reg::run() {
     A->eraseFromParent();
   }
 
-  const DataLayout &DL = F.getParent()->getDataLayout();
-
   // Remove alloca's dbg.declare intrinsics from the function.
   for (unsigned i = 0, e = AllocaDbgDeclares.size(); i != e; ++i)
     if (DbgDeclareInst *DDI = AllocaDbgDeclares[i])
@@ -699,7 +698,7 @@ void PromoteMem2Reg::run() {
       PHINode *PN = I->second;
 
       // If this PHI node merges one value and/or undefs, get the value.
-      if (Value *V = SimplifyInstruction(PN, DL, nullptr, &DT, AC)) {
+      if (Value *V = SimplifyInstruction(PN, SQ)) {
         PN->replaceAllUsesWith(V);
         PN->eraseFromParent();
         NewPhiNodes.erase(I++);
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index f86e97b6cc72f..7a3e8b9ae915d 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -2231,7 +2231,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL,
       }
 
       // Check for trivial simplification.
-      if (Value *V = SimplifyInstruction(N, DL)) { +      if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {          if (!BBI->use_empty())            TranslateMap[&*BBI] = V;          if (!N->mayHaveSideEffects()) { @@ -2307,7 +2307,7 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,    for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {      PHINode *PN = cast<PHINode>(II++); -    if (Value *V = SimplifyInstruction(PN, DL)) { +    if (Value *V = SimplifyInstruction(PN, {DL, PN})) {        PN->replaceAllUsesWith(V);        PN->eraseFromParent();        continue; @@ -3545,7 +3545,7 @@ static bool TryToSimplifyUncondBranchWithICmpInIt(      assert(VVal && "Should have a unique destination value");      ICI->setOperand(0, VVal); -    if (Value *V = SimplifyInstruction(ICI, DL)) { +    if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {        ICI->replaceAllUsesWith(V);        ICI->eraseFromParent();      } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index a4cc6a031ad4c..02a5d3dbeadfb 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -51,13 +51,13 @@ namespace {      ScalarEvolution  *SE;      DominatorTree    *DT; -    SmallVectorImpl<WeakVH> &DeadInsts; +    SmallVectorImpl<WeakTrackingVH> &DeadInsts;      bool Changed;    public:      SimplifyIndvar(Loop *Loop, ScalarEvolution *SE, DominatorTree *DT, -                   LoopInfo *LI,SmallVectorImpl<WeakVH> &Dead) +                   LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead)          : L(Loop), LI(LI), SE(SE), DT(DT), DeadInsts(Dead), Changed(false) {        assert(LI && "IV simplification requires LoopInfo");      } @@ -701,7 +701,7 @@ void IVVisitor::anchor() { }  /// Simplify instructions that use this induction variable  /// by using ScalarEvolution to analyze the IV's recurrence.  bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT, -                       LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead, +                       LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead,                         IVVisitor *V) {    SimplifyIndvar SIV(LI->getLoopFor(CurrIV->getParent()), SE, DT, LI, Dead);    SIV.simplifyUsers(CurrIV, V); @@ -711,7 +711,7 @@ bool simplifyUsersOfIV(PHINode *CurrIV, ScalarEvolution *SE, DominatorTree *DT,  /// Simplify users of induction variables within this  /// loop. This does not actually change or add IVs.  bool simplifyLoopIVs(Loop *L, ScalarEvolution *SE, DominatorTree *DT, -                     LoopInfo *LI, SmallVectorImpl<WeakVH> &Dead) { +                     LoopInfo *LI, SmallVectorImpl<WeakTrackingVH> &Dead) {    bool Changed = false;    for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {      Changed |= simplifyUsersOfIV(cast<PHINode>(I), SE, DT, LI, Dead); diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp index 27373427d4f7d..2509b5f22046b 100644 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ b/lib/Transforms/Utils/SimplifyInstructions.cpp @@ -54,8 +54,7 @@ static bool runImpl(Function &F, const SimplifyQuery &SQ,          // Don't waste time simplifying unused instructions.          
if (!I->use_empty()) {
-          if (Value *V =
-                  SimplifyInstruction(I, SQ.getWithInstruction(I), ORE)) {
+          if (Value *V = SimplifyInstruction(I, SQ, ORE)) {
             // Mark all uses for resimplification next time round the loop.
             for (User *U : I->users())
               Next->insert(cast<Instruction>(U));
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 5549444047088..f112c555205c3 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3899,11 +3899,13 @@ bool SLPVectorizerPass::runImpl(Function &F, ScalarEvolution *SE_,
 }
 
 /// \brief Check that the Values in the slice in VL array are still existent in
-/// the WeakVH array.
+/// the WeakTrackingVH array.
 /// Vectorization of part of the VL array may cause later values in the VL array
-/// to become invalid. We track when this has happened in the WeakVH array.
-static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
-                               unsigned SliceBegin, unsigned SliceSize) {
+/// to become invalid. We track when this has happened in the WeakTrackingVH
+/// array.
+static bool hasValueBeenRAUWed(ArrayRef<Value *> VL,
+                               ArrayRef<WeakTrackingVH> VH, unsigned SliceBegin,
+                               unsigned SliceSize) {
   VL = VL.slice(SliceBegin, SliceSize);
   VH = VH.slice(SliceBegin, SliceSize);
   return !std::equal(VL.begin(), VL.end(), VH.begin());
@@ -3921,7 +3923,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
     return false;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakVH, 8> TrackValues(Chain.begin(), Chain.end());
+  SmallVector<WeakTrackingVH, 8> TrackValues(Chain.begin(), Chain.end());
 
   bool Changed = false;
   // Look for profitable vectorizable trees at all offsets, starting at zero.
@@ -4107,7 +4109,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
   bool Changed = false;
 
   // Keep track of values that were deleted by vectorizing in the loop below.
-  SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
+  SmallVector<WeakTrackingVH, 8> TrackValues(VL.begin(), VL.end());
 
   unsigned NextInst = 0, MaxInst = VL.size();
   for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
@@ -4734,7 +4736,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
 
 namespace {
 /// Tracks instructions and their children.
-class WeakVHWithLevel final : public CallbackVH {
+class WeakTrackingVHWithLevel final : public CallbackVH {
   /// Operand index of the instruction currently being analyzed.
   unsigned Level = 0;
   /// Is this the instruction that should be vectorized, or are we now
@@ -4743,8 +4745,8 @@ class WeakVHWithLevel final : public CallbackVH {
   bool IsInitial = true;
 
 public:
-  explicit WeakVHWithLevel() = default;
-  WeakVHWithLevel(Value *V) : CallbackVH(V){};
+  explicit WeakTrackingVHWithLevel() = default;
+  WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){};
   /// Restart children analysis each time it is replaced by the new instruction.
void allUsesReplacedWith(Value *New) override {
     setValPtr(New);
@@ -4771,7 +4773,7 @@ public:
            cast<Instruction>(getValPtr())->getNumOperands() > Level);
     return cast<Instruction>(getValPtr())->getOperand(Level++);
   }
-  virtual ~WeakVHWithLevel() = default;
+  virtual ~WeakTrackingVHWithLevel() = default;
 };
 } // namespace
@@ -4793,7 +4795,7 @@ static bool canBeVectorized(
   if (Root->getParent() != BB)
     return false;
-  SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
+  SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root);
   SmallSet<Value *, 8> VisitedInstrs;
   bool Res = false;
   while (!Stack.empty()) {
@@ -5069,7 +5071,8 @@ bool SLPVectorizerPass::vectorizeGEPIndices(BasicBlock *BB, BoUpSLP &R) {
       SetVector<Value *> Candidates(GEPList.begin(), GEPList.end());
 
       // Some of the candidates may have already been vectorized after we
-      // initially collected them. If so, the WeakVHs will have nullified the
+      // initially collected them. If so, the WeakTrackingVHs will have
+      // nullified the
       // values, so remove them from the set of candidates.
       Candidates.remove(nullptr);
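
A note on the WeakVH -> WeakTrackingVH renames that run through BasicBlockUtils, CloneFunction, Local, LoopUnroll, SimplifyIndVar, and SLPVectorizer above: the handle semantics are what these worklists depend on. A WeakTrackingVH follows replaceAllUsesWith to the replacement value and reads back as null once its value is deleted without a replacement, which is why hasValueBeenRAUWed can compare a value slice against its handle slice and why vectorizeGEPIndices can simply drop null candidates. A minimal standalone sketch of both behaviors follows; it is not part of the patch, the module and value names are invented, and it assumes the LLVM C++ API of this era:

// Illustrative sketch only -- not from this patch. All names ("demo", "f",
// "add", "mul") are invented for the example.
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueHandle.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("demo", Ctx);
  Type *I32 = Type::getInt32Ty(Ctx);
  Function *F = Function::Create(FunctionType::get(I32, {I32}, false),
                                 Function::ExternalLinkage, "f", &M);
  Argument *A = &*F->arg_begin();
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", F);
  IRBuilder<> B(BB);
  Instruction *Add = cast<Instruction>(B.CreateAdd(A, B.getInt32(0), "add"));
  Instruction *Mul = cast<Instruction>(B.CreateMul(A, A, "mul")); // unused
  B.CreateRet(Add);

  WeakTrackingVH AddVH(Add), MulVH(Mul);
  Add->replaceAllUsesWith(A); // AddVH follows the RAUW and now points at A.
  Add->eraseFromParent();     // Erasing the add leaves AddVH valid: it had
                              // already moved to the replacement value.
  Mul->eraseFromParent();     // MulVH had no replacement; it reads back null,
                              // the state the worklists above filter out.
  return (AddVH == A && MulVH == nullptr) ? 0 : 1;
}

The same property is what lets DeleteDeadPHIs iterate its PHI list safely while recursive deletions and RAUWs are in flight.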

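The other pattern repeated across InlineFunction, LoopSimplify, LoopUnroll, PromoteMemoryToRegister, SimplifyCFG, and SimplifyInstructions is the migration of SimplifyInstruction call sites from the loose (DL, TLI, DT, AC) parameter list to a single SimplifyQuery, either built inline with braces ({DL, nullptr, DT, AC}) or, as in PromoteMem2Reg, constructed once and stored in a member. A hedged sketch of the new calling convention, mirroring the PHI-folding loops in the hunks above; the helper simplifyBlockPHIs is invented for illustration:

// Illustrative sketch only -- simplifyBlockPHIs is an invented helper, not
// LLVM API. The SimplifyQuery construction mirrors PromoteMem2Reg's SQ member.
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static bool simplifyBlockPHIs(BasicBlock &BB, DominatorTree &DT,
                              AssumptionCache &AC) {
  // Build the query once; it bundles DL, TLI, DT, and AC for every call site.
  const SimplifyQuery SQ(BB.getModule()->getDataLayout(), /*TLI=*/nullptr,
                         &DT, &AC);
  bool Changed = false;
  for (BasicBlock::iterator I = BB.begin(); isa<PHINode>(I);) {
    PHINode *PN = cast<PHINode>(I++);
    // Same shape as the call sites above: one query, short calls.
    if (Value *V = SimplifyInstruction(PN, SQ)) {
      PN->replaceAllUsesWith(V);
      PN->eraseFromParent();
      Changed = true;
    }
  }
  return Changed;
}

Bundling the analyses once makes it harder for an individual call site to silently drop the dominator tree or assumption cache, which is exactly the class of omission several of these hunks fix (for example, the UnrollLoop call that previously passed only DL).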