diff options
Diffstat (limited to 'lib/Transforms/Utils')
55 files changed, 4653 insertions, 1569 deletions
| diff --git a/lib/Transforms/Utils/AddDiscriminators.cpp b/lib/Transforms/Utils/AddDiscriminators.cpp index 0f0668f24db5..e3ef42362223 100644 --- a/lib/Transforms/Utils/AddDiscriminators.cpp +++ b/lib/Transforms/Utils/AddDiscriminators.cpp @@ -69,7 +69,7 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include <utility>  using namespace llvm; @@ -114,7 +114,7 @@ static bool shouldHaveDiscriminator(const Instruction *I) {    return !isa<IntrinsicInst>(I) || isa<MemIntrinsic>(I);  } -/// \brief Assign DWARF discriminators. +/// Assign DWARF discriminators.  ///  /// To assign discriminators, we examine the boundaries of every  /// basic block and its successors. Suppose there is a basic block B1 @@ -210,9 +210,9 @@ static bool addDiscriminators(Function &F) {        // it in 1 byte ULEB128 representation.        unsigned Discriminator = R.second ? ++LDM[L] : LDM[L];        I.setDebugLoc(DIL->setBaseDiscriminator(Discriminator)); -      DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" -                   << DIL->getColumn() << ":" << Discriminator << " " << I -                   << "\n"); +      LLVM_DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":" +                        << DIL->getColumn() << ":" << Discriminator << " " << I +                        << "\n");        Changed = true;      }    } diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 606bd8baccaa..516a785dce1e 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -20,6 +20,7 @@  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CFG.h"  #include "llvm/IR/Constants.h" @@ 
-36,7 +37,6 @@  #include "llvm/IR/Value.h"  #include "llvm/IR/ValueHandle.h"  #include "llvm/Support/Casting.h" -#include "llvm/Transforms/Utils/Local.h"  #include <cassert>  #include <cstdint>  #include <string> @@ -45,16 +45,22 @@  using namespace llvm; -void llvm::DeleteDeadBlock(BasicBlock *BB) { +void llvm::DeleteDeadBlock(BasicBlock *BB, DeferredDominance *DDT) {    assert((pred_begin(BB) == pred_end(BB) ||           // Can delete self loop.           BB->getSinglePredecessor() == BB) && "Block is not dead!");    TerminatorInst *BBTerm = BB->getTerminator(); +  std::vector<DominatorTree::UpdateType> Updates;    // Loop through all of our successors and make sure they know that one    // of their predecessors is going away. -  for (BasicBlock *Succ : BBTerm->successors()) +  if (DDT) +    Updates.reserve(BBTerm->getNumSuccessors()); +  for (BasicBlock *Succ : BBTerm->successors()) {      Succ->removePredecessor(BB); +    if (DDT) +      Updates.push_back({DominatorTree::Delete, BB, Succ}); +  }    // Zap all the instructions in the block.    while (!BB->empty()) { @@ -69,8 +75,12 @@ void llvm::DeleteDeadBlock(BasicBlock *BB) {      BB->getInstList().pop_back();    } -  // Zap the block! -  BB->eraseFromParent(); +  if (DDT) { +    DDT->applyUpdates(Updates); +    DDT->deleteBB(BB); // Deferred deletion of BB. +  } else { +    BB->eraseFromParent(); // Zap the block! +  }  }  void llvm::FoldSingleEntryPHINodes(BasicBlock *BB, @@ -94,9 +104,8 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {    // Recursively deleting a PHI may cause multiple PHIs to be deleted    // or RAUW'd undef, so use an array of WeakTrackingVH for the PHIs to delete.    
SmallVector<WeakTrackingVH, 8> PHIs; -  for (BasicBlock::iterator I = BB->begin(); -       PHINode *PN = dyn_cast<PHINode>(I); ++I) -    PHIs.push_back(PN); +  for (PHINode &PN : BB->phis()) +    PHIs.push_back(&PN);    bool Changed = false;    for (unsigned i = 0, e = PHIs.size(); i != e; ++i) @@ -108,9 +117,12 @@ bool llvm::DeleteDeadPHIs(BasicBlock *BB, const TargetLibraryInfo *TLI) {  bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,                                       LoopInfo *LI, -                                     MemoryDependenceResults *MemDep) { -  // Don't merge away blocks who have their address taken. -  if (BB->hasAddressTaken()) return false; +                                     MemoryDependenceResults *MemDep, +                                     DeferredDominance *DDT) { +  assert(!(DT && DDT) && "Cannot call with both DT and DDT."); + +  if (BB->hasAddressTaken()) +    return false;    // Can't merge if there are multiple predecessors, or no predecessors.    BasicBlock *PredBB = BB->getUniquePredecessor(); @@ -122,39 +134,38 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,    if (PredBB->getTerminator()->isExceptional())      return false; -  succ_iterator SI(succ_begin(PredBB)), SE(succ_end(PredBB)); -  BasicBlock *OnlySucc = BB; -  for (; SI != SE; ++SI) -    if (*SI != OnlySucc) { -      OnlySucc = nullptr;     // There are multiple distinct successors! -      break; -    } - -  // Can't merge if there are multiple successors. -  if (!OnlySucc) return false; +  // Can't merge if there are multiple distinct successors. +  if (PredBB->getUniqueSuccessor() != BB) +    return false;    // Can't merge if there is PHI loop. 
-  for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE; ++BI) { -    if (PHINode *PN = dyn_cast<PHINode>(BI)) { -      for (Value *IncValue : PN->incoming_values()) -        if (IncValue == PN) -          return false; -    } else -      break; -  } +  for (PHINode &PN : BB->phis()) +    for (Value *IncValue : PN.incoming_values()) +      if (IncValue == &PN) +        return false;    // Begin by getting rid of unneeded PHIs. -  SmallVector<Value *, 4> IncomingValues; +  SmallVector<AssertingVH<Value>, 4> IncomingValues;    if (isa<PHINode>(BB->front())) { -    for (auto &I : *BB) -      if (PHINode *PN = dyn_cast<PHINode>(&I)) { -        if (PN->getIncomingValue(0) != PN) -          IncomingValues.push_back(PN->getIncomingValue(0)); -      } else -        break; +    for (PHINode &PN : BB->phis()) +      if (!isa<PHINode>(PN.getIncomingValue(0)) || +          cast<PHINode>(PN.getIncomingValue(0))->getParent() != BB) +        IncomingValues.push_back(PN.getIncomingValue(0));      FoldSingleEntryPHINodes(BB, MemDep);    } +  // Deferred DT update: Collect all the edges that exit BB. These +  // dominator edges will be redirected from Pred. +  std::vector<DominatorTree::UpdateType> Updates; +  if (DDT) { +    Updates.reserve(1 + (2 * succ_size(BB))); +    Updates.push_back({DominatorTree::Delete, PredBB, BB}); +    for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { +      Updates.push_back({DominatorTree::Delete, BB, *I}); +      Updates.push_back({DominatorTree::Insert, PredBB, *I}); +    } +  } +    // Delete the unconditional branch from the predecessor...    PredBB->getInstList().pop_back(); @@ -166,8 +177,8 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,    PredBB->getInstList().splice(PredBB->end(), BB->getInstList());    // Eliminate duplicate dbg.values describing the entry PHI node post-splice. 
-  for (auto *Incoming : IncomingValues) { -    if (isa<Instruction>(Incoming)) { +  for (auto Incoming : IncomingValues) { +    if (isa<Instruction>(*Incoming)) {        SmallVector<DbgValueInst *, 2> DbgValues;        SmallDenseSet<std::pair<DILocalVariable *, DIExpression *>, 2>            DbgValueSet; @@ -201,7 +212,12 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DominatorTree *DT,    if (MemDep)      MemDep->invalidateCachedPredecessors(); -  BB->eraseFromParent(); +  if (DDT) { +    DDT->deleteBB(BB); // Deferred deletion of BB. +    DDT->applyUpdates(Updates); +  } else { +    BB->eraseFromParent(); // Nuke BB. +  }    return true;  } @@ -317,13 +333,21 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,                                        DominatorTree *DT, LoopInfo *LI,                                        bool PreserveLCSSA, bool &HasLoopExit) {    // Update dominator tree if available. -  if (DT) -    DT->splitBlock(NewBB); +  if (DT) { +    if (OldBB == DT->getRootNode()->getBlock()) { +      assert(NewBB == &NewBB->getParent()->getEntryBlock()); +      DT->setNewRoot(NewBB); +    } else { +      // Split block expects NewBB to have a non-empty set of predecessors. +      DT->splitBlock(NewBB); +    } +  }    // The rest of the logic is only relevant for updating the loop structures.    if (!LI)      return; +  assert(DT && "DT should be available to update LoopInfo!");    Loop *L = LI->getLoopFor(OldBB);    // If we need to preserve loop analyses, collect some information about how @@ -331,6 +355,12 @@ static void UpdateAnalysisInformation(BasicBlock *OldBB, BasicBlock *NewBB,    bool IsLoopEntry = !!L;    bool SplitMakesNewLoopHeader = false;    for (BasicBlock *Pred : Preds) { +    // Preds that are not reachable from entry should not be used to identify if +    // OldBB is a loop entry or if SplitMakesNewLoopHeader. 
Unreachable blocks +    // are not within any loops, so we incorrectly mark SplitMakesNewLoopHeader +    // as true and make the NewBB the header of some loop. This breaks LI. +    if (!DT->isReachableFromEntry(Pred)) +      continue;      // If we need to preserve LCSSA, determine if any of the preds is a loop      // exit.      if (PreserveLCSSA) @@ -495,7 +525,6 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,      // Insert dummy values as the incoming value.      for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++I)        cast<PHINode>(I)->addIncoming(UndefValue::get(I->getType()), NewBB); -    return NewBB;    }    // Update DominatorTree, LoopInfo, and LCCSA analysis information. @@ -503,8 +532,11 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,    UpdateAnalysisInformation(BB, NewBB, Preds, DT, LI, PreserveLCSSA,                              HasLoopExit); -  // Update the PHI nodes in BB with the values coming from NewBB. -  UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); +  if (!Preds.empty()) { +    // Update the PHI nodes in BB with the values coming from NewBB. +    UpdatePHINodes(BB, NewBB, Preds, BI, HasLoopExit); +  } +    return NewBB;  } diff --git a/lib/Transforms/Utils/BreakCriticalEdges.cpp b/lib/Transforms/Utils/BreakCriticalEdges.cpp index 3653c307619b..3e30c27a9f33 100644 --- a/lib/Transforms/Utils/BreakCriticalEdges.cpp +++ b/lib/Transforms/Utils/BreakCriticalEdges.cpp @@ -28,7 +28,7 @@  #include "llvm/IR/Instructions.h"  #include "llvm/IR/Type.h"  #include "llvm/Support/ErrorHandling.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Transforms/Utils/ValueMapper.h" @@ -106,10 +106,9 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,            SplitBB->isLandingPad()) && "SplitBB has non-PHI nodes!");    // For each PHI in the destination block. 
-  for (BasicBlock::iterator I = DestBB->begin(); -       PHINode *PN = dyn_cast<PHINode>(I); ++I) { -    unsigned Idx = PN->getBasicBlockIndex(SplitBB); -    Value *V = PN->getIncomingValue(Idx); +  for (PHINode &PN : DestBB->phis()) { +    unsigned Idx = PN.getBasicBlockIndex(SplitBB); +    Value *V = PN.getIncomingValue(Idx);      // If the input is a PHI which already satisfies LCSSA, don't create      // a new one. @@ -119,13 +118,13 @@ static void createPHIsForSplitLoopExit(ArrayRef<BasicBlock *> Preds,      // Otherwise a new PHI is needed. Create one and populate it.      PHINode *NewPN = PHINode::Create( -        PN->getType(), Preds.size(), "split", +        PN.getType(), Preds.size(), "split",          SplitBB->isLandingPad() ? &SplitBB->front() : SplitBB->getTerminator());      for (unsigned i = 0, e = Preds.size(); i != e; ++i)        NewPN->addIncoming(V, Preds[i]);      // Update the original PHI. -    PN->setIncomingValue(Idx, NewPN); +    PN.setIncomingValue(Idx, NewPN);    }  } diff --git a/lib/Transforms/Utils/BuildLibCalls.cpp b/lib/Transforms/Utils/BuildLibCalls.cpp index b60dfb4f3541..5f5c4150d3bb 100644 --- a/lib/Transforms/Utils/BuildLibCalls.cpp +++ b/lib/Transforms/Utils/BuildLibCalls.cpp @@ -105,12 +105,23 @@ static bool setRetNonNull(Function &F) {    return true;  } +static bool setNonLazyBind(Function &F) { +  if (F.hasFnAttribute(Attribute::NonLazyBind)) +    return false; +  F.addFnAttr(Attribute::NonLazyBind); +  return true; +} +  bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {    LibFunc TheLibFunc;    if (!(TLI.getLibFunc(F, TheLibFunc) && TLI.has(TheLibFunc)))      return false;    bool Changed = false; + +  if (F.getParent() != nullptr && F.getParent()->getRtLibUseGOT()) +    Changed |= setNonLazyBind(F); +    switch (TheLibFunc) {    case LibFunc_strlen:    case LibFunc_wcslen: @@ -375,6 +386,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {    case 
LibFunc_fseek:    case LibFunc_ftell:    case LibFunc_fgetc: +  case LibFunc_fgetc_unlocked:    case LibFunc_fseeko:    case LibFunc_ftello:    case LibFunc_fileno: @@ -393,6 +405,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {      Changed |= setOnlyReadsMemory(F);      return Changed;    case LibFunc_fputc: +  case LibFunc_fputc_unlocked:    case LibFunc_fstat:    case LibFunc_frexp:    case LibFunc_frexpf: @@ -402,21 +415,25 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {      Changed |= setDoesNotCapture(F, 1);      return Changed;    case LibFunc_fgets: +  case LibFunc_fgets_unlocked:      Changed |= setDoesNotThrow(F);      Changed |= setDoesNotCapture(F, 2);      return Changed;    case LibFunc_fread: +  case LibFunc_fread_unlocked:      Changed |= setDoesNotThrow(F);      Changed |= setDoesNotCapture(F, 0);      Changed |= setDoesNotCapture(F, 3);      return Changed;    case LibFunc_fwrite: +  case LibFunc_fwrite_unlocked:      Changed |= setDoesNotThrow(F);      Changed |= setDoesNotCapture(F, 0);      Changed |= setDoesNotCapture(F, 3);      // FIXME: readonly #1?      
return Changed;    case LibFunc_fputs: +  case LibFunc_fputs_unlocked:      Changed |= setDoesNotThrow(F);      Changed |= setDoesNotCapture(F, 0);      Changed |= setDoesNotCapture(F, 1); @@ -447,6 +464,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {      return Changed;    case LibFunc_gets:    case LibFunc_getchar: +  case LibFunc_getchar_unlocked:      Changed |= setDoesNotThrow(F);      return Changed;    case LibFunc_getitimer: @@ -485,6 +503,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {      Changed |= setOnlyReadsMemory(F, 1);      return Changed;    case LibFunc_putc: +  case LibFunc_putc_unlocked:      Changed |= setDoesNotThrow(F);      Changed |= setDoesNotCapture(F, 1);      return Changed; @@ -505,6 +524,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {      Changed |= setOnlyReadsMemory(F, 1);      return Changed;    case LibFunc_putchar: +  case LibFunc_putchar_unlocked:      Changed |= setDoesNotThrow(F);      return Changed;    case LibFunc_popen: @@ -687,9 +707,9 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {      Changed |= setRetNonNull(F);      Changed |= setRetDoesNotAlias(F);      return Changed; -  //TODO: add LibFunc entries for: -  //case LibFunc_memset_pattern4: -  //case LibFunc_memset_pattern8: +  // TODO: add LibFunc entries for: +  // case LibFunc_memset_pattern4: +  // case LibFunc_memset_pattern8:    case LibFunc_memset_pattern16:      Changed |= setOnlyAccessesArgMemory(F);      Changed |= setDoesNotCapture(F, 0); @@ -709,6 +729,19 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {    }  } +bool llvm::hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, +                           LibFunc DoubleFn, LibFunc FloatFn, +                           LibFunc LongDoubleFn) { +  switch (Ty->getTypeID()) { +  case Type::FloatTyID: +    return TLI->has(FloatFn); +  
case Type::DoubleTyID: +    return TLI->has(DoubleFn); +  default: +    return TLI->has(LongDoubleFn); +  } +} +  //- Emit LibCalls ------------------------------------------------------------//  Value *llvm::castToCStr(Value *V, IRBuilder<> &B) { @@ -973,6 +1006,24 @@ Value *llvm::emitFPutC(Value *Char, Value *File, IRBuilder<> &B,    return CI;  } +Value *llvm::emitFPutCUnlocked(Value *Char, Value *File, IRBuilder<> &B, +                               const TargetLibraryInfo *TLI) { +  if (!TLI->has(LibFunc_fputc_unlocked)) +    return nullptr; + +  Module *M = B.GetInsertBlock()->getModule(); +  Constant *F = M->getOrInsertFunction("fputc_unlocked", B.getInt32Ty(), +                                       B.getInt32Ty(), File->getType()); +  if (File->getType()->isPointerTy()) +    inferLibFuncAttributes(*M->getFunction("fputc_unlocked"), *TLI); +  Char = B.CreateIntCast(Char, B.getInt32Ty(), /*isSigned*/ true, "chari"); +  CallInst *CI = B.CreateCall(F, {Char, File}, "fputc_unlocked"); + +  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) +    CI->setCallingConv(Fn->getCallingConv()); +  return CI; +} +  Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,                         const TargetLibraryInfo *TLI) {    if (!TLI->has(LibFunc_fputs)) @@ -991,6 +1042,24 @@ Value *llvm::emitFPutS(Value *Str, Value *File, IRBuilder<> &B,    return CI;  } +Value *llvm::emitFPutSUnlocked(Value *Str, Value *File, IRBuilder<> &B, +                               const TargetLibraryInfo *TLI) { +  if (!TLI->has(LibFunc_fputs_unlocked)) +    return nullptr; + +  Module *M = B.GetInsertBlock()->getModule(); +  StringRef FPutsUnlockedName = TLI->getName(LibFunc_fputs_unlocked); +  Constant *F = M->getOrInsertFunction(FPutsUnlockedName, B.getInt32Ty(), +                                       B.getInt8PtrTy(), File->getType()); +  if (File->getType()->isPointerTy()) +    inferLibFuncAttributes(*M->getFunction(FPutsUnlockedName), *TLI); +  CallInst *CI 
= B.CreateCall(F, {castToCStr(Str, B), File}, "fputs_unlocked"); + +  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) +    CI->setCallingConv(Fn->getCallingConv()); +  return CI; +} +  Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,                          const DataLayout &DL, const TargetLibraryInfo *TLI) {    if (!TLI->has(LibFunc_fwrite)) @@ -1013,3 +1082,119 @@ Value *llvm::emitFWrite(Value *Ptr, Value *Size, Value *File, IRBuilder<> &B,      CI->setCallingConv(Fn->getCallingConv());    return CI;  } + +Value *llvm::emitMalloc(Value *Num, IRBuilder<> &B, const DataLayout &DL, +                        const TargetLibraryInfo *TLI) { +  if (!TLI->has(LibFunc_malloc)) +    return nullptr; + +  Module *M = B.GetInsertBlock()->getModule(); +  LLVMContext &Context = B.GetInsertBlock()->getContext(); +  Value *Malloc = M->getOrInsertFunction("malloc", B.getInt8PtrTy(), +                                         DL.getIntPtrType(Context)); +  inferLibFuncAttributes(*M->getFunction("malloc"), *TLI); +  CallInst *CI = B.CreateCall(Malloc, Num, "malloc"); + +  if (const Function *F = dyn_cast<Function>(Malloc->stripPointerCasts())) +    CI->setCallingConv(F->getCallingConv()); + +  return CI; +} + +Value *llvm::emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs, +                        IRBuilder<> &B, const TargetLibraryInfo &TLI) { +  if (!TLI.has(LibFunc_calloc)) +    return nullptr; + +  Module *M = B.GetInsertBlock()->getModule(); +  const DataLayout &DL = M->getDataLayout(); +  IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); +  Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(), +                                         PtrType, PtrType); +  inferLibFuncAttributes(*M->getFunction("calloc"), TLI); +  CallInst *CI = B.CreateCall(Calloc, {Num, Size}, "calloc"); + +  if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts())) +    
CI->setCallingConv(F->getCallingConv()); + +  return CI; +} + +Value *llvm::emitFWriteUnlocked(Value *Ptr, Value *Size, Value *N, Value *File, +                                IRBuilder<> &B, const DataLayout &DL, +                                const TargetLibraryInfo *TLI) { +  if (!TLI->has(LibFunc_fwrite_unlocked)) +    return nullptr; + +  Module *M = B.GetInsertBlock()->getModule(); +  LLVMContext &Context = B.GetInsertBlock()->getContext(); +  StringRef FWriteUnlockedName = TLI->getName(LibFunc_fwrite_unlocked); +  Constant *F = M->getOrInsertFunction( +      FWriteUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(), +      DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); + +  if (File->getType()->isPointerTy()) +    inferLibFuncAttributes(*M->getFunction(FWriteUnlockedName), *TLI); +  CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File}); + +  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) +    CI->setCallingConv(Fn->getCallingConv()); +  return CI; +} + +Value *llvm::emitFGetCUnlocked(Value *File, IRBuilder<> &B, +                               const TargetLibraryInfo *TLI) { +  if (!TLI->has(LibFunc_fgetc_unlocked)) +    return nullptr; + +  Module *M = B.GetInsertBlock()->getModule(); +  Constant *F = +      M->getOrInsertFunction("fgetc_unlocked", B.getInt32Ty(), File->getType()); +  if (File->getType()->isPointerTy()) +    inferLibFuncAttributes(*M->getFunction("fgetc_unlocked"), *TLI); +  CallInst *CI = B.CreateCall(F, File, "fgetc_unlocked"); + +  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) +    CI->setCallingConv(Fn->getCallingConv()); +  return CI; +} + +Value *llvm::emitFGetSUnlocked(Value *Str, Value *Size, Value *File, +                               IRBuilder<> &B, const TargetLibraryInfo *TLI) { +  if (!TLI->has(LibFunc_fgets_unlocked)) +    return nullptr; + +  Module *M = B.GetInsertBlock()->getModule(); +  Constant *F = +      
M->getOrInsertFunction("fgets_unlocked", B.getInt8PtrTy(), +                             B.getInt8PtrTy(), B.getInt32Ty(), File->getType()); +  inferLibFuncAttributes(*M->getFunction("fgets_unlocked"), *TLI); +  CallInst *CI = +      B.CreateCall(F, {castToCStr(Str, B), Size, File}, "fgets_unlocked"); + +  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) +    CI->setCallingConv(Fn->getCallingConv()); +  return CI; +} + +Value *llvm::emitFReadUnlocked(Value *Ptr, Value *Size, Value *N, Value *File, +                               IRBuilder<> &B, const DataLayout &DL, +                               const TargetLibraryInfo *TLI) { +  if (!TLI->has(LibFunc_fread_unlocked)) +    return nullptr; + +  Module *M = B.GetInsertBlock()->getModule(); +  LLVMContext &Context = B.GetInsertBlock()->getContext(); +  StringRef FReadUnlockedName = TLI->getName(LibFunc_fread_unlocked); +  Constant *F = M->getOrInsertFunction( +      FReadUnlockedName, DL.getIntPtrType(Context), B.getInt8PtrTy(), +      DL.getIntPtrType(Context), DL.getIntPtrType(Context), File->getType()); + +  if (File->getType()->isPointerTy()) +    inferLibFuncAttributes(*M->getFunction(FReadUnlockedName), *TLI); +  CallInst *CI = B.CreateCall(F, {castToCStr(Ptr, B), Size, N, File}); + +  if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts())) +    CI->setCallingConv(Fn->getCallingConv()); +  return CI; +} diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index f711b192f604..05512a6dff3e 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -21,6 +21,7 @@  #include "llvm/ADT/Optional.h"  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/Constants.h" @@ -34,7 +35,6 @@  #include "llvm/IR/Value.h"  #include 
"llvm/Support/Casting.h"  #include "llvm/Support/KnownBits.h" -#include "llvm/Transforms/Utils/Local.h"  #include <cassert>  #include <cstdint> @@ -173,7 +173,7 @@ Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {    return isDivisionOp() ? Value.Quotient : Value.Remainder;  } -/// \brief Check if a value looks like a hash. +/// Check if a value looks like a hash.  ///  /// The routine is expected to detect values computed using the most common hash  /// algorithms. Typically, hash computations end with one of the following diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 972e47f9270a..c87b74f739f4 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -25,8 +25,10 @@ add_llvm_library(LLVMTransformUtils    LCSSA.cpp    LibCallsShrinkWrap.cpp    Local.cpp +  LoopRotationUtils.cpp    LoopSimplify.cpp    LoopUnroll.cpp +  LoopUnrollAndJam.cpp    LoopUnrollPeel.cpp    LoopUnrollRuntime.cpp    LoopUtils.cpp @@ -43,10 +45,10 @@ add_llvm_library(LLVMTransformUtils    PromoteMemoryToRegister.cpp    StripGCRelocates.cpp    SSAUpdater.cpp +  SSAUpdaterBulk.cpp    SanitizerStats.cpp    SimplifyCFG.cpp    SimplifyIndVar.cpp -  SimplifyInstructions.cpp    SimplifyLibCalls.cpp    SplitModule.cpp    StripNonLineTableDebugInfo.cpp diff --git a/lib/Transforms/Utils/CallPromotionUtils.cpp b/lib/Transforms/Utils/CallPromotionUtils.cpp index 8825f77555e7..4d9c22e57a68 100644 --- a/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -47,14 +47,11 @@ using namespace llvm;  ///  static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,                                        BasicBlock *MergeBlock) { -  for (auto &I : *Invoke->getNormalDest()) { -    auto *Phi = dyn_cast<PHINode>(&I); -    if (!Phi) -      break; -    int Idx = Phi->getBasicBlockIndex(OrigBlock); +  for (PHINode &Phi : Invoke->getNormalDest()->phis()) { +    
int Idx = Phi.getBasicBlockIndex(OrigBlock);      if (Idx == -1)        continue; -    Phi->setIncomingBlock(Idx, MergeBlock); +    Phi.setIncomingBlock(Idx, MergeBlock);    }  } @@ -82,16 +79,13 @@ static void fixupPHINodeForNormalDest(InvokeInst *Invoke, BasicBlock *OrigBlock,  static void fixupPHINodeForUnwindDest(InvokeInst *Invoke, BasicBlock *OrigBlock,                                        BasicBlock *ThenBlock,                                        BasicBlock *ElseBlock) { -  for (auto &I : *Invoke->getUnwindDest()) { -    auto *Phi = dyn_cast<PHINode>(&I); -    if (!Phi) -      break; -    int Idx = Phi->getBasicBlockIndex(OrigBlock); +  for (PHINode &Phi : Invoke->getUnwindDest()->phis()) { +    int Idx = Phi.getBasicBlockIndex(OrigBlock);      if (Idx == -1)        continue; -    auto *V = Phi->getIncomingValue(Idx); -    Phi->setIncomingBlock(Idx, ThenBlock); -    Phi->addIncoming(V, ElseBlock); +    auto *V = Phi.getIncomingValue(Idx); +    Phi.setIncomingBlock(Idx, ThenBlock); +    Phi.addIncoming(V, ElseBlock);    }  } @@ -395,12 +389,14 @@ Instruction *llvm::promoteCall(CallSite CS, Function *Callee,    // Inspect the arguments of the call site. If an argument's type doesn't    // match the corresponding formal argument's type in the callee, bitcast it    // to the correct type. 
-  for (Use &U : CS.args()) { -    unsigned ArgNo = CS.getArgumentNo(&U); -    Type *FormalTy = Callee->getFunctionType()->getParamType(ArgNo); -    Type *ActualTy = U.get()->getType(); +  auto CalleeType = Callee->getFunctionType(); +  auto CalleeParamNum = CalleeType->getNumParams(); +  for (unsigned ArgNo = 0; ArgNo < CalleeParamNum; ++ArgNo) { +    auto *Arg = CS.getArgument(ArgNo);  +    Type *FormalTy = CalleeType->getParamType(ArgNo); +    Type *ActualTy = Arg->getType();      if (FormalTy != ActualTy) { -      auto *Cast = CastInst::Create(Instruction::BitCast, U.get(), FormalTy, "", +      auto *Cast = CastInst::Create(Instruction::BitCast, Arg, FormalTy, "",                                      CS.getInstruction());        CS.setArgument(ArgNo, Cast);      } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 3b19ba1b50f2..61448e9acb57 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -18,6 +18,7 @@  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/IR/CFG.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DebugInfo.h" @@ -31,7 +32,6 @@  #include "llvm/IR/Module.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/ValueMapper.h"  #include <map>  using namespace llvm; @@ -43,44 +43,36 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,                                    DebugInfoFinder *DIFinder) {    DenseMap<const MDNode *, MDNode *> Cache;    BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F); -  if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); +  if (BB->hasName()) +    NewBB->setName(BB->getName() + NameSuffix);    bool hasCalls = false, 
hasDynamicAllocas = false, hasStaticAllocas = false;    Module *TheModule = F ? F->getParent() : nullptr;    // Loop over all instructions, and copy them over. -  for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end(); -       II != IE; ++II) { - -    if (DIFinder && TheModule) { -      if (auto *DDI = dyn_cast<DbgDeclareInst>(II)) -        DIFinder->processDeclare(*TheModule, DDI); -      else if (auto *DVI = dyn_cast<DbgValueInst>(II)) -        DIFinder->processValue(*TheModule, DVI); +  for (const Instruction &I : *BB) { +    if (DIFinder && TheModule) +      DIFinder->processInstruction(*TheModule, I); -      if (auto DbgLoc = II->getDebugLoc()) -        DIFinder->processLocation(*TheModule, DbgLoc.get()); -    } - -    Instruction *NewInst = II->clone(); -    if (II->hasName()) -      NewInst->setName(II->getName()+NameSuffix); +    Instruction *NewInst = I.clone(); +    if (I.hasName()) +      NewInst->setName(I.getName() + NameSuffix);      NewBB->getInstList().push_back(NewInst); -    VMap[&*II] = NewInst; // Add instruction map to value. +    VMap[&I] = NewInst; // Add instruction map to value. 
-    hasCalls |= (isa<CallInst>(II) && !isa<DbgInfoIntrinsic>(II)); -    if (const AllocaInst *AI = dyn_cast<AllocaInst>(II)) { +    hasCalls |= (isa<CallInst>(I) && !isa<DbgInfoIntrinsic>(I)); +    if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {        if (isa<ConstantInt>(AI->getArraySize()))          hasStaticAllocas = true;        else          hasDynamicAllocas = true;      }    } -   +    if (CodeInfo) {      CodeInfo->ContainsCalls          |= hasCalls;      CodeInfo->ContainsDynamicAllocas |= hasDynamicAllocas; -    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&  +    CodeInfo->ContainsDynamicAllocas |= hasStaticAllocas &&                                          BB != &BB->getParent()->getEntryBlock();    }    return NewBB; @@ -175,7 +167,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,      // Create a new basic block and copy instructions into it!      BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo, -                                      SP ? &DIFinder : nullptr); +                                      ModuleLevelChanges ? &DIFinder : nullptr);      // Add basic block mapping.      VMap[&BB] = CBB; @@ -197,15 +189,15 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,        Returns.push_back(RI);    } -  for (DISubprogram *ISP : DIFinder.subprograms()) { -    if (ISP != SP) { +  for (DISubprogram *ISP : DIFinder.subprograms()) +    if (ISP != SP)        VMap.MD()[ISP].reset(ISP); -    } -  } -  for (auto *Type : DIFinder.types()) { +  for (DICompileUnit *CU : DIFinder.compile_units()) +    VMap.MD()[CU].reset(CU); + +  for (DIType *Type : DIFinder.types())      VMap.MD()[Type].reset(Type); -  }    // Loop over all of the instructions in the function, fixing up operand    // references as we go.  This uses VMap to do all the hard work. 
@@ -283,7 +275,7 @@ namespace {      /// The specified block is found to be reachable, clone it and      /// anything that it can reach. -    void CloneBlock(const BasicBlock *BB,  +    void CloneBlock(const BasicBlock *BB,                      BasicBlock::const_iterator StartingInst,                      std::vector<const BasicBlock*> &ToClone);    }; @@ -493,17 +485,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,      // Handle PHI nodes specially, as we have to remove references to dead      // blocks. -    for (BasicBlock::const_iterator I = BI.begin(), E = BI.end(); I != E; ++I) { +    for (const PHINode &PN : BI.phis()) {        // PHI nodes may have been remapped to non-PHI nodes by the caller or        // during the cloning process. -      if (const PHINode *PN = dyn_cast<PHINode>(I)) { -        if (isa<PHINode>(VMap[PN])) -          PHIToResolve.push_back(PN); -        else -          break; -      } else { +      if (isa<PHINode>(VMap[&PN])) +        PHIToResolve.push_back(&PN); +      else          break; -      }      }      // Finally, remap the terminator instructions, as those can't be remapped @@ -550,7 +538,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,      // phi nodes will have invalid entries.  Update the PHI nodes in this      // case.      PHINode *PN = cast<PHINode>(NewBB->begin()); -    NumPreds = std::distance(pred_begin(NewBB), pred_end(NewBB)); +    NumPreds = pred_size(NewBB);      if (NumPreds != PN->getNumIncomingValues()) {        assert(NumPreds < PN->getNumIncomingValues());        // Count how many times each predecessor comes to this block. @@ -722,7 +710,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,                              ModuleLevelChanges, Returns, NameSuffix, CodeInfo);  } -/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap. 
+/// Remaps instructions in \p Blocks using the mapping in \p VMap.  void llvm::remapInstructionsInBlocks(      const SmallVectorImpl<BasicBlock *> &Blocks, ValueToValueMapTy &VMap) {    // Rewrite the code to refer to itself. @@ -732,7 +720,7 @@ void llvm::remapInstructionsInBlocks(                         RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);  } -/// \brief Clones a loop \p OrigLoop.  Returns the loop and the blocks in \p +/// Clones a loop \p OrigLoop.  Returns the loop and the blocks in \p  /// Blocks.  ///  /// Updates LoopInfo and DominatorTree assuming the loop is dominated by block @@ -796,12 +784,13 @@ Loop *llvm::cloneLoopWithPreheader(BasicBlock *Before, BasicBlock *LoopDomBB,    return NewLoop;  } -/// \brief Duplicate non-Phi instructions from the beginning of block up to +/// Duplicate non-Phi instructions from the beginning of block up to  /// StopAt instruction into a split block between BB and its predecessor.  BasicBlock *  llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,                                            Instruction *StopAt, -                                          ValueToValueMapTy &ValueMapping) { +                                          ValueToValueMapTy &ValueMapping, +                                          DominatorTree *DT) {    // We are going to have to map operands from the original BB block to the new    // copy of the block 'NewBB'.  If there are PHI nodes in BB, evaluate them to    // account for entry from PredBB. 
@@ -809,13 +798,15 @@ llvm::DuplicateInstructionsInSplitBetween(BasicBlock *BB, BasicBlock *PredBB,    for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)      ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); -  BasicBlock *NewBB = SplitEdge(PredBB, BB); +  BasicBlock *NewBB = SplitEdge(PredBB, BB, DT);    NewBB->setName(PredBB->getName() + ".split");    Instruction *NewTerm = NewBB->getTerminator();    // Clone the non-phi instructions of BB into NewBB, keeping track of the    // mapping and using it to remap operands in the cloned instructions. -  for (; StopAt != &*BI; ++BI) { +  // Stop once we see the terminator too. This covers the case where BB's +  // terminator gets replaced and StopAt == BB's terminator. +  for (; StopAt != &*BI && BB->getTerminator() != &*BI; ++BI) {      Instruction *New = BI->clone();      New->setName(BI->getName());      New->insertBefore(NewTerm); diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index 8fee10854229..35c7511a24b9 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -32,33 +32,34 @@ static void copyComdat(GlobalObject *Dst, const GlobalObject *Src) {  /// copies of global variables and functions, and making their (initializers and  /// references, respectively) refer to the right globals.  /// -std::unique_ptr<Module> llvm::CloneModule(const Module *M) { +std::unique_ptr<Module> llvm::CloneModule(const Module &M) {    // Create the value map that maps things from the old module over to the new    // module.    
ValueToValueMapTy VMap;    return CloneModule(M, VMap);  } -std::unique_ptr<Module> llvm::CloneModule(const Module *M, +std::unique_ptr<Module> llvm::CloneModule(const Module &M,                                            ValueToValueMapTy &VMap) {    return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; });  }  std::unique_ptr<Module> llvm::CloneModule( -    const Module *M, ValueToValueMapTy &VMap, +    const Module &M, ValueToValueMapTy &VMap,      function_ref<bool(const GlobalValue *)> ShouldCloneDefinition) {    // First off, we need to create the new module.    std::unique_ptr<Module> New = -      llvm::make_unique<Module>(M->getModuleIdentifier(), M->getContext()); -  New->setDataLayout(M->getDataLayout()); -  New->setTargetTriple(M->getTargetTriple()); -  New->setModuleInlineAsm(M->getModuleInlineAsm()); -    +      llvm::make_unique<Module>(M.getModuleIdentifier(), M.getContext()); +  New->setSourceFileName(M.getSourceFileName()); +  New->setDataLayout(M.getDataLayout()); +  New->setTargetTriple(M.getTargetTriple()); +  New->setModuleInlineAsm(M.getModuleInlineAsm()); +    // Loop over all of the global variables, making corresponding globals in the    // new module.  Here we add them to the VMap and to the new Module.  We    // don't worry about attributes or initializers, they will come later.    
// -  for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); +  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();         I != E; ++I) {      GlobalVariable *GV = new GlobalVariable(*New,                                               I->getValueType(), @@ -72,7 +73,7 @@ std::unique_ptr<Module> llvm::CloneModule(    }    // Loop over the functions in the module, making external functions as before -  for (const Function &I : *M) { +  for (const Function &I : M) {      Function *NF = Function::Create(cast<FunctionType>(I.getValueType()),                                      I.getLinkage(), I.getName(), New.get());      NF->copyAttributesFrom(&I); @@ -80,7 +81,7 @@ std::unique_ptr<Module> llvm::CloneModule(    }    // Loop over the aliases in the module -  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); +  for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();         I != E; ++I) {      if (!ShouldCloneDefinition(&*I)) {        // An alias cannot act as an external reference, so we need to create @@ -114,7 +115,7 @@ std::unique_ptr<Module> llvm::CloneModule(    // have been created, loop through and copy the global variable referrers    // over...  We also set the attributes on the global now.    // -  for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); +  for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();         I != E; ++I) {      if (I->isDeclaration())        continue; @@ -139,7 +140,7 @@ std::unique_ptr<Module> llvm::CloneModule(    // Similarly, copy over function bodies now...    
// -  for (const Function &I : *M) { +  for (const Function &I : M) {      if (I.isDeclaration())        continue; @@ -169,7 +170,7 @@ std::unique_ptr<Module> llvm::CloneModule(    }    // And aliases -  for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); +  for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end();         I != E; ++I) {      // We already dealt with undefined aliases above.      if (!ShouldCloneDefinition(&*I)) @@ -180,8 +181,9 @@ std::unique_ptr<Module> llvm::CloneModule(    }    // And named metadata.... -  for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), -         E = M->named_metadata_end(); I != E; ++I) { +  for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), +                                             E = M.named_metadata_end(); +       I != E; ++I) {      const NamedMDNode &NMD = *I;      NamedMDNode *NewNMD = New->getOrInsertNamedMetadata(NMD.getName());      for (unsigned i = 0, e = NMD.getNumOperands(); i != e; ++i) @@ -194,7 +196,7 @@ std::unique_ptr<Module> llvm::CloneModule(  extern "C" {  LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { -  return wrap(CloneModule(unwrap(M)).release()); +  return wrap(CloneModule(*unwrap(M)).release());  }  } diff --git a/lib/Transforms/Utils/CodeExtractor.cpp b/lib/Transforms/Utils/CodeExtractor.cpp index 7a404241cb14..f31dab9f96af 100644 --- a/lib/Transforms/Utils/CodeExtractor.cpp +++ b/lib/Transforms/Utils/CodeExtractor.cpp @@ -66,6 +66,7 @@  #include <vector>  using namespace llvm; +using ProfileCount = Function::ProfileCount;  #define DEBUG_TYPE "code-extractor" @@ -77,12 +78,10 @@ static cl::opt<bool>  AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,                   cl::desc("Aggregate arguments to code-extracted functions")); -/// \brief Test whether a block is valid for extraction. 
-bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB, -                                              bool AllowVarArgs) { -  // Landing pads must be in the function where they were inserted for cleanup. -  if (BB.isEHPad()) -    return false; +/// Test whether a block is valid for extraction. +static bool isBlockValidForExtraction(const BasicBlock &BB, +                                      const SetVector<BasicBlock *> &Result, +                                      bool AllowVarArgs, bool AllowAlloca) {    // taking the address of a basic block moved to another function is illegal    if (BB.hasAddressTaken())      return false; @@ -111,11 +110,63 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB,      }    } -  // Don't hoist code containing allocas or invokes. If explicitly requested, -  // allow vastart. +  // If explicitly requested, allow vastart and alloca. For invoke instructions +  // verify that extraction is valid.    for (BasicBlock::const_iterator I = BB.begin(), E = BB.end(); I != E; ++I) { -    if (isa<AllocaInst>(I) || isa<InvokeInst>(I)) -      return false; +    if (isa<AllocaInst>(I)) { +       if (!AllowAlloca) +         return false; +       continue; +    } + +    if (const auto *II = dyn_cast<InvokeInst>(I)) { +      // Unwind destination (either a landingpad, catchswitch, or cleanuppad) +      // must be a part of the subgraph which is being extracted. +      if (auto *UBB = II->getUnwindDest()) +        if (!Result.count(UBB)) +          return false; +      continue; +    } + +    // All catch handlers of a catchswitch instruction as well as the unwind +    // destination must be in the subgraph. 
+    if (const auto *CSI = dyn_cast<CatchSwitchInst>(I)) { +      if (auto *UBB = CSI->getUnwindDest()) +        if (!Result.count(UBB)) +          return false; +      for (auto *HBB : CSI->handlers()) +        if (!Result.count(const_cast<BasicBlock*>(HBB))) +          return false; +      continue; +    } + +    // Make sure that entire catch handler is within subgraph. It is sufficient +    // to check that catch return's block is in the list. +    if (const auto *CPI = dyn_cast<CatchPadInst>(I)) { +      for (const auto *U : CPI->users()) +        if (const auto *CRI = dyn_cast<CatchReturnInst>(U)) +          if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) +            return false; +      continue; +    } + +    // And do similar checks for cleanup handler - the entire handler must be +    // in subgraph which is going to be extracted. For cleanup return should +    // additionally check that the unwind destination is also in the subgraph. +    if (const auto *CPI = dyn_cast<CleanupPadInst>(I)) { +      for (const auto *U : CPI->users()) +        if (const auto *CRI = dyn_cast<CleanupReturnInst>(U)) +          if (!Result.count(const_cast<BasicBlock*>(CRI->getParent()))) +            return false; +      continue; +    } +    if (const auto *CRI = dyn_cast<CleanupReturnInst>(I)) { +      if (auto *UBB = CRI->getUnwindDest()) +        if (!Result.count(UBB)) +          return false; +      continue; +    } +      if (const CallInst *CI = dyn_cast<CallInst>(I))        if (const Function *F = CI->getCalledFunction())          if (F->getIntrinsicID() == Intrinsic::vastart) { @@ -129,10 +180,10 @@ bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB,    return true;  } -/// \brief Build a set of blocks to extract if the input blocks are viable. +/// Build a set of blocks to extract if the input blocks are viable.  
static SetVector<BasicBlock *>  buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT, -                        bool AllowVarArgs) { +                        bool AllowVarArgs, bool AllowAlloca) {    assert(!BBs.empty() && "The set of blocks to extract must be non-empty");    SetVector<BasicBlock *> Result; @@ -145,32 +196,42 @@ buildExtractionBlockSet(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,      if (!Result.insert(BB))        llvm_unreachable("Repeated basic blocks in extraction input"); -    if (!CodeExtractor::isBlockValidForExtraction(*BB, AllowVarArgs)) { -      Result.clear(); -      return Result; -    }    } -#ifndef NDEBUG -  for (SetVector<BasicBlock *>::iterator I = std::next(Result.begin()), -                                         E = Result.end(); -       I != E; ++I) -    for (pred_iterator PI = pred_begin(*I), PE = pred_end(*I); -         PI != PE; ++PI) -      assert(Result.count(*PI) && -             "No blocks in this region may have entries from outside the region" -             " except for the first block!"); -#endif +  for (auto *BB : Result) { +    if (!isBlockValidForExtraction(*BB, Result, AllowVarArgs, AllowAlloca)) +      return {}; + +    // Make sure that the first block is not a landing pad. +    if (BB == Result.front()) { +      if (BB->isEHPad()) { +        LLVM_DEBUG(dbgs() << "The first block cannot be an unwind block\n"); +        return {}; +      } +      continue; +    } + +    // All blocks other than the first must not have predecessors outside of +    // the subgraph which is being extracted. 
+    for (auto *PBB : predecessors(BB)) +      if (!Result.count(PBB)) { +        LLVM_DEBUG( +            dbgs() << "No blocks in this region may have entries from " +                      "outside the region except for the first block!\n"); +        return {}; +      } +  }    return Result;  }  CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,                               bool AggregateArgs, BlockFrequencyInfo *BFI, -                             BranchProbabilityInfo *BPI, bool AllowVarArgs) +                             BranchProbabilityInfo *BPI, bool AllowVarArgs, +                             bool AllowAlloca)      : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),        BPI(BPI), AllowVarArgs(AllowVarArgs), -      Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs)) {} +      Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)) {}  CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,                               BlockFrequencyInfo *BFI, @@ -178,7 +239,8 @@ CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,      : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),        BPI(BPI), AllowVarArgs(false),        Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, -                                     /* AllowVarArgs */ false)) {} +                                     /* AllowVarArgs */ false, +                                     /* AllowAlloca */ false)) {}  /// definedInRegion - Return true if the specified value is defined in the  /// extracted region. 
@@ -562,8 +624,8 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,                                             BasicBlock *newHeader,                                             Function *oldFunction,                                             Module *M) { -  DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); -  DEBUG(dbgs() << "outputs: " << outputs.size() << "\n"); +  LLVM_DEBUG(dbgs() << "inputs: " << inputs.size() << "\n"); +  LLVM_DEBUG(dbgs() << "outputs: " << outputs.size() << "\n");    // This function returns unsigned, outputs will go back by reference.    switch (NumExitBlocks) { @@ -577,20 +639,20 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,    // Add the types of the input values to the function's argument list    for (Value *value : inputs) { -    DEBUG(dbgs() << "value used in func: " << *value << "\n"); +    LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n");      paramTy.push_back(value->getType());    }    // Add the types of the output values to the function's argument list.    for (Value *output : outputs) { -    DEBUG(dbgs() << "instr used in func: " << *output << "\n"); +    LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n");      if (AggregateArgs)        paramTy.push_back(output->getType());      else        paramTy.push_back(PointerType::getUnqual(output->getType()));    } -  DEBUG({ +  LLVM_DEBUG({      dbgs() << "Function type: " << *RetTy << " f(";      for (Type *i : paramTy)        dbgs() << *i << ", "; @@ -620,16 +682,89 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,    if (oldFunction->hasUWTable())      newFunction->setHasUWTable(); -  // Inherit all of the target dependent attributes. +  // Inherit all of the target dependent attributes and white-listed +  // target independent attributes.    //  (e.g. 
If the extracted region contains a call to an x86.sse    //  instruction we need to make sure that the extracted region has the    //  "target-features" attribute allowing it to be lowered.    // FIXME: This should be changed to check to see if a specific    //           attribute can not be inherited. -  AttrBuilder AB(oldFunction->getAttributes().getFnAttributes()); -  for (const auto &Attr : AB.td_attrs()) -    newFunction->addFnAttr(Attr.first, Attr.second); +  for (const auto &Attr : oldFunction->getAttributes().getFnAttributes()) { +    if (Attr.isStringAttribute()) { +      if (Attr.getKindAsString() == "thunk") +        continue; +    } else +      switch (Attr.getKindAsEnum()) { +      // Those attributes cannot be propagated safely. Explicitly list them +      // here so we get a warning if new attributes are added. This list also +      // includes non-function attributes. +      case Attribute::Alignment: +      case Attribute::AllocSize: +      case Attribute::ArgMemOnly: +      case Attribute::Builtin: +      case Attribute::ByVal: +      case Attribute::Convergent: +      case Attribute::Dereferenceable: +      case Attribute::DereferenceableOrNull: +      case Attribute::InAlloca: +      case Attribute::InReg: +      case Attribute::InaccessibleMemOnly: +      case Attribute::InaccessibleMemOrArgMemOnly: +      case Attribute::JumpTable: +      case Attribute::Naked: +      case Attribute::Nest: +      case Attribute::NoAlias: +      case Attribute::NoBuiltin: +      case Attribute::NoCapture: +      case Attribute::NoReturn: +      case Attribute::None: +      case Attribute::NonNull: +      case Attribute::ReadNone: +      case Attribute::ReadOnly: +      case Attribute::Returned: +      case Attribute::ReturnsTwice: +      case Attribute::SExt: +      case Attribute::Speculatable: +      case Attribute::StackAlignment: +      case Attribute::StructRet: +      case Attribute::SwiftError: +      case Attribute::SwiftSelf: +      case 
Attribute::WriteOnly: +      case Attribute::ZExt: +      case Attribute::EndAttrKinds: +        continue; +      // Those attributes should be safe to propagate to the extracted function. +      case Attribute::AlwaysInline: +      case Attribute::Cold: +      case Attribute::NoRecurse: +      case Attribute::InlineHint: +      case Attribute::MinSize: +      case Attribute::NoDuplicate: +      case Attribute::NoImplicitFloat: +      case Attribute::NoInline: +      case Attribute::NonLazyBind: +      case Attribute::NoRedZone: +      case Attribute::NoUnwind: +      case Attribute::OptForFuzzing: +      case Attribute::OptimizeNone: +      case Attribute::OptimizeForSize: +      case Attribute::SafeStack: +      case Attribute::ShadowCallStack: +      case Attribute::SanitizeAddress: +      case Attribute::SanitizeMemory: +      case Attribute::SanitizeThread: +      case Attribute::SanitizeHWAddress: +      case Attribute::StackProtect: +      case Attribute::StackProtectReq: +      case Attribute::StackProtectStrong: +      case Attribute::StrictFP: +      case Attribute::UWTable: +      case Attribute::NoCfCheck: +        break; +      } +    newFunction->addFnAttr(Attr); +  }    newFunction->getBasicBlockList().push_back(newRootNode);    // Create an iterator to name all of the arguments we inserted. @@ -1093,10 +1228,10 @@ Function *CodeExtractor::extractCodeRegion() {    // Update the entry count of the function.    
if (BFI) { -    Optional<uint64_t> EntryCount = -        BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); -    if (EntryCount.hasValue()) -      newFunction->setEntryCount(EntryCount.getValue()); +    auto Count = BFI->getProfileCountFromFreq(EntryFreq.getFrequency()); +    if (Count.hasValue()) +      newFunction->setEntryCount( +          ProfileCount(Count.getValue(), Function::PCT_Real)); // FIXME      BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());    } @@ -1104,6 +1239,10 @@ Function *CodeExtractor::extractCodeRegion() {    moveCodeToFunction(newFunction); +  // Propagate personality info to the new function if there is one. +  if (oldFunction->hasPersonalityFn()) +    newFunction->setPersonalityFn(oldFunction->getPersonalityFn()); +    // Update the branch weights for the exit block.    if (BFI && NumExitBlocks > 1)      calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI); @@ -1139,7 +1278,7 @@ Function *CodeExtractor::extractCodeRegion() {          }      } -  DEBUG(if (verifyFunction(*newFunction))  -        report_fatal_error("verifyFunction failed!")); +  LLVM_DEBUG(if (verifyFunction(*newFunction)) +                 report_fatal_error("verifyFunction failed!"));    return newFunction;  } diff --git a/lib/Transforms/Utils/CtorUtils.cpp b/lib/Transforms/Utils/CtorUtils.cpp index 82b67c293102..9a0240144d08 100644 --- a/lib/Transforms/Utils/CtorUtils.cpp +++ b/lib/Transforms/Utils/CtorUtils.cpp @@ -138,7 +138,7 @@ bool optimizeGlobalCtorsList(Module &M,      if (!F)        continue; -    DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n"); +    LLVM_DEBUG(dbgs() << "Optimizing Global Constructor: " << *F << "\n");      // We cannot simplify external ctor functions.      
if (F->empty()) diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 6d3d287defdb..56ff03c7f5e1 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -9,11 +9,11 @@  #include "llvm/ADT/DenseMap.h"  #include "llvm/Analysis/CFG.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/Type.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h"  using namespace llvm;  /// DemoteRegToStack - This function takes a virtual register computed by an diff --git a/lib/Transforms/Utils/EntryExitInstrumenter.cpp b/lib/Transforms/Utils/EntryExitInstrumenter.cpp index 421663f82565..569ea58a3047 100644 --- a/lib/Transforms/Utils/EntryExitInstrumenter.cpp +++ b/lib/Transforms/Utils/EntryExitInstrumenter.cpp @@ -9,14 +9,13 @@  #include "llvm/Transforms/Utils/EntryExitInstrumenter.h"  #include "llvm/Analysis/GlobalsModRef.h" -#include "llvm/CodeGen/Passes.h"  #include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/Module.h"  #include "llvm/IR/Type.h"  #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  using namespace llvm;  static void insertCall(Function &CurFn, StringRef Func, @@ -92,17 +91,27 @@ static bool runOnFunction(Function &F, bool PostInlining) {    if (!ExitFunc.empty()) {      for (BasicBlock &BB : F) { -      TerminatorInst *T = BB.getTerminator(); +      Instruction *T = BB.getTerminator(); +      if (!isa<ReturnInst>(T)) +        continue; + +      // If T is preceded by a musttail call, that's the real terminator. 
+      Instruction *Prev = T->getPrevNode(); +      if (BitCastInst *BCI = dyn_cast_or_null<BitCastInst>(Prev)) +        Prev = BCI->getPrevNode(); +      if (CallInst *CI = dyn_cast_or_null<CallInst>(Prev)) { +        if (CI->isMustTailCall()) +          T = CI; +      } +        DebugLoc DL;        if (DebugLoc TerminatorDL = T->getDebugLoc())          DL = TerminatorDL;        else if (auto SP = F.getSubprogram())          DL = DebugLoc::get(0, 0, SP); -      if (isa<ReturnInst>(T)) { -        insertCall(F, ExitFunc, T, DL); -        Changed = true; -      } +      insertCall(F, ExitFunc, T, DL); +      Changed = true;      }      F.removeAttribute(AttributeList::FunctionIndex, ExitAttr);    } diff --git a/lib/Transforms/Utils/EscapeEnumerator.cpp b/lib/Transforms/Utils/EscapeEnumerator.cpp index 78d7474e5b95..c9c96fbe5da0 100644 --- a/lib/Transforms/Utils/EscapeEnumerator.cpp +++ b/lib/Transforms/Utils/EscapeEnumerator.cpp @@ -14,9 +14,9 @@  #include "llvm/Transforms/Utils/EscapeEnumerator.h"  #include "llvm/Analysis/EHPersonalities.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/IR/CallSite.h"  #include "llvm/IR/Module.h" -#include "llvm/Transforms/Utils/Local.h"  using namespace llvm;  static Constant *getDefaultPersonalityFn(Module *M) { @@ -73,8 +73,8 @@ IRBuilder<> *EscapeEnumerator::Next() {      F.setPersonalityFn(PersFn);    } -  if (isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { -    report_fatal_error("Funclet EH not supported"); +  if (isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) { +    report_fatal_error("Scoped EH not supported");    }    LandingPadInst *LPad = diff --git a/lib/Transforms/Utils/Evaluator.cpp b/lib/Transforms/Utils/Evaluator.cpp index 3c5e299fae98..7fd9425efed3 100644 --- a/lib/Transforms/Utils/Evaluator.cpp +++ b/lib/Transforms/Utils/Evaluator.cpp @@ -24,6 +24,7 @@  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Function.h" +#include 
"llvm/IR/GlobalAlias.h"  #include "llvm/IR/GlobalValue.h"  #include "llvm/IR/GlobalVariable.h"  #include "llvm/IR/InstrTypes.h" @@ -174,6 +175,11 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) {    return false;  } +static Constant *getInitializer(Constant *C) { +  auto *GV = dyn_cast<GlobalVariable>(C); +  return GV && GV->hasDefinitiveInitializer() ? GV->getInitializer() : nullptr; +} +  /// Return the value that would be computed by a load from P after the stores  /// reflected by 'memory' have been performed.  If we can't decide, return null.  Constant *Evaluator::ComputeLoadResult(Constant *P) { @@ -189,18 +195,96 @@ Constant *Evaluator::ComputeLoadResult(Constant *P) {      return nullptr;    } -  // Handle a constantexpr getelementptr. -  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) -    if (CE->getOpcode() == Instruction::GetElementPtr && -        isa<GlobalVariable>(CE->getOperand(0))) { -      GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0)); -      if (GV->hasDefinitiveInitializer()) -        return ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE); +  if (ConstantExpr *CE = dyn_cast<ConstantExpr>(P)) { +    switch (CE->getOpcode()) { +    // Handle a constantexpr getelementptr. +    case Instruction::GetElementPtr: +      if (auto *I = getInitializer(CE->getOperand(0))) +        return ConstantFoldLoadThroughGEPConstantExpr(I, CE); +      break; +    // Handle a constantexpr bitcast. +    case Instruction::BitCast: +      Constant *Val = getVal(CE->getOperand(0)); +      auto MM = MutatedMemory.find(Val); +      auto *I = (MM != MutatedMemory.end()) ? MM->second +                                            : getInitializer(CE->getOperand(0)); +      if (I) +        return ConstantFoldLoadThroughBitcast( +            I, P->getType()->getPointerElementType(), DL); +      break;      } +  }    return nullptr;  // don't know how to evaluate.  
} +static Function *getFunction(Constant *C) { +  if (auto *Fn = dyn_cast<Function>(C)) +    return Fn; + +  if (auto *Alias = dyn_cast<GlobalAlias>(C)) +    if (auto *Fn = dyn_cast<Function>(Alias->getAliasee())) +      return Fn; +  return nullptr; +} + +Function * +Evaluator::getCalleeWithFormalArgs(CallSite &CS, +                                   SmallVector<Constant *, 8> &Formals) { +  auto *V = CS.getCalledValue(); +  if (auto *Fn = getFunction(getVal(V))) +    return getFormalParams(CS, Fn, Formals) ? Fn : nullptr; + +  auto *CE = dyn_cast<ConstantExpr>(V); +  if (!CE || CE->getOpcode() != Instruction::BitCast || +      !getFormalParams(CS, getFunction(CE->getOperand(0)), Formals)) +    return nullptr; + +  return dyn_cast<Function>( +      ConstantFoldLoadThroughBitcast(CE, CE->getOperand(0)->getType(), DL)); +} + +bool Evaluator::getFormalParams(CallSite &CS, Function *F, +                                SmallVector<Constant *, 8> &Formals) { +  if (!F) +    return false; + +  auto *FTy = F->getFunctionType(); +  if (FTy->getNumParams() > CS.getNumArgOperands()) { +    LLVM_DEBUG(dbgs() << "Too few arguments for function.\n"); +    return false; +  } + +  auto ArgI = CS.arg_begin(); +  for (auto ParI = FTy->param_begin(), ParE = FTy->param_end(); ParI != ParE; +       ++ParI) { +    auto *ArgC = ConstantFoldLoadThroughBitcast(getVal(*ArgI), *ParI, DL); +    if (!ArgC) { +      LLVM_DEBUG(dbgs() << "Can not convert function argument.\n"); +      return false; +    } +    Formals.push_back(ArgC); +    ++ArgI; +  } +  return true; +} + +/// If call expression contains bitcast then we may need to cast +/// evaluated return value to a type of the call expression. 
+Constant *Evaluator::castCallResultIfNeeded(Value *CallExpr, Constant *RV) { +  ConstantExpr *CE = dyn_cast<ConstantExpr>(CallExpr); +  if (!RV || !CE || CE->getOpcode() != Instruction::BitCast) +    return RV; + +  if (auto *FT = +          dyn_cast<FunctionType>(CE->getType()->getPointerElementType())) { +    RV = ConstantFoldLoadThroughBitcast(RV, FT->getReturnType(), DL); +    if (!RV) +      LLVM_DEBUG(dbgs() << "Failed to fold bitcast call expr\n"); +  } +  return RV; +} +  /// Evaluate all instructions in block BB, returning true if successful, false  /// if we can't evaluate it.  NewBB returns the next BB that control flows into,  /// or null upon return. @@ -210,22 +294,23 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,    while (true) {      Constant *InstResult = nullptr; -    DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n"); +    LLVM_DEBUG(dbgs() << "Evaluating Instruction: " << *CurInst << "\n");      if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {        if (!SI->isSimple()) { -        DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n"); +        LLVM_DEBUG(dbgs() << "Store is not simple! Can not evaluate.\n");          return false;  // no volatile/atomic accesses.        }        Constant *Ptr = getVal(SI->getOperand(1));        if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) { -        DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr); +        LLVM_DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);          Ptr = FoldedPtr; -        DEBUG(dbgs() << "; To: " << *Ptr << "\n"); +        LLVM_DEBUG(dbgs() << "; To: " << *Ptr << "\n");        }        if (!isSimpleEnoughPointerToCommit(Ptr)) {          // If this is too complex for us to commit, reject it. 
-        DEBUG(dbgs() << "Pointer is too complex for us to evaluate store."); +        LLVM_DEBUG( +            dbgs() << "Pointer is too complex for us to evaluate store.");          return false;        } @@ -234,14 +319,15 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,        // If this might be too difficult for the backend to handle (e.g. the addr        // of one global variable divided by another) then we can't commit it.        if (!isSimpleEnoughValueToCommit(Val, SimpleConstants, DL)) { -        DEBUG(dbgs() << "Store value is too complex to evaluate store. " << *Val -              << "\n"); +        LLVM_DEBUG(dbgs() << "Store value is too complex to evaluate store. " +                          << *Val << "\n");          return false;        }        if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {          if (CE->getOpcode() == Instruction::BitCast) { -          DEBUG(dbgs() << "Attempting to resolve bitcast on constant ptr.\n"); +          LLVM_DEBUG(dbgs() +                     << "Attempting to resolve bitcast on constant ptr.\n");            // If we're evaluating a store through a bitcast, then we need            // to pull the bitcast off the pointer type and push it onto the            // stored value. @@ -252,7 +338,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,            // In order to push the bitcast onto the stored value, a bitcast            // from NewTy to Val's type must be legal.  If it's not, we can try            // introspecting NewTy to find a legal conversion. -          while (!Val->getType()->canLosslesslyBitCastTo(NewTy)) { +          Constant *NewVal; +          while (!(NewVal = ConstantFoldLoadThroughBitcast(Val, NewTy, DL))) {              // If NewTy is a struct, we can convert the pointer to the struct              // into a pointer to its first member.              // FIXME: This could be extended to support arrays as well. 
@@ -270,17 +357,14 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,              // If we can't improve the situation by introspecting NewTy,              // we have to give up.              } else { -              DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " -                    "evaluate.\n"); +              LLVM_DEBUG(dbgs() << "Failed to bitcast constant ptr, can not " +                                   "evaluate.\n");                return false;              }            } -          // If we found compatible types, go ahead and push the bitcast -          // onto the stored value. -          Val = ConstantExpr::getBitCast(Val, NewTy); - -          DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n"); +          Val = NewVal; +          LLVM_DEBUG(dbgs() << "Evaluated bitcast: " << *Val << "\n");          }        } @@ -289,37 +373,37 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,        InstResult = ConstantExpr::get(BO->getOpcode(),                                       getVal(BO->getOperand(0)),                                       getVal(BO->getOperand(1))); -      DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " << *InstResult -            << "\n"); +      LLVM_DEBUG(dbgs() << "Found a BinaryOperator! Simplifying: " +                        << *InstResult << "\n");      } else if (CmpInst *CI = dyn_cast<CmpInst>(CurInst)) {        InstResult = ConstantExpr::getCompare(CI->getPredicate(),                                              getVal(CI->getOperand(0)),                                              getVal(CI->getOperand(1))); -      DEBUG(dbgs() << "Found a CmpInst! Simplifying: " << *InstResult -            << "\n"); +      LLVM_DEBUG(dbgs() << "Found a CmpInst! 
Simplifying: " << *InstResult +                        << "\n");      } else if (CastInst *CI = dyn_cast<CastInst>(CurInst)) {        InstResult = ConstantExpr::getCast(CI->getOpcode(),                                           getVal(CI->getOperand(0)),                                           CI->getType()); -      DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult -            << "\n"); +      LLVM_DEBUG(dbgs() << "Found a Cast! Simplifying: " << *InstResult +                        << "\n");      } else if (SelectInst *SI = dyn_cast<SelectInst>(CurInst)) {        InstResult = ConstantExpr::getSelect(getVal(SI->getOperand(0)),                                             getVal(SI->getOperand(1)),                                             getVal(SI->getOperand(2))); -      DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult -            << "\n"); +      LLVM_DEBUG(dbgs() << "Found a Select! Simplifying: " << *InstResult +                        << "\n");      } else if (auto *EVI = dyn_cast<ExtractValueInst>(CurInst)) {        InstResult = ConstantExpr::getExtractValue(            getVal(EVI->getAggregateOperand()), EVI->getIndices()); -      DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " << *InstResult -                   << "\n"); +      LLVM_DEBUG(dbgs() << "Found an ExtractValueInst! Simplifying: " +                        << *InstResult << "\n");      } else if (auto *IVI = dyn_cast<InsertValueInst>(CurInst)) {        InstResult = ConstantExpr::getInsertValue(            getVal(IVI->getAggregateOperand()),            getVal(IVI->getInsertedValueOperand()), IVI->getIndices()); -      DEBUG(dbgs() << "Found an InsertValueInst! Simplifying: " << *InstResult -                   << "\n"); +      LLVM_DEBUG(dbgs() << "Found an InsertValueInst! 
Simplifying: " +                        << *InstResult << "\n");      } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurInst)) {        Constant *P = getVal(GEP->getOperand(0));        SmallVector<Constant*, 8> GEPOps; @@ -329,60 +413,63 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,        InstResult =            ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), P, GEPOps,                                           cast<GEPOperator>(GEP)->isInBounds()); -      DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult -            << "\n"); +      LLVM_DEBUG(dbgs() << "Found a GEP! Simplifying: " << *InstResult << "\n");      } else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {        if (!LI->isSimple()) { -        DEBUG(dbgs() << "Found a Load! Not a simple load, can not evaluate.\n"); +        LLVM_DEBUG( +            dbgs() << "Found a Load! Not a simple load, can not evaluate.\n");          return false;  // no volatile/atomic accesses.        }        Constant *Ptr = getVal(LI->getOperand(0));        if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {          Ptr = FoldedPtr; -        DEBUG(dbgs() << "Found a constant pointer expression, constant " -              "folding: " << *Ptr << "\n"); +        LLVM_DEBUG(dbgs() << "Found a constant pointer expression, constant " +                             "folding: " +                          << *Ptr << "\n");        }        InstResult = ComputeLoadResult(Ptr);        if (!InstResult) { -        DEBUG(dbgs() << "Failed to compute load result. Can not evaluate load." -              "\n"); +        LLVM_DEBUG( +            dbgs() << "Failed to compute load result. Can not evaluate load." +                      "\n");          return false; // Could not evaluate load.        
} -      DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n"); +      LLVM_DEBUG(dbgs() << "Evaluated load: " << *InstResult << "\n");      } else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {        if (AI->isArrayAllocation()) { -        DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n"); +        LLVM_DEBUG(dbgs() << "Found an array alloca. Can not evaluate.\n");          return false;  // Cannot handle array allocs.        }        Type *Ty = AI->getAllocatedType();        AllocaTmps.push_back(llvm::make_unique<GlobalVariable>(            Ty, false, GlobalValue::InternalLinkage, UndefValue::get(Ty), -          AI->getName())); +          AI->getName(), /*TLMode=*/GlobalValue::NotThreadLocal, +          AI->getType()->getPointerAddressSpace()));        InstResult = AllocaTmps.back().get(); -      DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n"); +      LLVM_DEBUG(dbgs() << "Found an alloca. Result: " << *InstResult << "\n");      } else if (isa<CallInst>(CurInst) || isa<InvokeInst>(CurInst)) {        CallSite CS(&*CurInst);        // Debug info can safely be ignored here.        if (isa<DbgInfoIntrinsic>(CS.getInstruction())) { -        DEBUG(dbgs() << "Ignoring debug info.\n"); +        LLVM_DEBUG(dbgs() << "Ignoring debug info.\n");          ++CurInst;          continue;        }        // Cannot handle inline asm.        
if (isa<InlineAsm>(CS.getCalledValue())) { -        DEBUG(dbgs() << "Found inline asm, can not evaluate.\n"); +        LLVM_DEBUG(dbgs() << "Found inline asm, can not evaluate.\n");          return false;        }        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {          if (MemSetInst *MSI = dyn_cast<MemSetInst>(II)) {            if (MSI->isVolatile()) { -            DEBUG(dbgs() << "Can not optimize a volatile memset " << -                  "intrinsic.\n"); +            LLVM_DEBUG(dbgs() << "Can not optimize a volatile memset " +                              << "intrinsic.\n");              return false;            }            Constant *Ptr = getVal(MSI->getDest()); @@ -390,7 +477,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,            Constant *DestVal = ComputeLoadResult(getVal(Ptr));            if (Val->isNullValue() && DestVal && DestVal->isNullValue()) {              // This memset is a no-op. -            DEBUG(dbgs() << "Ignoring no-op memset.\n"); +            LLVM_DEBUG(dbgs() << "Ignoring no-op memset.\n");              ++CurInst;              continue;            } @@ -398,7 +485,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,          if (II->getIntrinsicID() == Intrinsic::lifetime_start ||              II->getIntrinsicID() == Intrinsic::lifetime_end) { -          DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n"); +          LLVM_DEBUG(dbgs() << "Ignoring lifetime intrinsic.\n");            ++CurInst;            continue;          } @@ -407,7 +494,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,            // We don't insert an entry into Values, as it doesn't have a            // meaningful return value.            if (!II->use_empty()) { -            DEBUG(dbgs() << "Found unused invariant_start. Can't evaluate.\n"); +            LLVM_DEBUG(dbgs() +                       << "Found unused invariant_start. 
Can't evaluate.\n");              return false;            }            ConstantInt *Size = cast<ConstantInt>(II->getArgOperand(0)); @@ -419,54 +507,54 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,                  Size->getValue().getLimitedValue() >=                      DL.getTypeStoreSize(ElemTy)) {                Invariants.insert(GV); -              DEBUG(dbgs() << "Found a global var that is an invariant: " << *GV -                    << "\n"); +              LLVM_DEBUG(dbgs() << "Found a global var that is an invariant: " +                                << *GV << "\n");              } else { -              DEBUG(dbgs() << "Found a global var, but can not treat it as an " -                    "invariant.\n"); +              LLVM_DEBUG(dbgs() +                         << "Found a global var, but can not treat it as an " +                            "invariant.\n");              }            }            // Continue even if we do nothing.            ++CurInst;            continue;          } else if (II->getIntrinsicID() == Intrinsic::assume) { -          DEBUG(dbgs() << "Skipping assume intrinsic.\n"); +          LLVM_DEBUG(dbgs() << "Skipping assume intrinsic.\n");            ++CurInst;            continue;          } else if (II->getIntrinsicID() == Intrinsic::sideeffect) { -          DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n"); +          LLVM_DEBUG(dbgs() << "Skipping sideeffect intrinsic.\n");            ++CurInst;            continue;          } -        DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n"); +        LLVM_DEBUG(dbgs() << "Unknown intrinsic. Can not evaluate.\n");          return false;        }        // Resolve function pointers. 
-      Function *Callee = dyn_cast<Function>(getVal(CS.getCalledValue())); +      SmallVector<Constant *, 8> Formals; +      Function *Callee = getCalleeWithFormalArgs(CS, Formals);        if (!Callee || Callee->isInterposable()) { -        DEBUG(dbgs() << "Can not resolve function pointer.\n"); +        LLVM_DEBUG(dbgs() << "Can not resolve function pointer.\n");          return false;  // Cannot resolve.        } -      SmallVector<Constant*, 8> Formals; -      for (User::op_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) -        Formals.push_back(getVal(*i)); -        if (Callee->isDeclaration()) {          // If this is a function we can constant fold, do it.          if (Constant *C = ConstantFoldCall(CS, Callee, Formals, TLI)) { -          InstResult = C; -          DEBUG(dbgs() << "Constant folded function call. Result: " << -                *InstResult << "\n"); +          InstResult = castCallResultIfNeeded(CS.getCalledValue(), C); +          if (!InstResult) +            return false; +          LLVM_DEBUG(dbgs() << "Constant folded function call. Result: " +                            << *InstResult << "\n");          } else { -          DEBUG(dbgs() << "Can not constant fold function call.\n"); +          LLVM_DEBUG(dbgs() << "Can not constant fold function call.\n");            return false;          }        } else {          if (Callee->getFunctionType()->isVarArg()) { -          DEBUG(dbgs() << "Can not constant fold vararg function call.\n"); +          LLVM_DEBUG(dbgs() << "Can not constant fold vararg function call.\n");            return false;          } @@ -474,21 +562,24 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,          // Execute the call, if successful, use the return value.          
ValueStack.emplace_back();          if (!EvaluateFunction(Callee, RetVal, Formals)) { -          DEBUG(dbgs() << "Failed to evaluate function.\n"); +          LLVM_DEBUG(dbgs() << "Failed to evaluate function.\n");            return false;          }          ValueStack.pop_back(); -        InstResult = RetVal; +        InstResult = castCallResultIfNeeded(CS.getCalledValue(), RetVal); +        if (RetVal && !InstResult) +          return false;          if (InstResult) { -          DEBUG(dbgs() << "Successfully evaluated function. Result: " -                       << *InstResult << "\n\n"); +          LLVM_DEBUG(dbgs() << "Successfully evaluated function. Result: " +                            << *InstResult << "\n\n");          } else { -          DEBUG(dbgs() << "Successfully evaluated function. Result: 0\n\n"); +          LLVM_DEBUG(dbgs() +                     << "Successfully evaluated function. Result: 0\n\n");          }        }      } else if (isa<TerminatorInst>(CurInst)) { -      DEBUG(dbgs() << "Found a terminator instruction.\n"); +      LLVM_DEBUG(dbgs() << "Found a terminator instruction.\n");        if (BranchInst *BI = dyn_cast<BranchInst>(CurInst)) {          if (BI->isUnconditional()) { @@ -515,17 +606,18 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,          NextBB = nullptr;        } else {          // invoke, unwind, resume, unreachable. -        DEBUG(dbgs() << "Can not handle terminator."); +        LLVM_DEBUG(dbgs() << "Can not handle terminator.");          return false;  // Cannot handle this terminator.        }        // We succeeded at evaluating this block! -      DEBUG(dbgs() << "Successfully evaluated block.\n"); +      LLVM_DEBUG(dbgs() << "Successfully evaluated block.\n");        return true;      } else {        // Did not know how to evaluate this! -      DEBUG(dbgs() << "Failed to evaluate block due to unhandled instruction." 
-            "\n"); +      LLVM_DEBUG( +          dbgs() << "Failed to evaluate block due to unhandled instruction." +                    "\n");        return false;      } @@ -539,7 +631,7 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,      // If we just processed an invoke, we finished evaluating the block.      if (InvokeInst *II = dyn_cast<InvokeInst>(CurInst)) {        NextBB = II->getNormalDest(); -      DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n"); +      LLVM_DEBUG(dbgs() << "Found an invoke instruction. Finished Block.\n\n");        return true;      } @@ -578,7 +670,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,    while (true) {      BasicBlock *NextBB = nullptr; // Initialized to avoid compiler warnings. -    DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n"); +    LLVM_DEBUG(dbgs() << "Trying to evaluate BB: " << *CurBB << "\n");      if (!EvaluateBlock(CurInst, NextBB))        return false; diff --git a/lib/Transforms/Utils/FlattenCFG.cpp b/lib/Transforms/Utils/FlattenCFG.cpp index 5fdcc6d1d727..3c6c9c9a5df4 100644 --- a/lib/Transforms/Utils/FlattenCFG.cpp +++ b/lib/Transforms/Utils/FlattenCFG.cpp @@ -13,6 +13,7 @@  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/IRBuilder.h" @@ -24,7 +25,6 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h"  #include <cassert>  using namespace llvm; @@ -36,16 +36,16 @@ namespace {  class FlattenCFGOpt {    AliasAnalysis *AA; -  /// \brief Use parallel-and or parallel-or to generate conditions for +  /// Use parallel-and or parallel-or to generate conditions for    /// conditional branches.    
bool FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder); -  /// \brief If \param BB is the merge block of an if-region, attempt to merge +  /// If \param BB is the merge block of an if-region, attempt to merge    /// the if-region with an adjacent if-region upstream if two if-regions    /// contain identical instructions.    bool MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder); -  /// \brief Compare a pair of blocks: \p Block1 and \p Block2, which +  /// Compare a pair of blocks: \p Block1 and \p Block2, which    /// are from two if-regions whose entry blocks are \p Head1 and \p    /// Head2.  \returns true if \p Block1 and \p Block2 contain identical    /// instructions, and have no memory reference alias with \p Head2. @@ -312,7 +312,7 @@ bool FlattenCFGOpt::FlattenParallelAndOr(BasicBlock *BB, IRBuilder<> &Builder) {      new UnreachableInst(CB->getContext(), CB);    } while (Iteration); -  DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock); +  LLVM_DEBUG(dbgs() << "Use parallel and/or in:\n" << *FirstCondBlock);    return true;  } @@ -469,7 +469,7 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {    // Remove \param SecondEntryBlock    SecondEntryBlock->dropAllReferences();    SecondEntryBlock->eraseFromParent(); -  DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock); +  LLVM_DEBUG(dbgs() << "If conditions merged into:\n" << *FirstEntryBlock);    return true;  } diff --git a/lib/Transforms/Utils/FunctionComparator.cpp b/lib/Transforms/Utils/FunctionComparator.cpp index bddcbd86e914..69203f9f2485 100644 --- a/lib/Transforms/Utils/FunctionComparator.cpp +++ b/lib/Transforms/Utils/FunctionComparator.cpp @@ -18,7 +18,6 @@  #include "llvm/ADT/ArrayRef.h"  #include "llvm/ADT/Hashing.h"  #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/IR/Attributes.h"  #include "llvm/IR/BasicBlock.h" @@ -377,7 +376,7 @@ int 
FunctionComparator::cmpConstants(const Constant *L,      }    }    default: // Unknown constant, abort. -    DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n"); +    LLVM_DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");      llvm_unreachable("Constant ValueID not recognized.");      return -1;    } @@ -710,7 +709,7 @@ int FunctionComparator::cmpInlineAsm(const InlineAsm *L,      return Res;    if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))      return Res; -  llvm_unreachable("InlineAsm blocks were not uniqued."); +  assert(L->getFunctionType() != R->getFunctionType());    return 0;  } @@ -925,7 +924,7 @@ FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {    H.add(F.arg_size());    SmallVector<const BasicBlock *, 8> BBs; -  SmallSet<const BasicBlock *, 16> VisitedBBs; +  SmallPtrSet<const BasicBlock *, 16> VisitedBBs;    // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),    // accumulating the hash of the function "structure." (BB and opcode sequence) diff --git a/lib/Transforms/Utils/FunctionImportUtils.cpp b/lib/Transforms/Utils/FunctionImportUtils.cpp index 6b5f593073b4..479816a339d0 100644 --- a/lib/Transforms/Utils/FunctionImportUtils.cpp +++ b/lib/Transforms/Utils/FunctionImportUtils.cpp @@ -206,15 +206,10 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {    // definition.    if (GV.hasName()) {      ValueInfo VI = ImportIndex.getValueInfo(GV.getGUID()); -    if (VI) { -      // Need to check all summaries are local in case of hash collisions. 
-      bool IsLocal = VI.getSummaryList().size() && -          llvm::all_of(VI.getSummaryList(), -                       [](const std::unique_ptr<GlobalValueSummary> &Summary) { -                         return Summary->isDSOLocal(); -                       }); -      if (IsLocal) -        GV.setDSOLocal(true); +    if (VI && VI.isDSOLocal()) { +      GV.setDSOLocal(true); +      if (GV.hasDLLImportStorageClass()) +        GV.setDLLStorageClass(GlobalValue::DefaultStorageClass);      }    } diff --git a/lib/Transforms/Utils/GlobalStatus.cpp b/lib/Transforms/Utils/GlobalStatus.cpp index 245fefb38ee8..ff6970db47da 100644 --- a/lib/Transforms/Utils/GlobalStatus.cpp +++ b/lib/Transforms/Utils/GlobalStatus.cpp @@ -60,7 +60,7 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {  }  static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS, -                             SmallPtrSetImpl<const PHINode *> &PhiUsers) { +                             SmallPtrSetImpl<const Value *> &VisitedUsers) {    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V))      if (GV->isExternallyInitialized())        GS.StoredType = GlobalStatus::StoredOnce; @@ -75,7 +75,8 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,        if (!isa<PointerType>(CE->getType()))          return true; -      if (analyzeGlobalAux(CE, GS, PhiUsers)) +      // FIXME: Do we need to add constexpr selects to VisitedUsers? 
+      if (analyzeGlobalAux(CE, GS, VisitedUsers))          return true;      } else if (const Instruction *I = dyn_cast<Instruction>(UR)) {        if (!GS.HasMultipleAccessingFunctions) { @@ -137,20 +138,18 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,              GS.StoredType = GlobalStatus::Stored;            }          } -      } else if (isa<BitCastInst>(I)) { -        if (analyzeGlobalAux(I, GS, PhiUsers)) +      } else if (isa<BitCastInst>(I) || isa<GetElementPtrInst>(I)) { +        // Skip over bitcasts and GEPs; we don't care about the type or offset +        // of the pointer. +        if (analyzeGlobalAux(I, GS, VisitedUsers))            return true; -      } else if (isa<GetElementPtrInst>(I)) { -        if (analyzeGlobalAux(I, GS, PhiUsers)) -          return true; -      } else if (isa<SelectInst>(I)) { -        if (analyzeGlobalAux(I, GS, PhiUsers)) -          return true; -      } else if (const PHINode *PN = dyn_cast<PHINode>(I)) { -        // PHI nodes we can check just like select or GEP instructions, but we -        // have to be careful about infinite recursion. -        if (PhiUsers.insert(PN).second) // Not already visited. -          if (analyzeGlobalAux(I, GS, PhiUsers)) +      } else if (isa<SelectInst>(I) || isa<PHINode>(I)) { +        // Look through selects and PHIs to find if the pointer is +        // conditionally accessed. Make sure we only visit an instruction +        // once; otherwise, we can get infinite recursion or exponential +        // compile time. 
+        if (VisitedUsers.insert(I).second) +          if (analyzeGlobalAux(I, GS, VisitedUsers))              return true;        } else if (isa<CmpInst>(I)) {          GS.IsCompared = true; @@ -191,6 +190,6 @@ static bool analyzeGlobalAux(const Value *V, GlobalStatus &GS,  GlobalStatus::GlobalStatus() = default;  bool GlobalStatus::analyzeGlobal(const Value *V, GlobalStatus &GS) { -  SmallPtrSet<const PHINode *, 16> PhiUsers; -  return analyzeGlobalAux(V, GS, PhiUsers); +  SmallPtrSet<const Value *, 16> VisitedUsers; +  return analyzeGlobalAux(V, GS, VisitedUsers);  } diff --git a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp index b8c12ad5ea84..8382220fc9e1 100644 --- a/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp +++ b/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp @@ -161,7 +161,7 @@ void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {  void ImportedFunctionsInliningStatistics::calculateRealInlines() {    // Removing duplicated Callers. 
-  std::sort(NonImportedCallers.begin(), NonImportedCallers.end()); +  llvm::sort(NonImportedCallers.begin(), NonImportedCallers.end());    NonImportedCallers.erase(        std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),        NonImportedCallers.end()); @@ -190,13 +190,14 @@ ImportedFunctionsInliningStatistics::getSortedNodes() {    for (const NodesMapTy::value_type& Node : NodesMap)      SortedNodes.push_back(&Node); -  std::sort( +  llvm::sort(        SortedNodes.begin(), SortedNodes.end(),        [&](const SortedNodesTy::value_type &Lhs,            const SortedNodesTy::value_type &Rhs) {          if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)            return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines; -        if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines) +        if (Lhs->second->NumberOfRealInlines != +            Rhs->second->NumberOfRealInlines)            return Lhs->second->NumberOfRealInlines >                   Rhs->second->NumberOfRealInlines;          return Lhs->first() < Rhs->first(); diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index fedf6e100d6c..0315aac1cf84 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -29,6 +29,7 @@  #include "llvm/Analysis/EHPersonalities.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/Argument.h"  #include "llvm/IR/BasicBlock.h" @@ -60,7 +61,6 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/ValueMapper.h"  #include <algorithm>  #include <cassert> @@ -72,6 +72,7 @@  #include <vector>  using namespace llvm; +using ProfileCount = 
Function::ProfileCount;  static cl::opt<bool>  EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true), @@ -1247,7 +1248,7 @@ static void HandleByValArgumentInit(Value *Dst, Value *Src, Module *M,    // Always generate a memcpy of alignment 1 here because we don't know    // the alignment of the src pointer.  Other optimizations can infer    // better alignment. -  Builder.CreateMemCpy(Dst, Src, Size, /*Align=*/1); +  Builder.CreateMemCpy(Dst, /*DstAlign*/1, Src, /*SrcAlign*/1, Size);  }  /// When inlining a call site that has a byval argument, @@ -1431,29 +1432,29 @@ static void updateCallerBFI(BasicBlock *CallSiteBlock,  /// Update the branch metadata for cloned call instructions.  static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap, -                              const Optional<uint64_t> &CalleeEntryCount, +                              const ProfileCount &CalleeEntryCount,                                const Instruction *TheCall,                                ProfileSummaryInfo *PSI,                                BlockFrequencyInfo *CallerBFI) { -  if (!CalleeEntryCount.hasValue() || CalleeEntryCount.getValue() < 1) +  if (!CalleeEntryCount.hasValue() || CalleeEntryCount.isSynthetic() || +      CalleeEntryCount.getCount() < 1)      return; -  Optional<uint64_t> CallSiteCount = -      PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None; +  auto CallSiteCount = PSI ? PSI->getProfileCount(TheCall, CallerBFI) : None;    uint64_t CallCount =        std::min(CallSiteCount.hasValue() ? 
CallSiteCount.getValue() : 0, -               CalleeEntryCount.getValue()); +               CalleeEntryCount.getCount());    for (auto const &Entry : VMap)      if (isa<CallInst>(Entry.first))        if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) -        CI->updateProfWeight(CallCount, CalleeEntryCount.getValue()); +        CI->updateProfWeight(CallCount, CalleeEntryCount.getCount());    for (BasicBlock &BB : *Callee)      // No need to update the callsite if it is pruned during inlining.      if (VMap.count(&BB))        for (Instruction &I : BB)          if (CallInst *CI = dyn_cast<CallInst>(&I)) -          CI->updateProfWeight(CalleeEntryCount.getValue() - CallCount, -                               CalleeEntryCount.getValue()); +          CI->updateProfWeight(CalleeEntryCount.getCount() - CallCount, +                               CalleeEntryCount.getCount());  }  /// Update the entry count of callee after inlining. @@ -1467,18 +1468,19 @@ static void updateCalleeCount(BlockFrequencyInfo *CallerBFI, BasicBlock *CallBB,    // callsite is M, the new callee count is set to N - M. M is estimated from    // the caller's entry count, its entry block frequency and the block frequency    // of the callsite. -  Optional<uint64_t> CalleeCount = Callee->getEntryCount(); +  auto CalleeCount = Callee->getEntryCount();    if (!CalleeCount.hasValue() || !PSI)      return; -  Optional<uint64_t> CallCount = PSI->getProfileCount(CallInst, CallerBFI); +  auto CallCount = PSI->getProfileCount(CallInst, CallerBFI);    if (!CallCount.hasValue())      return;    // Since CallSiteCount is an estimate, it could exceed the original callee    // count and has to be set to 0. 
-  if (CallCount.getValue() > CalleeCount.getValue()) -    Callee->setEntryCount(0); +  if (CallCount.getValue() > CalleeCount.getCount()) +    CalleeCount.setCount(0);    else -    Callee->setEntryCount(CalleeCount.getValue() - CallCount.getValue()); +    CalleeCount.setCount(CalleeCount.getCount() - CallCount.getValue()); +  Callee->setEntryCount(CalleeCount);  }  /// This function inlines the called function into the basic block of the @@ -1500,10 +1502,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,    IFI.reset();    Function *CalledFunc = CS.getCalledFunction(); -  if (!CalledFunc ||              // Can't inline external function or indirect -      CalledFunc->isDeclaration() || -      (!ForwardVarArgsTo && CalledFunc->isVarArg())) // call, or call to a vararg function! -      return false; +  if (!CalledFunc ||               // Can't inline external function or indirect +      CalledFunc->isDeclaration()) // call! +    return false;    // The inliner does not know how to inline through calls with operand bundles    // in general ... @@ -1568,7 +1569,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,    Instruction *CallSiteEHPad = nullptr;    if (CallerPersonality) {      EHPersonality Personality = classifyEHPersonality(CallerPersonality); -    if (isFuncletEHPersonality(Personality)) { +    if (isScopedEHPersonality(Personality)) {        Optional<OperandBundleUse> ParentFunclet =            CS.getOperandBundle(LLVMContext::OB_funclet);        if (ParentFunclet) @@ -1630,9 +1631,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      auto &DL = Caller->getParent()->getDataLayout(); -    assert((CalledFunc->arg_size() == CS.arg_size() || ForwardVarArgsTo) && -           "Varargs calls can only be inlined if the Varargs are forwarded!"); -      // Calculate the vector of arguments to pass into the function cloner, which      // matches up the formal to the actual argument values.      
CallSite::arg_iterator AI = CS.arg_begin(); @@ -1815,9 +1813,12 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,    }    SmallVector<Value*,4> VarArgsToForward; +  SmallVector<AttributeSet, 4> VarArgsAttrs;    for (unsigned i = CalledFunc->getFunctionType()->getNumParams(); -       i < CS.getNumArgOperands(); i++) +       i < CS.getNumArgOperands(); i++) {      VarArgsToForward.push_back(CS.getArgOperand(i)); +    VarArgsAttrs.push_back(CS.getAttributes().getParamAttributes(i)); +  }    bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;    if (InlinedFunctionInfo.ContainsCalls) { @@ -1825,6 +1826,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,      if (CallInst *CI = dyn_cast<CallInst>(TheCall))        CallSiteTailKind = CI->getTailCallKind(); +    // For inlining purposes, the "notail" marker is the same as no marker. +    if (CallSiteTailKind == CallInst::TCK_NoTail) +      CallSiteTailKind = CallInst::TCK_None; +      for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;           ++BB) {        for (auto II = BB->begin(); II != BB->end();) { @@ -1833,6 +1838,40 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,          if (!CI)            continue; +        // Forward varargs from inlined call site to calls to the +        // ForwardVarArgsTo function, if requested, and to musttail calls. +        if (!VarArgsToForward.empty() && +            ((ForwardVarArgsTo && +              CI->getCalledFunction() == ForwardVarArgsTo) || +             CI->isMustTailCall())) { +          // Collect attributes for non-vararg parameters. 
+          AttributeList Attrs = CI->getAttributes(); +          SmallVector<AttributeSet, 8> ArgAttrs; +          if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) { +            for (unsigned ArgNo = 0; +                 ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo) +              ArgAttrs.push_back(Attrs.getParamAttributes(ArgNo)); +          } + +          // Add VarArg attributes. +          ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end()); +          Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttributes(), +                                     Attrs.getRetAttributes(), ArgAttrs); +          // Add VarArgs to existing parameters. +          SmallVector<Value *, 6> Params(CI->arg_operands()); +          Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); +          CallInst *NewCI = +              CallInst::Create(CI->getCalledFunction() ? CI->getCalledFunction() +                                                       : CI->getCalledValue(), +                               Params, "", CI); +          NewCI->setDebugLoc(CI->getDebugLoc()); +          NewCI->setAttributes(Attrs); +          NewCI->setCallingConv(CI->getCallingConv()); +          CI->replaceAllUsesWith(NewCI); +          CI->eraseFromParent(); +          CI = NewCI; +        } +          if (Function *F = CI->getCalledFunction())            InlinedDeoptimizeCalls |=                F->getIntrinsicID() == Intrinsic::experimental_deoptimize; @@ -1850,6 +1889,8 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,          //    f -> musttail g ->     tail f  ==>  f ->     tail f          //    f ->          g -> musttail f  ==>  f ->          f          //    f ->          g ->     tail f  ==>  f ->          f +        // +        // Inlined notail calls should remain notail calls.          
CallInst::TailCallKind ChildTCK = CI->getTailCallKind();          if (ChildTCK != CallInst::TCK_NoTail)            ChildTCK = std::min(CallSiteTailKind, ChildTCK); @@ -1860,16 +1901,6 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,          // 'nounwind'.          if (MarkNoUnwind)            CI->setDoesNotThrow(); - -        if (ForwardVarArgsTo && !VarArgsToForward.empty() && -            CI->getCalledFunction() == ForwardVarArgsTo) { -          SmallVector<Value*, 6> Params(CI->arg_operands()); -          Params.append(VarArgsToForward.begin(), VarArgsToForward.end()); -          CallInst *Call = CallInst::Create(CI->getCalledFunction(), Params, "", CI); -          Call->setDebugLoc(CI->getDebugLoc()); -          CI->replaceAllUsesWith(Call); -          CI->eraseFromParent(); -        }        }      }    } diff --git a/lib/Transforms/Utils/InstructionNamer.cpp b/lib/Transforms/Utils/InstructionNamer.cpp index 23ec45edb3ef..003721f2b939 100644 --- a/lib/Transforms/Utils/InstructionNamer.cpp +++ b/lib/Transforms/Utils/InstructionNamer.cpp @@ -17,7 +17,7 @@  #include "llvm/IR/Function.h"  #include "llvm/IR/Type.h"  #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  using namespace llvm;  namespace { diff --git a/lib/Transforms/Utils/IntegerDivision.cpp b/lib/Transforms/Utils/IntegerDivision.cpp index 5a90dcb033b2..3fbb3487884b 100644 --- a/lib/Transforms/Utils/IntegerDivision.cpp +++ b/lib/Transforms/Utils/IntegerDivision.cpp @@ -372,7 +372,7 @@ static Value *generateUnsignedDivisionCode(Value *Dividend, Value *Divisor,  /// information about the operands are known. Implements both 32bit and 64bit  /// scalar division.  /// -/// @brief Replace Rem with generated code. +/// Replace Rem with generated code.  
bool llvm::expandRemainder(BinaryOperator *Rem) {    assert((Rem->getOpcode() == Instruction::SRem ||            Rem->getOpcode() == Instruction::URem) && @@ -430,7 +430,7 @@ bool llvm::expandRemainder(BinaryOperator *Rem) {  /// when more information about the operands are known. Implements both  /// 32bit and 64bit scalar division.  /// -/// @brief Replace Div with generated code. +/// Replace Div with generated code.  bool llvm::expandDivision(BinaryOperator *Div) {    assert((Div->getOpcode() == Instruction::SDiv ||            Div->getOpcode() == Instruction::UDiv) && @@ -482,7 +482,7 @@ bool llvm::expandDivision(BinaryOperator *Div) {  /// that have no or very little suppport for smaller than 32 bit integer   /// arithmetic.  /// -/// @brief Replace Rem with emulation code. +/// Replace Rem with emulation code.  bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {    assert((Rem->getOpcode() == Instruction::SRem ||            Rem->getOpcode() == Instruction::URem) && @@ -531,7 +531,7 @@ bool llvm::expandRemainderUpTo32Bits(BinaryOperator *Rem) {  /// 64 bits. Uses the above routines and extends the inputs/truncates the  /// outputs to operate in 64 bits.  /// -/// @brief Replace Rem with emulation code. +/// Replace Rem with emulation code.  bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {    assert((Rem->getOpcode() == Instruction::SRem ||            Rem->getOpcode() == Instruction::URem) && @@ -580,7 +580,7 @@ bool llvm::expandRemainderUpTo64Bits(BinaryOperator *Rem) {  /// in 32 bits; that is, these routines are good for targets that have no  /// or very little support for smaller than 32 bit integer arithmetic.  /// -/// @brief Replace Div with emulation code. +/// Replace Div with emulation code.  
bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {    assert((Div->getOpcode() == Instruction::SDiv ||            Div->getOpcode() == Instruction::UDiv) && @@ -628,7 +628,7 @@ bool llvm::expandDivisionUpTo32Bits(BinaryOperator *Div) {  /// above routines and extends the inputs/truncates the outputs to operate  /// in 64 bits.  /// -/// @brief Replace Div with emulation code. +/// Replace Div with emulation code.  bool llvm::expandDivisionUpTo64Bits(BinaryOperator *Div) {    assert((Div->getOpcode() == Instruction::SDiv ||            Div->getOpcode() == Instruction::UDiv) && diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp index ae0e2bb6c280..956d0387c7a8 100644 --- a/lib/Transforms/Utils/LCSSA.cpp +++ b/lib/Transforms/Utils/LCSSA.cpp @@ -36,13 +36,14 @@  #include "llvm/Analysis/LoopPass.h"  #include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/PredIteratorCache.h"  #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include "llvm/Transforms/Utils/LoopUtils.h"  #include "llvm/Transforms/Utils/SSAUpdater.h"  using namespace llvm; @@ -214,18 +215,27 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,          Worklist.push_back(PostProcessPN);      // Keep track of PHI nodes that we want to remove because they did not have -    // any uses rewritten. +    // any uses rewritten. If the new PHI is used, store it so that we can +    // try to propagate dbg.value intrinsics to it. 
+    SmallVector<PHINode *, 2> NeedDbgValues;      for (PHINode *PN : AddedPHIs)        if (PN->use_empty())          PHIsToRemove.insert(PN); - +      else +        NeedDbgValues.push_back(PN); +    insertDebugValuesForPHIs(InstBB, NeedDbgValues);      Changed = true;    } -  // Remove PHI nodes that did not have any uses rewritten. -  for (PHINode *PN : PHIsToRemove) { -    assert (PN->use_empty() && "Trying to remove a phi with uses."); -    PN->eraseFromParent(); -  } +  // Remove PHI nodes that did not have any uses rewritten. We need to redo the +  // use_empty() check here, because even if the PHI node wasn't used when added +  // to PHIsToRemove, later added PHI nodes can be using it.  This cleanup is +  // not guaranteed to handle trees/cycles of PHI nodes that only are used by +  // each other. Such situations has only been noticed when the input IR +  // contains unreachable code, and leaving some extra redundant PHI nodes in +  // such situations is considered a minor problem. 
+  for (PHINode *PN : PHIsToRemove) +    if (PN->use_empty()) +      PN->eraseFromParent();    return Changed;  } diff --git a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp index 42aca757c2af..9832a6f24e1f 100644 --- a/lib/Transforms/Utils/LibCallsShrinkWrap.cpp +++ b/lib/Transforms/Utils/LibCallsShrinkWrap.cpp @@ -79,11 +79,11 @@ public:    bool perform() {      bool Changed = false;      for (auto &CI : WorkList) { -      DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() -                   << "\n"); +      LLVM_DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName() +                        << "\n");        if (perform(CI)) {          Changed = true; -        DEBUG(dbgs() << "Transformed\n"); +        LLVM_DEBUG(dbgs() << "Transformed\n");        }      }      return Changed; @@ -421,7 +421,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,                                                const LibFunc &Func) {    // FIXME: LibFunc_powf and powl TBD.    if (Func != LibFunc_pow) { -    DEBUG(dbgs() << "Not handled powf() and powl()\n"); +    LLVM_DEBUG(dbgs() << "Not handled powf() and powl()\n");      return nullptr;    } @@ -433,7 +433,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,    if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) {      double D = CF->getValueAPF().convertToDouble();      if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) { -      DEBUG(dbgs() << "Not handled pow(): constant base out of range\n"); +      LLVM_DEBUG(dbgs() << "Not handled pow(): constant base out of range\n");        return nullptr;      } @@ -447,7 +447,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,    // If the Base value coming from an integer type.    
Instruction *I = dyn_cast<Instruction>(Base);    if (!I) { -    DEBUG(dbgs() << "Not handled pow(): FP type base\n"); +    LLVM_DEBUG(dbgs() << "Not handled pow(): FP type base\n");      return nullptr;    }    unsigned Opcode = I->getOpcode(); @@ -461,7 +461,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,      else if (BW == 32)        UpperV = 32.0f;      else { -      DEBUG(dbgs() << "Not handled pow(): type too wide\n"); +      LLVM_DEBUG(dbgs() << "Not handled pow(): type too wide\n");        return nullptr;      } @@ -477,7 +477,7 @@ Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,      Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0);      return BBBuilder.CreateOr(Cond0, Cond);    } -  DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n"); +  LLVM_DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n");    return nullptr;  } @@ -496,9 +496,9 @@ void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {    SuccBB->setName("cdce.end");    CI->removeFromParent();    CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI); -  DEBUG(dbgs() << "== Basic Block After =="); -  DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB -               << *CallBB->getSingleSuccessor() << "\n"); +  LLVM_DEBUG(dbgs() << "== Basic Block After =="); +  LLVM_DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB +                    << *CallBB->getSingleSuccessor() << "\n");  }  // Perform the transformation to a single candidate. @@ -529,10 +529,7 @@ static bool runImpl(Function &F, const TargetLibraryInfo &TLI,    bool Changed = CCDCE.perform();  // Verify the dominator after we've updated it locally. 
-#ifndef NDEBUG -  if (DT) -    DT->verifyDomTree(); -#endif +  assert(!DT || DT->verify(DominatorTree::VerificationLevel::Fast));    return Changed;  } diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index a1961eecb391..ae3cb077a3af 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -73,6 +73,7 @@  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/KnownBits.h"  #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/ValueMapper.h"  #include <algorithm>  #include <cassert>  #include <climits> @@ -100,26 +101,23 @@ STATISTIC(NumRemoved, "Number of unreachable basic blocks removed");  /// conditions and indirectbr addresses this might make dead if  /// DeleteDeadConditions is true.  bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions, -                                  const TargetLibraryInfo *TLI) { +                                  const TargetLibraryInfo *TLI, +                                  DeferredDominance *DDT) {    TerminatorInst *T = BB->getTerminator();    IRBuilder<> Builder(T);    // Branch - See if we are conditional jumping on constant -  if (BranchInst *BI = dyn_cast<BranchInst>(T)) { +  if (auto *BI = dyn_cast<BranchInst>(T)) {      if (BI->isUnconditional()) return false;  // Can't optimize uncond branch      BasicBlock *Dest1 = BI->getSuccessor(0);      BasicBlock *Dest2 = BI->getSuccessor(1); -    if (ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition())) { +    if (auto *Cond = dyn_cast<ConstantInt>(BI->getCondition())) {        // Are we branching on constant?        // YES.  Change to unconditional branch...        BasicBlock *Destination = Cond->getZExtValue() ? Dest1 : Dest2;        BasicBlock *OldDest     = Cond->getZExtValue() ? 
Dest2 : Dest1; -      //cerr << "Function: " << T->getParent()->getParent() -      //     << "\nRemoving branch from " << T->getParent() -      //     << "\n\nTo: " << OldDest << endl; -        // Let the basic block know that we are letting go of it.  Based on this,        // it will adjust it's PHI nodes.        OldDest->removePredecessor(BB); @@ -127,6 +125,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,        // Replace the conditional branch with an unconditional one.        Builder.CreateBr(Destination);        BI->eraseFromParent(); +      if (DDT) +        DDT->deleteEdge(BB, OldDest);        return true;      } @@ -150,10 +150,10 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,      return false;    } -  if (SwitchInst *SI = dyn_cast<SwitchInst>(T)) { +  if (auto *SI = dyn_cast<SwitchInst>(T)) {      // If we are switching on a constant, we can convert the switch to an      // unconditional branch. -    ConstantInt *CI = dyn_cast<ConstantInt>(SI->getCondition()); +    auto *CI = dyn_cast<ConstantInt>(SI->getCondition());      BasicBlock *DefaultDest = SI->getDefaultDest();      BasicBlock *TheOnlyDest = DefaultDest; @@ -197,9 +197,12 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,                            createBranchWeights(Weights));          }          // Remove this entry. -        DefaultDest->removePredecessor(SI->getParent()); +        BasicBlock *ParentBB = SI->getParent(); +        DefaultDest->removePredecessor(ParentBB);          i = SI->removeCase(i);          e = SI->case_end(); +        if (DDT) +          DDT->deleteEdge(ParentBB, DefaultDest);          continue;        } @@ -225,14 +228,20 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,        // Insert the new branch.        
Builder.CreateBr(TheOnlyDest);        BasicBlock *BB = SI->getParent(); +      std::vector <DominatorTree::UpdateType> Updates; +      if (DDT) +        Updates.reserve(SI->getNumSuccessors() - 1);        // Remove entries from PHI nodes which we no longer branch to...        for (BasicBlock *Succ : SI->successors()) {          // Found case matching a constant operand? -        if (Succ == TheOnlyDest) +        if (Succ == TheOnlyDest) {            TheOnlyDest = nullptr; // Don't modify the first branch to TheOnlyDest -        else +        } else {            Succ->removePredecessor(BB); +          if (DDT) +            Updates.push_back({DominatorTree::Delete, BB, Succ}); +        }        }        // Delete the old switch. @@ -240,6 +249,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,        SI->eraseFromParent();        if (DeleteDeadConditions)          RecursivelyDeleteTriviallyDeadInstructions(Cond, TLI); +      if (DDT) +        DDT->applyUpdates(Updates);        return true;      } @@ -280,19 +291,28 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,      return false;    } -  if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(T)) { +  if (auto *IBI = dyn_cast<IndirectBrInst>(T)) {      // indirectbr blockaddress(@F, @BB) -> br label @BB -    if (BlockAddress *BA = +    if (auto *BA =            dyn_cast<BlockAddress>(IBI->getAddress()->stripPointerCasts())) {        BasicBlock *TheOnlyDest = BA->getBasicBlock(); +      std::vector <DominatorTree::UpdateType> Updates; +      if (DDT) +        Updates.reserve(IBI->getNumDestinations() - 1); +        // Insert the new branch.        
Builder.CreateBr(TheOnlyDest);        for (unsigned i = 0, e = IBI->getNumDestinations(); i != e; ++i) { -        if (IBI->getDestination(i) == TheOnlyDest) +        if (IBI->getDestination(i) == TheOnlyDest) {            TheOnlyDest = nullptr; -        else -          IBI->getDestination(i)->removePredecessor(IBI->getParent()); +        } else { +          BasicBlock *ParentBB = IBI->getParent(); +          BasicBlock *DestBB = IBI->getDestination(i); +          DestBB->removePredecessor(ParentBB); +          if (DDT) +            Updates.push_back({DominatorTree::Delete, ParentBB, DestBB}); +        }        }        Value *Address = IBI->getAddress();        IBI->eraseFromParent(); @@ -307,6 +327,8 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB, bool DeleteDeadConditions,          new UnreachableInst(BB->getContext(), BB);        } +      if (DDT) +        DDT->applyUpdates(Updates);        return true;      }    } @@ -350,6 +372,11 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,        return false;      return true;    } +  if (DbgLabelInst *DLI = dyn_cast<DbgLabelInst>(I)) { +    if (DLI->getLabel()) +      return false; +    return true; +  }    if (!I->mayHaveSideEffects())      return true; @@ -357,8 +384,9 @@ bool llvm::wouldInstructionBeTriviallyDead(Instruction *I,    // Special case intrinsics that "may have side effects" but can be deleted    // when dead.    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { -    // Safe to delete llvm.stacksave if dead. -    if (II->getIntrinsicID() == Intrinsic::stacksave) +    // Safe to delete llvm.stacksave and launder.invariant.group if dead. +    if (II->getIntrinsicID() == Intrinsic::stacksave || +        II->getIntrinsicID() == Intrinsic::launder_invariant_group)        return true;      // Lifetime intrinsics are dead when their right-hand is undef. 
@@ -406,17 +434,31 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,    SmallVector<Instruction*, 16> DeadInsts;    DeadInsts.push_back(I); +  RecursivelyDeleteTriviallyDeadInstructions(DeadInsts, TLI); -  do { -    I = DeadInsts.pop_back_val(); +  return true; +} + +void llvm::RecursivelyDeleteTriviallyDeadInstructions( +    SmallVectorImpl<Instruction *> &DeadInsts, const TargetLibraryInfo *TLI) { +  // Process the dead instruction list until empty. +  while (!DeadInsts.empty()) { +    Instruction &I = *DeadInsts.pop_back_val(); +    assert(I.use_empty() && "Instructions with uses are not dead."); +    assert(isInstructionTriviallyDead(&I, TLI) && +           "Live instruction found in dead worklist!"); + +    // Don't lose the debug info while deleting the instructions. +    salvageDebugInfo(I);      // Null out all of the instruction's operands to see if any operand becomes      // dead as we go. -    for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { -      Value *OpV = I->getOperand(i); -      I->setOperand(i, nullptr); +    for (Use &OpU : I.operands()) { +      Value *OpV = OpU.get(); +      OpU.set(nullptr); -      if (!OpV->use_empty()) continue; +      if (!OpV->use_empty()) +        continue;        // If the operand is an instruction that became dead as we nulled out the        // operand, and if it is 'trivially' dead, delete it in a future loop @@ -426,10 +468,8 @@ llvm::RecursivelyDeleteTriviallyDeadInstructions(Value *V,            DeadInsts.push_back(OpI);      } -    I->eraseFromParent(); -  } while (!DeadInsts.empty()); - -  return true; +    I.eraseFromParent(); +  }  }  /// areAllUsesEqual - Check whether the uses of a value are all the same. 
@@ -481,6 +521,8 @@ simplifyAndDCEInstruction(Instruction *I,                            const DataLayout &DL,                            const TargetLibraryInfo *TLI) {    if (isInstructionTriviallyDead(I, TLI)) { +    salvageDebugInfo(*I); +      // Null out all of the instruction's operands to see if any operand becomes      // dead as we go.      for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { @@ -583,7 +625,8 @@ bool llvm::SimplifyInstructionsInBlock(BasicBlock *BB,  ///  /// .. and delete the predecessor corresponding to the '1', this will attempt to  /// recursively fold the and to 0. -void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) { +void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred, +                                        DeferredDominance *DDT) {    // This only adjusts blocks with PHI nodes.    if (!isa<PHINode>(BB->begin()))      return; @@ -606,13 +649,18 @@ void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred) {      // of the block.      if (PhiIt != OldPhiIt) PhiIt = &BB->front();    } +  if (DDT) +    DDT->deleteEdge(Pred, BB);  }  /// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its  /// predecessor is known to have one successor (DestBB!).  Eliminate the edge  /// between them, moving the instructions in the predecessor into DestBB and  /// deleting the predecessor block. -void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) { +void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT, +                                       DeferredDominance *DDT) { +  assert(!(DT && DDT) && "Cannot call with both DT and DDT."); +    // If BB has single-entry PHI nodes, fold them.    
while (PHINode *PN = dyn_cast<PHINode>(DestBB->begin())) {      Value *NewVal = PN->getIncomingValue(0); @@ -625,6 +673,24 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {    BasicBlock *PredBB = DestBB->getSinglePredecessor();    assert(PredBB && "Block doesn't have a single predecessor!"); +  bool ReplaceEntryBB = false; +  if (PredBB == &DestBB->getParent()->getEntryBlock()) +    ReplaceEntryBB = true; + +  // Deferred DT update: Collect all the edges that enter PredBB. These +  // dominator edges will be redirected to DestBB. +  std::vector <DominatorTree::UpdateType> Updates; +  if (DDT && !ReplaceEntryBB) { +    Updates.reserve(1 + (2 * pred_size(PredBB))); +    Updates.push_back({DominatorTree::Delete, PredBB, DestBB}); +    for (auto I = pred_begin(PredBB), E = pred_end(PredBB); I != E; ++I) { +      Updates.push_back({DominatorTree::Delete, *I, PredBB}); +      // This predecessor of PredBB may already have DestBB as a successor. +      if (llvm::find(successors(*I), DestBB) == succ_end(*I)) +        Updates.push_back({DominatorTree::Insert, *I, DestBB}); +    } +  } +    // Zap anything that took the address of DestBB.  Not doing this will give the    // address an invalid value.    if (DestBB->hasAddressTaken()) { @@ -645,7 +711,7 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {    // If the PredBB is the entry block of the function, move DestBB up to    // become the entry block after we erase PredBB. -  if (PredBB == &DestBB->getParent()->getEntryBlock()) +  if (ReplaceEntryBB)      DestBB->moveAfter(PredBB);    if (DT) { @@ -657,8 +723,19 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, DominatorTree *DT) {        DT->eraseNode(PredBB);      }    } -  // Nuke BB. -  PredBB->eraseFromParent(); + +  if (DDT) { +    DDT->deleteBB(PredBB); // Deferred deletion of BB. 
+    if (ReplaceEntryBB) +      // The entry block was removed and there is no external interface for the +      // dominator tree to be notified of this change. In this corner-case we +      // recalculate the entire tree. +      DDT->recalculate(*(DestBB->getParent())); +    else +      DDT->applyUpdates(Updates); +  } else { +    PredBB->eraseFromParent(); // Nuke BB. +  }  }  /// CanMergeValues - Return true if we can choose one of these values to use @@ -675,8 +752,8 @@ static bool CanMergeValues(Value *First, Value *Second) {  static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {    assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!"); -  DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " -        << Succ->getName() << "\n"); +  LLVM_DEBUG(dbgs() << "Looking to fold " << BB->getName() << " into " +                    << Succ->getName() << "\n");    // Shortcut, if there is only a single predecessor it must be BB and merging    // is always safe    if (Succ->getSinglePredecessor()) return true; @@ -699,10 +776,11 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {          if (BBPreds.count(IBB) &&              !CanMergeValues(BBPN->getIncomingValueForBlock(IBB),                              PN->getIncomingValue(PI))) { -          DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " -                << Succ->getName() << " is conflicting with " -                << BBPN->getName() << " with regard to common predecessor " -                << IBB->getName() << "\n"); +          LLVM_DEBUG(dbgs() +                     << "Can't fold, phi node " << PN->getName() << " in " +                     << Succ->getName() << " is conflicting with " +                     << BBPN->getName() << " with regard to common predecessor " +                     << IBB->getName() << "\n");            return false;          }        } @@ -715,9 +793,10 @@ static bool 
CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {          BasicBlock *IBB = PN->getIncomingBlock(PI);          if (BBPreds.count(IBB) &&              !CanMergeValues(Val, PN->getIncomingValue(PI))) { -          DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() << " in " -                << Succ->getName() << " is conflicting with regard to common " -                << "predecessor " << IBB->getName() << "\n"); +          LLVM_DEBUG(dbgs() << "Can't fold, phi node " << PN->getName() +                            << " in " << Succ->getName() +                            << " is conflicting with regard to common " +                            << "predecessor " << IBB->getName() << "\n");            return false;          }        } @@ -730,7 +809,7 @@ static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {  using PredBlockVector = SmallVector<BasicBlock *, 16>;  using IncomingValueMap = DenseMap<BasicBlock *, Value *>; -/// \brief Determines the value to use as the phi node input for a block. +/// Determines the value to use as the phi node input for a block.  ///  /// Select between \p OldVal any value that we know flows from \p BB  /// to a particular phi on the basis of which one (if either) is not @@ -759,7 +838,7 @@ static Value *selectIncomingValueForBlock(Value *OldVal, BasicBlock *BB,    return OldVal;  } -/// \brief Create a map from block to value for the operands of a +/// Create a map from block to value for the operands of a  /// given phi.  ///  /// Create a map from block to value for each non-undef value flowing @@ -778,7 +857,7 @@ static void gatherIncomingValuesToPhi(PHINode *PN,    }  } -/// \brief Replace the incoming undef values to a phi with the values +/// Replace the incoming undef values to a phi with the values  /// from a block-to-value map.  ///  /// \param PN The phi we are replacing the undefs in. 
@@ -798,7 +877,7 @@ static void replaceUndefValuesInPhi(PHINode *PN,    }  } -/// \brief Replace a value flowing from a block to a phi with +/// Replace a value flowing from a block to a phi with  /// potentially multiple instances of that value flowing from the  /// block's predecessors to the phi.  /// @@ -865,7 +944,8 @@ static void redirectValuesFromPredecessorsToPhi(BasicBlock *BB,  /// potential side-effect free intrinsics and the branch.  If possible,  /// eliminate BB by rewriting all the predecessors to branch to the successor  /// block and return true.  If we can't transform, return false. -bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) { +bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB, +                                                   DeferredDominance *DDT) {    assert(BB != &BB->getParent()->getEntryBlock() &&           "TryToSimplifyUncondBranchFromEmptyBlock called on entry block!"); @@ -904,7 +984,20 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {      }    } -  DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); +  LLVM_DEBUG(dbgs() << "Killing Trivial BB: \n" << *BB); + +  std::vector<DominatorTree::UpdateType> Updates; +  if (DDT) { +    Updates.reserve(1 + (2 * pred_size(BB))); +    Updates.push_back({DominatorTree::Delete, BB, Succ}); +    // All predecessors of BB will be moved to Succ. +    for (auto I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { +      Updates.push_back({DominatorTree::Delete, *I, BB}); +      // This predecessor of BB may already have Succ as a successor. +      if (llvm::find(successors(*I), Succ) == succ_end(*I)) +        Updates.push_back({DominatorTree::Insert, *I, Succ}); +    } +  }    if (isa<PHINode>(Succ->begin())) {      // If there is more than one pred of succ, and there are PHI nodes in @@ -950,7 +1043,13 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {    // Everything that jumped to BB now goes to Succ.    
BB->replaceAllUsesWith(Succ);    if (!Succ->hasName()) Succ->takeName(BB); -  BB->eraseFromParent();              // Delete the old basic block. + +  if (DDT) { +    DDT->deleteBB(BB); // Deferred deletion of the old basic block. +    DDT->applyUpdates(Updates); +  } else { +    BB->eraseFromParent(); // Delete the old basic block. +  }    return true;  } @@ -1129,6 +1228,31 @@ static bool PhiHasDebugValue(DILocalVariable *DIVar,    return false;  } +/// Check if the alloc size of \p ValTy is large enough to cover the variable +/// (or fragment of the variable) described by \p DII. +/// +/// This is primarily intended as a helper for the different +/// ConvertDebugDeclareToDebugValue functions. The dbg.declare/dbg.addr that is +/// converted describes an alloca'd variable, so we need to use the +/// alloc size of the value when doing the comparison. E.g. an i1 value will be +/// identified as covering an n-bit fragment, if the store size of i1 is at +/// least n bits. +static bool valueCoversEntireFragment(Type *ValTy, DbgInfoIntrinsic *DII) { +  const DataLayout &DL = DII->getModule()->getDataLayout(); +  uint64_t ValueSize = DL.getTypeAllocSizeInBits(ValTy); +  if (auto FragmentSize = DII->getFragmentSizeInBits()) +    return ValueSize >= *FragmentSize; +  // We can't always calculate the size of the DI variable (e.g. if it is a +  // VLA). Try to use the size of the alloca that the dbg intrinsic describes +  // intead. +  if (DII->isAddressOfVariable()) +    if (auto *AI = dyn_cast_or_null<AllocaInst>(DII->getVariableLocation())) +      if (auto FragmentSize = AI->getAllocationSizeInBits(DL)) +        return ValueSize >= *FragmentSize; +  // Could not determine size of variable. Conservatively return false. +  return false; +} +  /// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value  /// that has an associated llvm.dbg.declare or llvm.dbg.addr intrinsic.  
void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII, @@ -1139,6 +1263,21 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,    auto *DIExpr = DII->getExpression();    Value *DV = SI->getOperand(0); +  if (!valueCoversEntireFragment(SI->getValueOperand()->getType(), DII)) { +    // FIXME: If storing to a part of the variable described by the dbg.declare, +    // then we want to insert a dbg.value for the corresponding fragment. +    LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " +                      << *DII << '\n'); +    // For now, when there is a store to parts of the variable (but we do not +    // know which part) we insert an dbg.value instrinsic to indicate that we +    // know nothing about the variable's content. +    DV = UndefValue::get(DV->getType()); +    if (!LdStHasDebugValue(DIVar, DIExpr, SI)) +      Builder.insertDbgValueIntrinsic(DV, DIVar, DIExpr, DII->getDebugLoc(), +                                      SI); +    return; +  } +    // If an argument is zero extended then use argument directly. The ZExt    // may be zapped by an optimization pass in future.    Argument *ExtendedArg = nullptr; @@ -1182,6 +1321,15 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,    if (LdStHasDebugValue(DIVar, DIExpr, LI))      return; +  if (!valueCoversEntireFragment(LI->getType(), DII)) { +    // FIXME: If only referring to a part of the variable described by the +    // dbg.declare, then we want to insert a dbg.value for the corresponding +    // fragment. +    LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " +                      << *DII << '\n'); +    return; +  } +    // We are now tracking the loaded value instead of the address. 
In the    // future if multi-location support is added to the IR, it might be    // preferable to keep tracking both the loaded value and the original @@ -1202,6 +1350,15 @@ void llvm::ConvertDebugDeclareToDebugValue(DbgInfoIntrinsic *DII,    if (PhiHasDebugValue(DIVar, DIExpr, APN))      return; +  if (!valueCoversEntireFragment(APN->getType(), DII)) { +    // FIXME: If only referring to a part of the variable described by the +    // dbg.declare, then we want to insert a dbg.value for the corresponding +    // fragment. +    LLVM_DEBUG(dbgs() << "Failed to convert dbg.declare to dbg.value: " +                      << *DII << '\n'); +    return; +  } +    BasicBlock *BB = APN->getParent();    auto InsertionPt = BB->getFirstInsertionPt(); @@ -1241,33 +1398,91 @@ bool llvm::LowerDbgDeclare(Function &F) {      // stored on the stack, while the dbg.declare can only describe      // the stack slot (and at a lexical-scope granularity). Later      // passes will attempt to elide the stack slot. -    if (AI && !isArray(AI)) { -      for (auto &AIUse : AI->uses()) { -        User *U = AIUse.getUser(); -        if (StoreInst *SI = dyn_cast<StoreInst>(U)) { -          if (AIUse.getOperandNo() == 1) -            ConvertDebugDeclareToDebugValue(DDI, SI, DIB); -        } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) { -          ConvertDebugDeclareToDebugValue(DDI, LI, DIB); -        } else if (CallInst *CI = dyn_cast<CallInst>(U)) { -          // This is a call by-value or some other instruction that -          // takes a pointer to the variable. Insert a *value* -          // intrinsic that describes the alloca. -          DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), -                                      DDI->getExpression(), DDI->getDebugLoc(), -                                      CI); -        } +    if (!AI || isArray(AI)) +      continue; + +    // A volatile load/store means that the alloca can't be elided anyway. 
+    if (llvm::any_of(AI->users(), [](User *U) -> bool { +          if (LoadInst *LI = dyn_cast<LoadInst>(U)) +            return LI->isVolatile(); +          if (StoreInst *SI = dyn_cast<StoreInst>(U)) +            return SI->isVolatile(); +          return false; +        })) +      continue; + +    for (auto &AIUse : AI->uses()) { +      User *U = AIUse.getUser(); +      if (StoreInst *SI = dyn_cast<StoreInst>(U)) { +        if (AIUse.getOperandNo() == 1) +          ConvertDebugDeclareToDebugValue(DDI, SI, DIB); +      } else if (LoadInst *LI = dyn_cast<LoadInst>(U)) { +        ConvertDebugDeclareToDebugValue(DDI, LI, DIB); +      } else if (CallInst *CI = dyn_cast<CallInst>(U)) { +        // This is a call by-value or some other instruction that takes a +        // pointer to the variable. Insert a *value* intrinsic that describes +        // the variable by dereferencing the alloca. +        auto *DerefExpr = +            DIExpression::append(DDI->getExpression(), dwarf::DW_OP_deref); +        DIB.insertDbgValueIntrinsic(AI, DDI->getVariable(), DerefExpr, +                                    DDI->getDebugLoc(), CI);        } -      DDI->eraseFromParent();      } +    DDI->eraseFromParent();    }    return true;  } +/// Propagate dbg.value intrinsics through the newly inserted PHIs. +void llvm::insertDebugValuesForPHIs(BasicBlock *BB, +                                    SmallVectorImpl<PHINode *> &InsertedPHIs) { +  assert(BB && "No BasicBlock to clone dbg.value(s) from."); +  if (InsertedPHIs.size() == 0) +    return; + +  // Map existing PHI nodes to their dbg.values. 
+  ValueToValueMapTy DbgValueMap; +  for (auto &I : *BB) { +    if (auto DbgII = dyn_cast<DbgInfoIntrinsic>(&I)) { +      if (auto *Loc = dyn_cast_or_null<PHINode>(DbgII->getVariableLocation())) +        DbgValueMap.insert({Loc, DbgII}); +    } +  } +  if (DbgValueMap.size() == 0) +    return; + +  // Then iterate through the new PHIs and look to see if they use one of the +  // previously mapped PHIs. If so, insert a new dbg.value intrinsic that will +  // propagate the info through the new PHI. +  LLVMContext &C = BB->getContext(); +  for (auto PHI : InsertedPHIs) { +    BasicBlock *Parent = PHI->getParent(); +    // Avoid inserting an intrinsic into an EH block. +    if (Parent->getFirstNonPHI()->isEHPad()) +      continue; +    auto PhiMAV = MetadataAsValue::get(C, ValueAsMetadata::get(PHI)); +    for (auto VI : PHI->operand_values()) { +      auto V = DbgValueMap.find(VI); +      if (V != DbgValueMap.end()) { +        auto *DbgII = cast<DbgInfoIntrinsic>(V->second); +        Instruction *NewDbgII = DbgII->clone(); +        NewDbgII->setOperand(0, PhiMAV); +        auto InsertionPt = Parent->getFirstInsertionPt(); +        assert(InsertionPt != Parent->end() && "Ill-formed basic block"); +        NewDbgII->insertBefore(&*InsertionPt); +      } +    } +  } +} +  /// Finds all intrinsics declaring local variables as living in the memory that  /// 'V' points to. This may include a mix of dbg.declare and  /// dbg.addr intrinsics.  TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) { +  // This function is hot. Check whether the value has any metadata to avoid a +  // DenseMap lookup. +  if (!V->isUsedByMetadata()) +    return {};    auto *L = LocalAsMetadata::getIfExists(V);    if (!L)      return {}; @@ -1286,6 +1501,10 @@ TinyPtrVector<DbgInfoIntrinsic *> llvm::FindDbgAddrUses(Value *V) {  }  void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) { +  // This function is hot. 
Check whether the value has any metadata to avoid a +  // DenseMap lookup. +  if (!V->isUsedByMetadata()) +    return;    if (auto *L = LocalAsMetadata::getIfExists(V))      if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))        for (User *U : MDV->users()) @@ -1293,8 +1512,12 @@ void llvm::findDbgValues(SmallVectorImpl<DbgValueInst *> &DbgValues, Value *V) {            DbgValues.push_back(DVI);  } -static void findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers, -                         Value *V) { +void llvm::findDbgUsers(SmallVectorImpl<DbgInfoIntrinsic *> &DbgUsers, +                        Value *V) { +  // This function is hot. Check whether the value has any metadata to avoid a +  // DenseMap lookup. +  if (!V->isUsedByMetadata()) +    return;    if (auto *L = LocalAsMetadata::getIfExists(V))      if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))        for (User *U : MDV->users()) @@ -1312,11 +1535,11 @@ bool llvm::replaceDbgDeclare(Value *Address, Value *NewAddress,      auto *DIExpr = DII->getExpression();      assert(DIVar && "Missing variable");      DIExpr = DIExpression::prepend(DIExpr, DerefBefore, Offset, DerefAfter); -    // Insert llvm.dbg.declare immediately after InsertBefore, and remove old +    // Insert llvm.dbg.declare immediately before InsertBefore, and remove old      // llvm.dbg.declare.      Builder.insertDeclare(NewAddress, DIVar, DIExpr, Loc, InsertBefore);      if (DII == InsertBefore) -      InsertBefore = &*std::next(InsertBefore->getIterator()); +      InsertBefore = InsertBefore->getNextNode();      DII->eraseFromParent();    }    return !DbgAddrs.empty(); @@ -1368,66 +1591,293 @@ void llvm::replaceDbgValueForAlloca(AllocaInst *AI, Value *NewAllocaAddress,        }  } -void llvm::salvageDebugInfo(Instruction &I) { -  SmallVector<DbgValueInst *, 1> DbgValues; +/// Wrap \p V in a ValueAsMetadata instance. 
+static MetadataAsValue *wrapValueInMetadata(LLVMContext &C, Value *V) { +  return MetadataAsValue::get(C, ValueAsMetadata::get(V)); +} + +bool llvm::salvageDebugInfo(Instruction &I) { +  SmallVector<DbgInfoIntrinsic *, 1> DbgUsers; +  findDbgUsers(DbgUsers, &I); +  if (DbgUsers.empty()) +    return false; +    auto &M = *I.getModule(); +  auto &DL = M.getDataLayout(); +  auto &Ctx = I.getContext(); +  auto wrapMD = [&](Value *V) { return wrapValueInMetadata(Ctx, V); }; -  auto wrapMD = [&](Value *V) { -    return MetadataAsValue::get(I.getContext(), ValueAsMetadata::get(V)); +  auto doSalvage = [&](DbgInfoIntrinsic *DII, SmallVectorImpl<uint64_t> &Ops) { +    auto *DIExpr = DII->getExpression(); +    if (!Ops.empty()) { +      // Do not add DW_OP_stack_value for DbgDeclare and DbgAddr, because they +      // are implicitly pointing out the value as a DWARF memory location +      // description. +      bool WithStackValue = isa<DbgValueInst>(DII); +      DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue); +    } +    DII->setOperand(0, wrapMD(I.getOperand(0))); +    DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr)); +    LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');    }; -  auto applyOffset = [&](DbgValueInst *DVI, uint64_t Offset) { -    auto *DIExpr = DVI->getExpression(); -    DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset, -                                   DIExpression::NoDeref, -                                   DIExpression::WithStackValue); -    DVI->setOperand(0, wrapMD(I.getOperand(0))); -    DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); -    DEBUG(dbgs() << "SALVAGE: " << *DVI << '\n'); +  auto applyOffset = [&](DbgInfoIntrinsic *DII, uint64_t Offset) { +    SmallVector<uint64_t, 8> Ops; +    DIExpression::appendOffset(Ops, Offset); +    doSalvage(DII, Ops);    }; -  if (isa<BitCastInst>(&I) || isa<IntToPtrInst>(&I)) { -    // Bitcasts are entirely irrelevant for debug info. 
Rewrite dbg.value, -    // dbg.addr, and dbg.declare to use the cast's source. -    SmallVector<DbgInfoIntrinsic *, 1> DbgUsers; -    findDbgUsers(DbgUsers, &I); +  auto applyOps = [&](DbgInfoIntrinsic *DII, +                      std::initializer_list<uint64_t> Opcodes) { +    SmallVector<uint64_t, 8> Ops(Opcodes); +    doSalvage(DII, Ops); +  }; + +  if (auto *CI = dyn_cast<CastInst>(&I)) { +    if (!CI->isNoopCast(DL)) +      return false; + +    // No-op casts are irrelevant for debug info. +    MetadataAsValue *CastSrc = wrapMD(I.getOperand(0));      for (auto *DII : DbgUsers) { -      DII->setOperand(0, wrapMD(I.getOperand(0))); -      DEBUG(dbgs() << "SALVAGE: " << *DII << '\n'); +      DII->setOperand(0, CastSrc); +      LLVM_DEBUG(dbgs() << "SALVAGE: " << *DII << '\n');      } +    return true;    } else if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) { -    findDbgValues(DbgValues, &I); -    for (auto *DVI : DbgValues) { -      unsigned BitWidth = -          M.getDataLayout().getPointerSizeInBits(GEP->getPointerAddressSpace()); -      APInt Offset(BitWidth, 0); -      // Rewrite a constant GEP into a DIExpression.  Since we are performing -      // arithmetic to compute the variable's *value* in the DIExpression, we -      // need to mark the expression with a DW_OP_stack_value. -      if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) -        // GEP offsets are i32 and thus always fit into an int64_t. -        applyOffset(DVI, Offset.getSExtValue()); -    } +    unsigned BitWidth = +        M.getDataLayout().getIndexSizeInBits(GEP->getPointerAddressSpace()); +    // Rewrite a constant GEP into a DIExpression.  Since we are performing +    // arithmetic to compute the variable's *value* in the DIExpression, we +    // need to mark the expression with a DW_OP_stack_value. 
+    APInt Offset(BitWidth, 0); +    if (GEP->accumulateConstantOffset(M.getDataLayout(), Offset)) +      for (auto *DII : DbgUsers) +        applyOffset(DII, Offset.getSExtValue()); +    return true;    } else if (auto *BI = dyn_cast<BinaryOperator>(&I)) { -    if (BI->getOpcode() == Instruction::Add) -      if (auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1))) -        if (ConstInt->getBitWidth() <= 64) { -          APInt Offset = ConstInt->getValue(); -          findDbgValues(DbgValues, &I); -          for (auto *DVI : DbgValues) -            applyOffset(DVI, Offset.getSExtValue()); -        } +    // Rewrite binary operations with constant integer operands. +    auto *ConstInt = dyn_cast<ConstantInt>(I.getOperand(1)); +    if (!ConstInt || ConstInt->getBitWidth() > 64) +      return false; + +    uint64_t Val = ConstInt->getSExtValue(); +    for (auto *DII : DbgUsers) { +      switch (BI->getOpcode()) { +      case Instruction::Add: +        applyOffset(DII, Val); +        break; +      case Instruction::Sub: +        applyOffset(DII, -int64_t(Val)); +        break; +      case Instruction::Mul: +        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mul}); +        break; +      case Instruction::SDiv: +        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_div}); +        break; +      case Instruction::SRem: +        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_mod}); +        break; +      case Instruction::Or: +        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_or}); +        break; +      case Instruction::And: +        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_and}); +        break; +      case Instruction::Xor: +        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_xor}); +        break; +      case Instruction::Shl: +        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shl}); +        break; +      case Instruction::LShr: +        applyOps(DII, {dwarf::DW_OP_constu, Val, 
dwarf::DW_OP_shr}); +        break; +      case Instruction::AShr: +        applyOps(DII, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_shra}); +        break; +      default: +        // TODO: Salvage constants from each kind of binop we know about. +        return false; +      } +    } +    return true;    } else if (isa<LoadInst>(&I)) { -    findDbgValues(DbgValues, &I); -    for (auto *DVI : DbgValues) { +    MetadataAsValue *AddrMD = wrapMD(I.getOperand(0)); +    for (auto *DII : DbgUsers) {        // Rewrite the load into DW_OP_deref. -      auto *DIExpr = DVI->getExpression(); +      auto *DIExpr = DII->getExpression();        DIExpr = DIExpression::prepend(DIExpr, DIExpression::WithDeref); -      DVI->setOperand(0, wrapMD(I.getOperand(0))); -      DVI->setOperand(2, MetadataAsValue::get(I.getContext(), DIExpr)); -      DEBUG(dbgs() << "SALVAGE:  " << *DVI << '\n'); +      DII->setOperand(0, AddrMD); +      DII->setOperand(2, MetadataAsValue::get(Ctx, DIExpr)); +      LLVM_DEBUG(dbgs() << "SALVAGE:  " << *DII << '\n'); +    } +    return true; +  } +  return false; +} + +/// A replacement for a dbg.value expression. +using DbgValReplacement = Optional<DIExpression *>; + +/// Point debug users of \p From to \p To using exprs given by \p RewriteExpr, +/// possibly moving/deleting users to prevent use-before-def. Returns true if +/// changes are made. +static bool rewriteDebugUsers( +    Instruction &From, Value &To, Instruction &DomPoint, DominatorTree &DT, +    function_ref<DbgValReplacement(DbgInfoIntrinsic &DII)> RewriteExpr) { +  // Find debug users of From. +  SmallVector<DbgInfoIntrinsic *, 1> Users; +  findDbgUsers(Users, &From); +  if (Users.empty()) +    return false; + +  // Prevent use-before-def of To. 
+  bool Changed = false; +  SmallPtrSet<DbgInfoIntrinsic *, 1> DeleteOrSalvage; +  if (isa<Instruction>(&To)) { +    bool DomPointAfterFrom = From.getNextNonDebugInstruction() == &DomPoint; + +    for (auto *DII : Users) { +      // It's common to see a debug user between From and DomPoint. Move it +      // after DomPoint to preserve the variable update without any reordering. +      if (DomPointAfterFrom && DII->getNextNonDebugInstruction() == &DomPoint) { +        LLVM_DEBUG(dbgs() << "MOVE:  " << *DII << '\n'); +        DII->moveAfter(&DomPoint); +        Changed = true; + +      // Users which otherwise aren't dominated by the replacement value must +      // be salvaged or deleted. +      } else if (!DT.dominates(&DomPoint, DII)) { +        DeleteOrSalvage.insert(DII); +      }      }    } + +  // Update debug users without use-before-def risk. +  for (auto *DII : Users) { +    if (DeleteOrSalvage.count(DII)) +      continue; + +    LLVMContext &Ctx = DII->getContext(); +    DbgValReplacement DVR = RewriteExpr(*DII); +    if (!DVR) +      continue; + +    DII->setOperand(0, wrapValueInMetadata(Ctx, &To)); +    DII->setOperand(2, MetadataAsValue::get(Ctx, *DVR)); +    LLVM_DEBUG(dbgs() << "REWRITE:  " << *DII << '\n'); +    Changed = true; +  } + +  if (!DeleteOrSalvage.empty()) { +    // Try to salvage the remaining debug users. +    Changed |= salvageDebugInfo(From); + +    // Delete the debug users which weren't salvaged. +    for (auto *DII : DeleteOrSalvage) { +      if (DII->getVariableLocation() == &From) { +        LLVM_DEBUG(dbgs() << "Erased UseBeforeDef:  " << *DII << '\n'); +        DII->eraseFromParent(); +        Changed = true; +      } +    } +  } + +  return Changed; +} + +/// Check if a bitcast between a value of type \p FromTy to type \p ToTy would +/// losslessly preserve the bits and semantics of the value. This predicate is +/// symmetric, i.e swapping \p FromTy and \p ToTy should give the same result. 
+/// +/// Note that Type::canLosslesslyBitCastTo is not suitable here because it +/// allows semantically unequivalent bitcasts, such as <2 x i64> -> <4 x i32>, +/// and also does not allow lossless pointer <-> integer conversions. +static bool isBitCastSemanticsPreserving(const DataLayout &DL, Type *FromTy, +                                         Type *ToTy) { +  // Trivially compatible types. +  if (FromTy == ToTy) +    return true; + +  // Handle compatible pointer <-> integer conversions. +  if (FromTy->isIntOrPtrTy() && ToTy->isIntOrPtrTy()) { +    bool SameSize = DL.getTypeSizeInBits(FromTy) == DL.getTypeSizeInBits(ToTy); +    bool LosslessConversion = !DL.isNonIntegralPointerType(FromTy) && +                              !DL.isNonIntegralPointerType(ToTy); +    return SameSize && LosslessConversion; +  } + +  // TODO: This is not exhaustive. +  return false; +} + +bool llvm::replaceAllDbgUsesWith(Instruction &From, Value &To, +                                 Instruction &DomPoint, DominatorTree &DT) { +  // Exit early if From has no debug users. +  if (!From.isUsedByMetadata()) +    return false; + +  assert(&From != &To && "Can't replace something with itself"); + +  Type *FromTy = From.getType(); +  Type *ToTy = To.getType(); + +  auto Identity = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement { +    return DII.getExpression(); +  }; + +  // Handle no-op conversions. +  Module &M = *From.getModule(); +  const DataLayout &DL = M.getDataLayout(); +  if (isBitCastSemanticsPreserving(DL, FromTy, ToTy)) +    return rewriteDebugUsers(From, To, DomPoint, DT, Identity); + +  // Handle integer-to-integer widening and narrowing. +  // FIXME: Use DW_OP_convert when it's available everywhere. 
+  if (FromTy->isIntegerTy() && ToTy->isIntegerTy()) { +    uint64_t FromBits = FromTy->getPrimitiveSizeInBits(); +    uint64_t ToBits = ToTy->getPrimitiveSizeInBits(); +    assert(FromBits != ToBits && "Unexpected no-op conversion"); + +    // When the width of the result grows, assume that a debugger will only +    // access the low `FromBits` bits when inspecting the source variable. +    if (FromBits < ToBits) +      return rewriteDebugUsers(From, To, DomPoint, DT, Identity); + +    // The width of the result has shrunk. Use sign/zero extension to describe +    // the source variable's high bits. +    auto SignOrZeroExt = [&](DbgInfoIntrinsic &DII) -> DbgValReplacement { +      DILocalVariable *Var = DII.getVariable(); + +      // Without knowing signedness, sign/zero extension isn't possible. +      auto Signedness = Var->getSignedness(); +      if (!Signedness) +        return None; + +      bool Signed = *Signedness == DIBasicType::Signedness::Signed; + +      if (!Signed) { +        // In the unsigned case, assume that a debugger will initialize the +        // high bits to 0 and do a no-op conversion. +        return Identity(DII); +      } else { +        // In the signed case, the high bits are given by sign extension, i.e: +        //   (To >> (ToBits - 1)) * ((2 ^ FromBits) - 1) +        // Calculate the high bits and OR them together with the low bits. +        SmallVector<uint64_t, 8> Ops({dwarf::DW_OP_dup, dwarf::DW_OP_constu, +                                      (ToBits - 1), dwarf::DW_OP_shr, +                                      dwarf::DW_OP_lit0, dwarf::DW_OP_not, +                                      dwarf::DW_OP_mul, dwarf::DW_OP_or}); +        return DIExpression::appendToStack(DII.getExpression(), Ops); +      } +    }; +    return rewriteDebugUsers(From, To, DomPoint, DT, SignOrZeroExt); +  } + +  // TODO: Floating-point conversions, vectors. 
+  return false;  }  unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) { @@ -1452,13 +1902,19 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {  }  unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap, -                                   bool PreserveLCSSA) { +                                   bool PreserveLCSSA, DeferredDominance *DDT) {    BasicBlock *BB = I->getParent(); +  std::vector <DominatorTree::UpdateType> Updates; +    // Loop over all of the successors, removing BB's entry from any PHI    // nodes. -  for (BasicBlock *Successor : successors(BB)) +  if (DDT) +    Updates.reserve(BB->getTerminator()->getNumSuccessors()); +  for (BasicBlock *Successor : successors(BB)) {      Successor->removePredecessor(BB, PreserveLCSSA); - +    if (DDT) +      Updates.push_back({DominatorTree::Delete, BB, Successor}); +  }    // Insert a call to llvm.trap right before this.  This turns the undefined    // behavior into a hard fail instead of falling through into random code.    if (UseLLVMTrap) { @@ -1478,11 +1934,13 @@ unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,      BB->getInstList().erase(BBI++);      ++NumInstrsRemoved;    } +  if (DDT) +    DDT->applyUpdates(Updates);    return NumInstrsRemoved;  }  /// changeToCall - Convert the specified invoke into a normal call. -static void changeToCall(InvokeInst *II) { +static void changeToCall(InvokeInst *II, DeferredDominance *DDT = nullptr) {    SmallVector<Value*, 8> Args(II->arg_begin(), II->arg_end());    SmallVector<OperandBundleDef, 1> OpBundles;    II->getOperandBundlesAsDefs(OpBundles); @@ -1495,11 +1953,16 @@ static void changeToCall(InvokeInst *II) {    II->replaceAllUsesWith(NewCall);    // Follow the call by a branch to the normal destination. 
-  BranchInst::Create(II->getNormalDest(), II); +  BasicBlock *NormalDestBB = II->getNormalDest(); +  BranchInst::Create(NormalDestBB, II);    // Update PHI nodes in the unwind destination -  II->getUnwindDest()->removePredecessor(II->getParent()); +  BasicBlock *BB = II->getParent(); +  BasicBlock *UnwindDestBB = II->getUnwindDest(); +  UnwindDestBB->removePredecessor(BB);    II->eraseFromParent(); +  if (DDT) +    DDT->deleteEdge(BB, UnwindDestBB);  }  BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI, @@ -1540,7 +2003,8 @@ BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,  }  static bool markAliveBlocks(Function &F, -                            SmallPtrSetImpl<BasicBlock*> &Reachable) { +                            SmallPtrSetImpl<BasicBlock*> &Reachable, +                            DeferredDominance *DDT = nullptr) {    SmallVector<BasicBlock*, 128> Worklist;    BasicBlock *BB = &F.front();    Worklist.push_back(BB); @@ -1553,41 +2017,44 @@ static bool markAliveBlocks(Function &F,      // instructions into LLVM unreachable insts.  The instruction combining pass      // canonicalizes unreachable insts into stores to null or undef.      for (Instruction &I : *BB) { -      // Assumptions that are known to be false are equivalent to unreachable. -      // Also, if the condition is undefined, then we make the choice most -      // beneficial to the optimizer, and choose that to also be unreachable. -      if (auto *II = dyn_cast<IntrinsicInst>(&I)) { -        if (II->getIntrinsicID() == Intrinsic::assume) { -          if (match(II->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { -            // Don't insert a call to llvm.trap right before the unreachable. 
-            changeToUnreachable(II, false); -            Changed = true; -            break; -          } -        } - -        if (II->getIntrinsicID() == Intrinsic::experimental_guard) { -          // A call to the guard intrinsic bails out of the current compilation -          // unit if the predicate passed to it is false.  If the predicate is a -          // constant false, then we know the guard will bail out of the current -          // compile unconditionally, so all code following it is dead. -          // -          // Note: unlike in llvm.assume, it is not "obviously profitable" for -          // guards to treat `undef` as `false` since a guard on `undef` can -          // still be useful for widening. -          if (match(II->getArgOperand(0), m_Zero())) -            if (!isa<UnreachableInst>(II->getNextNode())) { -              changeToUnreachable(II->getNextNode(), /*UseLLVMTrap=*/ false); +      if (auto *CI = dyn_cast<CallInst>(&I)) { +        Value *Callee = CI->getCalledValue(); +        // Handle intrinsic calls. +        if (Function *F = dyn_cast<Function>(Callee)) { +          auto IntrinsicID = F->getIntrinsicID(); +          // Assumptions that are known to be false are equivalent to +          // unreachable. Also, if the condition is undefined, then we make the +          // choice most beneficial to the optimizer, and choose that to also be +          // unreachable. +          if (IntrinsicID == Intrinsic::assume) { +            if (match(CI->getArgOperand(0), m_CombineOr(m_Zero(), m_Undef()))) { +              // Don't insert a call to llvm.trap right before the unreachable. 
+              changeToUnreachable(CI, false, false, DDT);                Changed = true;                break;              } -        } -      } - -      if (auto *CI = dyn_cast<CallInst>(&I)) { -        Value *Callee = CI->getCalledValue(); -        if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { -          changeToUnreachable(CI, /*UseLLVMTrap=*/false); +          } else if (IntrinsicID == Intrinsic::experimental_guard) { +            // A call to the guard intrinsic bails out of the current +            // compilation unit if the predicate passed to it is false. If the +            // predicate is a constant false, then we know the guard will bail +            // out of the current compile unconditionally, so all code following +            // it is dead. +            // +            // Note: unlike in llvm.assume, it is not "obviously profitable" for +            // guards to treat `undef` as `false` since a guard on `undef` can +            // still be useful for widening. +            if (match(CI->getArgOperand(0), m_Zero())) +              if (!isa<UnreachableInst>(CI->getNextNode())) { +                changeToUnreachable(CI->getNextNode(), /*UseLLVMTrap=*/false, +                                    false, DDT); +                Changed = true; +                break; +              } +          } +        } else if ((isa<ConstantPointerNull>(Callee) && +                    !NullPointerIsDefined(CI->getFunction())) || +                   isa<UndefValue>(Callee)) { +          changeToUnreachable(CI, /*UseLLVMTrap=*/false, false, DDT);            Changed = true;            break;          } @@ -1597,17 +2064,16 @@ static bool markAliveBlocks(Function &F,            // though.            if (!isa<UnreachableInst>(CI->getNextNode())) {              // Don't insert a call to llvm.trap right before the unreachable. 
-            changeToUnreachable(CI->getNextNode(), false); +            changeToUnreachable(CI->getNextNode(), false, false, DDT);              Changed = true;            }            break;          } -      } +      } else if (auto *SI = dyn_cast<StoreInst>(&I)) { +        // Store to undef and store to null are undefined and used to signal +        // that they should be changed to unreachable by passes that can't +        // modify the CFG. -      // Store to undef and store to null are undefined and used to signal that -      // they should be changed to unreachable by passes that can't modify the -      // CFG. -      if (auto *SI = dyn_cast<StoreInst>(&I)) {          // Don't touch volatile stores.          if (SI->isVolatile()) continue; @@ -1615,8 +2081,9 @@ static bool markAliveBlocks(Function &F,          if (isa<UndefValue>(Ptr) ||              (isa<ConstantPointerNull>(Ptr) && -             SI->getPointerAddressSpace() == 0)) { -          changeToUnreachable(SI, true); +             !NullPointerIsDefined(SI->getFunction(), +                                   SI->getPointerAddressSpace()))) { +          changeToUnreachable(SI, true, false, DDT);            Changed = true;            break;          } @@ -1627,17 +2094,23 @@ static bool markAliveBlocks(Function &F,      if (auto *II = dyn_cast<InvokeInst>(Terminator)) {        // Turn invokes that call 'nounwind' functions into ordinary calls.        Value *Callee = II->getCalledValue(); -      if (isa<ConstantPointerNull>(Callee) || isa<UndefValue>(Callee)) { -        changeToUnreachable(II, true); +      if ((isa<ConstantPointerNull>(Callee) && +           !NullPointerIsDefined(BB->getParent())) || +          isa<UndefValue>(Callee)) { +        changeToUnreachable(II, true, false, DDT);          Changed = true;        } else if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(&F)) {          if (II->use_empty() && II->onlyReadsMemory()) {            // jump to the normal destination branch. 
-          BranchInst::Create(II->getNormalDest(), II); -          II->getUnwindDest()->removePredecessor(II->getParent()); +          BasicBlock *NormalDestBB = II->getNormalDest(); +          BasicBlock *UnwindDestBB = II->getUnwindDest(); +          BranchInst::Create(NormalDestBB, II); +          UnwindDestBB->removePredecessor(II->getParent());            II->eraseFromParent(); +          if (DDT) +            DDT->deleteEdge(BB, UnwindDestBB);          } else -          changeToCall(II); +          changeToCall(II, DDT);          Changed = true;        }      } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Terminator)) { @@ -1683,7 +2156,7 @@ static bool markAliveBlocks(Function &F,        }      } -    Changed |= ConstantFoldTerminator(BB, true); +    Changed |= ConstantFoldTerminator(BB, true, nullptr, DDT);      for (BasicBlock *Successor : successors(BB))        if (Reachable.insert(Successor).second)          Worklist.push_back(Successor); @@ -1691,11 +2164,11 @@ static bool markAliveBlocks(Function &F,    return Changed;  } -void llvm::removeUnwindEdge(BasicBlock *BB) { +void llvm::removeUnwindEdge(BasicBlock *BB, DeferredDominance *DDT) {    TerminatorInst *TI = BB->getTerminator();    if (auto *II = dyn_cast<InvokeInst>(TI)) { -    changeToCall(II); +    changeToCall(II, DDT);      return;    } @@ -1723,15 +2196,18 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {    UnwindDest->removePredecessor(BB);    TI->replaceAllUsesWith(NewTI);    TI->eraseFromParent(); +  if (DDT) +    DDT->deleteEdge(BB, UnwindDest);  }  /// removeUnreachableBlocks - Remove blocks that are not reachable, even  /// if they are in a dead cycle.  Return true if a change was made, false  /// otherwise. If `LVI` is passed, this function preserves LazyValueInfo  /// after modifying the CFG. 
-bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) { +bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI, +                                   DeferredDominance *DDT) {    SmallPtrSet<BasicBlock*, 16> Reachable; -  bool Changed = markAliveBlocks(F, Reachable); +  bool Changed = markAliveBlocks(F, Reachable, DDT);    // If there are unreachable blocks in the CFG...    if (Reachable.size() == F.size()) @@ -1741,25 +2217,39 @@ bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {    NumRemoved += F.size()-Reachable.size();    // Loop over all of the basic blocks that are not reachable, dropping all of -  // their internal references... -  for (Function::iterator BB = ++F.begin(), E = F.end(); BB != E; ++BB) { -    if (Reachable.count(&*BB)) +  // their internal references. Update DDT and LVI if available. +  std::vector <DominatorTree::UpdateType> Updates; +  for (Function::iterator I = ++F.begin(), E = F.end(); I != E; ++I) { +    auto *BB = &*I; +    if (Reachable.count(BB))        continue; - -    for (BasicBlock *Successor : successors(&*BB)) +    for (BasicBlock *Successor : successors(BB)) {        if (Reachable.count(Successor)) -        Successor->removePredecessor(&*BB); +        Successor->removePredecessor(BB); +      if (DDT) +        Updates.push_back({DominatorTree::Delete, BB, Successor}); +    }      if (LVI) -      LVI->eraseBlock(&*BB); +      LVI->eraseBlock(BB);      BB->dropAllReferences();    } -  for (Function::iterator I = ++F.begin(); I != F.end();) -    if (!Reachable.count(&*I)) -      I = F.getBasicBlockList().erase(I); -    else +  for (Function::iterator I = ++F.begin(); I != F.end();) { +    auto *BB = &*I; +    if (Reachable.count(BB)) {        ++I; +      continue; +    } +    if (DDT) { +      DDT->deleteBB(BB); // deferred deletion of BB. 
+      ++I; +    } else { +      I = F.getBasicBlockList().erase(I); +    } +  } +  if (DDT) +    DDT->applyUpdates(Updates);    return true;  } @@ -1852,8 +2342,8 @@ static unsigned replaceDominatedUsesWith(Value *From, Value *To,      if (!Dominates(Root, U))        continue;      U.set(To); -    DEBUG(dbgs() << "Replace dominated use of '" << From->getName() << "' as " -                 << *To << " in " << *U << "\n"); +    LLVM_DEBUG(dbgs() << "Replace dominated use of '" << From->getName() +                      << "' as " << *To << " in " << *U << "\n");      ++Count;    }    return Count; @@ -1957,7 +2447,7 @@ void llvm::copyRangeMetadata(const DataLayout &DL, const LoadInst &OldLI,    if (!NewTy->isPointerTy())      return; -  unsigned BitWidth = DL.getTypeSizeInBits(NewTy); +  unsigned BitWidth = DL.getIndexTypeSizeInBits(NewTy);    if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {      MDNode *NN = MDNode::get(OldLI.getContext(), None);      NewLI.setMetadata(LLVMContext::MD_nonnull, NN); @@ -2269,7 +2759,7 @@ bool llvm::canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx) {      // Static allocas (constant size in the entry block) are handled by      // prologue/epilogue insertion so they're free anyway. We definitely don't      // want to make them non-constant. -    return !dyn_cast<AllocaInst>(I)->isStaticAlloca(); +    return !cast<AllocaInst>(I)->isStaticAlloca();    case Instruction::GetElementPtr:      if (OpIdx == 0)        return true; diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp new file mode 100644 index 000000000000..6e92e679f999 --- /dev/null +++ b/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -0,0 +1,645 @@ +//===----------------- LoopRotationUtils.cpp -----------------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file provides utilities to convert a loop into a loop with bottom test. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopRotationUtils.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SSAUpdater.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-rotate" + +STATISTIC(NumRotated, "Number of loops rotated"); + +namespace { +/// A simple loop rotation transformation. 
+class LoopRotate { +  const unsigned MaxHeaderSize; +  LoopInfo *LI; +  const TargetTransformInfo *TTI; +  AssumptionCache *AC; +  DominatorTree *DT; +  ScalarEvolution *SE; +  const SimplifyQuery &SQ; +  bool RotationOnly; +  bool IsUtilMode; + +public: +  LoopRotate(unsigned MaxHeaderSize, LoopInfo *LI, +             const TargetTransformInfo *TTI, AssumptionCache *AC, +             DominatorTree *DT, ScalarEvolution *SE, const SimplifyQuery &SQ, +             bool RotationOnly, bool IsUtilMode) +      : MaxHeaderSize(MaxHeaderSize), LI(LI), TTI(TTI), AC(AC), DT(DT), SE(SE), +        SQ(SQ), RotationOnly(RotationOnly), IsUtilMode(IsUtilMode) {} +  bool processLoop(Loop *L); + +private: +  bool rotateLoop(Loop *L, bool SimplifiedLatch); +  bool simplifyLoopLatch(Loop *L); +}; +} // end anonymous namespace + +/// RewriteUsesOfClonedInstructions - We just cloned the instructions from the +/// old header into the preheader.  If there were uses of the values produced by +/// these instruction that were outside of the loop, we have to insert PHI nodes +/// to merge the two values.  Do this now. +static void RewriteUsesOfClonedInstructions(BasicBlock *OrigHeader, +                                            BasicBlock *OrigPreheader, +                                            ValueToValueMapTy &ValueMap, +                                SmallVectorImpl<PHINode*> *InsertedPHIs) { +  // Remove PHI node entries that are no longer live. +  BasicBlock::iterator I, E = OrigHeader->end(); +  for (I = OrigHeader->begin(); PHINode *PN = dyn_cast<PHINode>(I); ++I) +    PN->removeIncomingValue(PN->getBasicBlockIndex(OrigPreheader)); + +  // Now fix up users of the instructions in OrigHeader, inserting PHI nodes +  // as necessary. +  SSAUpdater SSA(InsertedPHIs); +  for (I = OrigHeader->begin(); I != E; ++I) { +    Value *OrigHeaderVal = &*I; + +    // If there are no uses of the value (e.g. because it returns void), there +    // is nothing to rewrite. 
+    if (OrigHeaderVal->use_empty()) +      continue; + +    Value *OrigPreHeaderVal = ValueMap.lookup(OrigHeaderVal); + +    // The value now exits in two versions: the initial value in the preheader +    // and the loop "next" value in the original header. +    SSA.Initialize(OrigHeaderVal->getType(), OrigHeaderVal->getName()); +    SSA.AddAvailableValue(OrigHeader, OrigHeaderVal); +    SSA.AddAvailableValue(OrigPreheader, OrigPreHeaderVal); + +    // Visit each use of the OrigHeader instruction. +    for (Value::use_iterator UI = OrigHeaderVal->use_begin(), +                             UE = OrigHeaderVal->use_end(); +         UI != UE;) { +      // Grab the use before incrementing the iterator. +      Use &U = *UI; + +      // Increment the iterator before removing the use from the list. +      ++UI; + +      // SSAUpdater can't handle a non-PHI use in the same block as an +      // earlier def. We can easily handle those cases manually. +      Instruction *UserInst = cast<Instruction>(U.getUser()); +      if (!isa<PHINode>(UserInst)) { +        BasicBlock *UserBB = UserInst->getParent(); + +        // The original users in the OrigHeader are already using the +        // original definitions. +        if (UserBB == OrigHeader) +          continue; + +        // Users in the OrigPreHeader need to use the value to which the +        // original definitions are mapped. +        if (UserBB == OrigPreheader) { +          U = OrigPreHeaderVal; +          continue; +        } +      } + +      // Anything else can be handled by SSAUpdater. +      SSA.RewriteUse(U); +    } + +    // Replace MetadataAsValue(ValueAsMetadata(OrigHeaderVal)) uses in debug +    // intrinsics. +    SmallVector<DbgValueInst *, 1> DbgValues; +    llvm::findDbgValues(DbgValues, OrigHeaderVal); +    for (auto &DbgValue : DbgValues) { +      // The original users in the OrigHeader are already using the original +      // definitions. 
+      BasicBlock *UserBB = DbgValue->getParent(); +      if (UserBB == OrigHeader) +        continue; + +      // Users in the OrigPreHeader need to use the value to which the +      // original definitions are mapped and anything else can be handled by +      // the SSAUpdater. To avoid adding PHINodes, check if the value is +      // available in UserBB, if not substitute undef. +      Value *NewVal; +      if (UserBB == OrigPreheader) +        NewVal = OrigPreHeaderVal; +      else if (SSA.HasValueForBlock(UserBB)) +        NewVal = SSA.GetValueInMiddleOfBlock(UserBB); +      else +        NewVal = UndefValue::get(OrigHeaderVal->getType()); +      DbgValue->setOperand(0, +                           MetadataAsValue::get(OrigHeaderVal->getContext(), +                                                ValueAsMetadata::get(NewVal))); +    } +  } +} + +// Look for a phi which is only used outside the loop (via a LCSSA phi) +// in the exit from the header. This means that rotating the loop can +// remove the phi. +static bool shouldRotateLoopExitingLatch(Loop *L) { +  BasicBlock *Header = L->getHeader(); +  BasicBlock *HeaderExit = Header->getTerminator()->getSuccessor(0); +  if (L->contains(HeaderExit)) +    HeaderExit = Header->getTerminator()->getSuccessor(1); + +  for (auto &Phi : Header->phis()) { +    // Look for uses of this phi in the loop/via exits other than the header. +    if (llvm::any_of(Phi.users(), [HeaderExit](const User *U) { +          return cast<Instruction>(U)->getParent() != HeaderExit; +        })) +      continue; +    return true; +  } + +  return false; +} + +/// Rotate loop LP. Return true if the loop is rotated. +/// +/// \param SimplifiedLatch is true if the latch was just folded into the final +/// loop exit. In this case we may want to rotate even though the new latch is +/// now an exiting branch. This rotation would have happened had the latch not +/// been simplified. 
However, if SimplifiedLatch is false, then we avoid +/// rotating loops in which the latch exits to avoid excessive or endless +/// rotation. LoopRotate should be repeatable and converge to a canonical +/// form. This property is satisfied because simplifying the loop latch can only +/// happen once across multiple invocations of the LoopRotate pass. +bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) { +  // If the loop has only one block then there is not much to rotate. +  if (L->getBlocks().size() == 1) +    return false; + +  BasicBlock *OrigHeader = L->getHeader(); +  BasicBlock *OrigLatch = L->getLoopLatch(); + +  BranchInst *BI = dyn_cast<BranchInst>(OrigHeader->getTerminator()); +  if (!BI || BI->isUnconditional()) +    return false; + +  // If the loop header is not one of the loop exiting blocks then +  // either this loop is already rotated or it is not +  // suitable for loop rotation transformations. +  if (!L->isLoopExiting(OrigHeader)) +    return false; + +  // If the loop latch already contains a branch that leaves the loop then the +  // loop is already rotated. +  if (!OrigLatch) +    return false; + +  // Rotate if either the loop latch does *not* exit the loop, or if the loop +  // latch was just simplified. Or if we think it will be profitable. +  if (L->isLoopExiting(OrigLatch) && !SimplifiedLatch && IsUtilMode == false && +      !shouldRotateLoopExitingLatch(L)) +    return false; + +  // Check size of original header and reject loop if it is very big or we can't +  // duplicate blocks inside it. 
+  { +    SmallPtrSet<const Value *, 32> EphValues; +    CodeMetrics::collectEphemeralValues(L, AC, EphValues); + +    CodeMetrics Metrics; +    Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues); +    if (Metrics.notDuplicatable) { +      LLVM_DEBUG( +          dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable" +                 << " instructions: "; +          L->dump()); +      return false; +    } +    if (Metrics.convergent) { +      LLVM_DEBUG(dbgs() << "LoopRotation: NOT rotating - contains convergent " +                           "instructions: "; +                 L->dump()); +      return false; +    } +    if (Metrics.NumInsts > MaxHeaderSize) +      return false; +  } + +  // Now, this loop is suitable for rotation. +  BasicBlock *OrigPreheader = L->getLoopPreheader(); + +  // If the loop could not be converted to canonical form, it must have an +  // indirectbr in it, just give up. +  if (!OrigPreheader || !L->hasDedicatedExits()) +    return false; + +  // Anything ScalarEvolution may know about this loop or the PHI nodes +  // in its header will soon be invalidated. We should also invalidate +  // all outer loops because insertion and deletion of blocks that happens +  // during the rotation may violate invariants related to backedge taken +  // infos in them. +  if (SE) +    SE->forgetTopmostLoop(L); + +  LLVM_DEBUG(dbgs() << "LoopRotation: rotating "; L->dump()); + +  // Find new Loop header. NewHeader is a Header's one and only successor +  // that is inside loop.  Header's other successor is outside the +  // loop.  Otherwise loop is not suitable for rotation. 
+  BasicBlock *Exit = BI->getSuccessor(0); +  BasicBlock *NewHeader = BI->getSuccessor(1); +  if (L->contains(Exit)) +    std::swap(Exit, NewHeader); +  assert(NewHeader && "Unable to determine new loop header"); +  assert(L->contains(NewHeader) && !L->contains(Exit) && +         "Unable to determine loop header and exit blocks"); + +  // This code assumes that the new header has exactly one predecessor. +  // Remove any single-entry PHI nodes in it. +  assert(NewHeader->getSinglePredecessor() && +         "New header doesn't have one pred!"); +  FoldSingleEntryPHINodes(NewHeader); + +  // Begin by walking OrigHeader and populating ValueMap with an entry for +  // each Instruction. +  BasicBlock::iterator I = OrigHeader->begin(), E = OrigHeader->end(); +  ValueToValueMapTy ValueMap; + +  // For PHI nodes, the value available in OldPreHeader is just the +  // incoming value from OldPreHeader. +  for (; PHINode *PN = dyn_cast<PHINode>(I); ++I) +    ValueMap[PN] = PN->getIncomingValueForBlock(OrigPreheader); + +  // For the rest of the instructions, either hoist to the OrigPreheader if +  // possible or create a clone in the OldPreHeader if not. +  TerminatorInst *LoopEntryBranch = OrigPreheader->getTerminator(); + +  // Record all debug intrinsics preceding LoopEntryBranch to avoid duplication. 
+  using DbgIntrinsicHash = +      std::pair<std::pair<Value *, DILocalVariable *>, DIExpression *>; +  auto makeHash = [](DbgInfoIntrinsic *D) -> DbgIntrinsicHash { +    return {{D->getVariableLocation(), D->getVariable()}, D->getExpression()}; +  }; +  SmallDenseSet<DbgIntrinsicHash, 8> DbgIntrinsics; +  for (auto I = std::next(OrigPreheader->rbegin()), E = OrigPreheader->rend(); +       I != E; ++I) { +    if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&*I)) +      DbgIntrinsics.insert(makeHash(DII)); +    else +      break; +  } + +  while (I != E) { +    Instruction *Inst = &*I++; + +    // If the instruction's operands are invariant and it doesn't read or write +    // memory, then it is safe to hoist.  Doing this doesn't change the order of +    // execution in the preheader, but does prevent the instruction from +    // executing in each iteration of the loop.  This means it is safe to hoist +    // something that might trap, but isn't safe to hoist something that reads +    // memory (without proving that the loop doesn't write). +    if (L->hasLoopInvariantOperands(Inst) && !Inst->mayReadFromMemory() && +        !Inst->mayWriteToMemory() && !isa<TerminatorInst>(Inst) && +        !isa<DbgInfoIntrinsic>(Inst) && !isa<AllocaInst>(Inst)) { +      Inst->moveBefore(LoopEntryBranch); +      continue; +    } + +    // Otherwise, create a duplicate of the instruction. +    Instruction *C = Inst->clone(); + +    // Eagerly remap the operands of the instruction. +    RemapInstruction(C, ValueMap, +                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); + +    // Avoid inserting the same intrinsic twice. +    if (auto *DII = dyn_cast<DbgInfoIntrinsic>(C)) +      if (DbgIntrinsics.count(makeHash(DII))) { +        C->deleteValue(); +        continue; +      } + +    // With the operands remapped, see if the instruction constant folds or is +    // otherwise simplifyable.  
This commonly occurs because the entry from PHI +    // nodes allows icmps and other instructions to fold. +    Value *V = SimplifyInstruction(C, SQ); +    if (V && LI->replacementPreservesLCSSAForm(C, V)) { +      // If so, then delete the temporary instruction and stick the folded value +      // in the map. +      ValueMap[Inst] = V; +      if (!C->mayHaveSideEffects()) { +        C->deleteValue(); +        C = nullptr; +      } +    } else { +      ValueMap[Inst] = C; +    } +    if (C) { +      // Otherwise, stick the new instruction into the new block! +      C->setName(Inst->getName()); +      C->insertBefore(LoopEntryBranch); + +      if (auto *II = dyn_cast<IntrinsicInst>(C)) +        if (II->getIntrinsicID() == Intrinsic::assume) +          AC->registerAssumption(II); +    } +  } + +  // Along with all the other instructions, we just cloned OrigHeader's +  // terminator into OrigPreHeader. Fix up the PHI nodes in each of OrigHeader's +  // successors by duplicating their incoming values for OrigHeader. +  TerminatorInst *TI = OrigHeader->getTerminator(); +  for (BasicBlock *SuccBB : TI->successors()) +    for (BasicBlock::iterator BI = SuccBB->begin(); +         PHINode *PN = dyn_cast<PHINode>(BI); ++BI) +      PN->addIncoming(PN->getIncomingValueForBlock(OrigHeader), OrigPreheader); + +  // Now that OrigPreHeader has a clone of OrigHeader's terminator, remove +  // OrigPreHeader's old terminator (the original branch into the loop), and +  // remove the corresponding incoming values from the PHI nodes in OrigHeader. 
+  LoopEntryBranch->eraseFromParent(); + + +  SmallVector<PHINode*, 2> InsertedPHIs; +  // If there were any uses of instructions in the duplicated block outside the +  // loop, update them, inserting PHI nodes as required +  RewriteUsesOfClonedInstructions(OrigHeader, OrigPreheader, ValueMap, +                                  &InsertedPHIs); + +  // Attach dbg.value intrinsics to the new phis if that phi uses a value that +  // previously had debug metadata attached. This keeps the debug info +  // up-to-date in the loop body. +  if (!InsertedPHIs.empty()) +    insertDebugValuesForPHIs(OrigHeader, InsertedPHIs); + +  // NewHeader is now the header of the loop. +  L->moveToHeader(NewHeader); +  assert(L->getHeader() == NewHeader && "Latch block is our new header"); + +  // Inform DT about changes to the CFG. +  if (DT) { +    // The OrigPreheader branches to the NewHeader and Exit now. Then, inform +    // the DT about the removed edge to the OrigHeader (that got removed). +    SmallVector<DominatorTree::UpdateType, 3> Updates; +    Updates.push_back({DominatorTree::Insert, OrigPreheader, Exit}); +    Updates.push_back({DominatorTree::Insert, OrigPreheader, NewHeader}); +    Updates.push_back({DominatorTree::Delete, OrigPreheader, OrigHeader}); +    DT->applyUpdates(Updates); +  } + +  // At this point, we've finished our major CFG changes.  As part of cloning +  // the loop into the preheader we've simplified instructions and the +  // duplicated conditional branch may now be branching on a constant.  If it is +  // branching on a constant and if that constant means that we enter the loop, +  // then we fold away the cond branch to an uncond branch.  This simplifies the +  // loop in cases important for nested loops, and it also means we don't have +  // to split as many edges. 
+  BranchInst *PHBI = cast<BranchInst>(OrigPreheader->getTerminator()); +  assert(PHBI->isConditional() && "Should be clone of BI condbr!"); +  if (!isa<ConstantInt>(PHBI->getCondition()) || +      PHBI->getSuccessor(cast<ConstantInt>(PHBI->getCondition())->isZero()) != +          NewHeader) { +    // The conditional branch can't be folded, handle the general case. +    // Split edges as necessary to preserve LoopSimplify form. + +    // Right now OrigPreHeader has two successors, NewHeader and ExitBlock, and +    // thus is not a preheader anymore. +    // Split the edge to form a real preheader. +    BasicBlock *NewPH = SplitCriticalEdge( +        OrigPreheader, NewHeader, +        CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); +    NewPH->setName(NewHeader->getName() + ".lr.ph"); + +    // Preserve canonical loop form, which means that 'Exit' should have only +    // one predecessor. Note that Exit could be an exit block for multiple +    // nested loops, causing both of the edges to now be critical and need to +    // be split. +    SmallVector<BasicBlock *, 4> ExitPreds(pred_begin(Exit), pred_end(Exit)); +    bool SplitLatchEdge = false; +    for (BasicBlock *ExitPred : ExitPreds) { +      // We only need to split loop exit edges. +      Loop *PredLoop = LI->getLoopFor(ExitPred); +      if (!PredLoop || PredLoop->contains(Exit)) +        continue; +      if (isa<IndirectBrInst>(ExitPred->getTerminator())) +        continue; +      SplitLatchEdge |= L->getLoopLatch() == ExitPred; +      BasicBlock *ExitSplit = SplitCriticalEdge( +          ExitPred, Exit, +          CriticalEdgeSplittingOptions(DT, LI).setPreserveLCSSA()); +      ExitSplit->moveBefore(Exit); +    } +    assert(SplitLatchEdge && +           "Despite splitting all preds, failed to split latch exit?"); +  } else { +    // We can fold the conditional branch in the preheader, this makes things +    // simpler. The first step is to remove the extra edge to the Exit block. 
+    Exit->removePredecessor(OrigPreheader, true /*preserve LCSSA*/); +    BranchInst *NewBI = BranchInst::Create(NewHeader, PHBI); +    NewBI->setDebugLoc(PHBI->getDebugLoc()); +    PHBI->eraseFromParent(); + +    // With our CFG finalized, update DomTree if it is available. +    if (DT) DT->deleteEdge(OrigPreheader, Exit); +  } + +  assert(L->getLoopPreheader() && "Invalid loop preheader after loop rotation"); +  assert(L->getLoopLatch() && "Invalid loop latch after loop rotation"); + +  // Now that the CFG and DomTree are in a consistent state again, try to merge +  // the OrigHeader block into OrigLatch.  This will succeed if they are +  // connected by an unconditional branch.  This is just a cleanup so the +  // emitted code isn't too gross in this common case. +  MergeBlockIntoPredecessor(OrigHeader, DT, LI); + +  LLVM_DEBUG(dbgs() << "LoopRotation: into "; L->dump()); + +  ++NumRotated; +  return true; +} + +/// Determine whether the instructions in this range may be safely and cheaply +/// speculated. This is not an important enough situation to develop complex +/// heuristics. We handle a single arithmetic instruction along with any type +/// conversions. +static bool shouldSpeculateInstrs(BasicBlock::iterator Begin, +                                  BasicBlock::iterator End, Loop *L) { +  bool seenIncrement = false; +  bool MultiExitLoop = false; + +  if (!L->getExitingBlock()) +    MultiExitLoop = true; + +  for (BasicBlock::iterator I = Begin; I != End; ++I) { + +    if (!isSafeToSpeculativelyExecute(&*I)) +      return false; + +    if (isa<DbgInfoIntrinsic>(I)) +      continue; + +    switch (I->getOpcode()) { +    default: +      return false; +    case Instruction::GetElementPtr: +      // GEPs are cheap if all indices are constant. 
+      if (!cast<GEPOperator>(I)->hasAllConstantIndices()) +        return false; +      // fall-thru to increment case +      LLVM_FALLTHROUGH; +    case Instruction::Add: +    case Instruction::Sub: +    case Instruction::And: +    case Instruction::Or: +    case Instruction::Xor: +    case Instruction::Shl: +    case Instruction::LShr: +    case Instruction::AShr: { +      Value *IVOpnd = +          !isa<Constant>(I->getOperand(0)) +              ? I->getOperand(0) +              : !isa<Constant>(I->getOperand(1)) ? I->getOperand(1) : nullptr; +      if (!IVOpnd) +        return false; + +      // If increment operand is used outside of the loop, this speculation +      // could cause extra live range interference. +      if (MultiExitLoop) { +        for (User *UseI : IVOpnd->users()) { +          auto *UserInst = cast<Instruction>(UseI); +          if (!L->contains(UserInst)) +            return false; +        } +      } + +      if (seenIncrement) +        return false; +      seenIncrement = true; +      break; +    } +    case Instruction::Trunc: +    case Instruction::ZExt: +    case Instruction::SExt: +      // ignore type conversions +      break; +    } +  } +  return true; +} + +/// Fold the loop tail into the loop exit by speculating the loop tail +/// instructions. Typically, this is a single post-increment. In the case of a +/// simple 2-block loop, hoisting the increment can be much better than +/// duplicating the entire loop header. In the case of loops with early exits, +/// rotation will not work anyway, but simplifyLoopLatch will put the loop in +/// canonical form so downstream passes can handle it. +/// +/// I don't believe this invalidates SCEV. 
+bool LoopRotate::simplifyLoopLatch(Loop *L) { +  BasicBlock *Latch = L->getLoopLatch(); +  if (!Latch || Latch->hasAddressTaken()) +    return false; + +  BranchInst *Jmp = dyn_cast<BranchInst>(Latch->getTerminator()); +  if (!Jmp || !Jmp->isUnconditional()) +    return false; + +  BasicBlock *LastExit = Latch->getSinglePredecessor(); +  if (!LastExit || !L->isLoopExiting(LastExit)) +    return false; + +  BranchInst *BI = dyn_cast<BranchInst>(LastExit->getTerminator()); +  if (!BI) +    return false; + +  if (!shouldSpeculateInstrs(Latch->begin(), Jmp->getIterator(), L)) +    return false; + +  LLVM_DEBUG(dbgs() << "Folding loop latch " << Latch->getName() << " into " +                    << LastExit->getName() << "\n"); + +  // Hoist the instructions from Latch into LastExit. +  LastExit->getInstList().splice(BI->getIterator(), Latch->getInstList(), +                                 Latch->begin(), Jmp->getIterator()); + +  unsigned FallThruPath = BI->getSuccessor(0) == Latch ? 0 : 1; +  BasicBlock *Header = Jmp->getSuccessor(0); +  assert(Header == L->getHeader() && "expected a backward branch"); + +  // Remove Latch from the CFG so that LastExit becomes the new Latch. +  BI->setSuccessor(FallThruPath, Header); +  Latch->replaceSuccessorsPhiUsesWith(LastExit); +  Jmp->eraseFromParent(); + +  // Nuke the Latch block. +  assert(Latch->empty() && "unable to evacuate Latch"); +  LI->removeBlock(Latch); +  if (DT) +    DT->eraseNode(Latch); +  Latch->eraseFromParent(); +  return true; +} + +/// Rotate \c L, and return true if any modification was made. +bool LoopRotate::processLoop(Loop *L) { +  // Save the loop metadata. +  MDNode *LoopMD = L->getLoopID(); + +  bool SimplifiedLatch = false; + +  // Simplify the loop latch before attempting to rotate the header +  // upward. Rotation may not be needed if the loop tail can be folded into the +  // loop exit. 
+  if (!RotationOnly) +    SimplifiedLatch = simplifyLoopLatch(L); + +  bool MadeChange = rotateLoop(L, SimplifiedLatch); +  assert((!MadeChange || L->isLoopExiting(L->getLoopLatch())) && +         "Loop latch should be exiting after loop-rotate."); + +  // Restore the loop metadata. +  // NB! We presume LoopRotation DOESN'T ADD its own metadata. +  if ((MadeChange || SimplifiedLatch) && LoopMD) +    L->setLoopID(LoopMD); + +  return MadeChange || SimplifiedLatch; +} + + +/// The utility to convert a loop into a loop with bottom test. +bool llvm::LoopRotation(Loop *L, LoopInfo *LI, const TargetTransformInfo *TTI, +                        AssumptionCache *AC, DominatorTree *DT, +                        ScalarEvolution *SE, const SimplifyQuery &SQ, +                        bool RotationOnly = true, +                        unsigned Threshold = unsigned(-1), +                        bool IsUtilMode = true) { +  LoopRotate LR(Threshold, LI, TTI, AC, DT, SE, SQ, RotationOnly, IsUtilMode); + +  return LR.processLoop(L); +} diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index f43af9772771..970494eb4704 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -52,6 +52,7 @@  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/IR/CFG.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h" @@ -64,9 +65,8 @@  #include "llvm/IR/Type.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/LoopUtils.h"  using namespace llvm; @@ -141,8 +141,8 @@ BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,    if 
(!PreheaderBB)      return nullptr; -  DEBUG(dbgs() << "LoopSimplify: Creating pre-header " -               << PreheaderBB->getName() << "\n"); +  LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header " +                    << PreheaderBB->getName() << "\n");    // Make sure that NewBB is put someplace intelligent, which doesn't mess up    // code layout too horribly. @@ -170,7 +170,7 @@ static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,    } while (!Worklist.empty());  } -/// \brief The first part of loop-nestification is to find a PHI node that tells +/// The first part of loop-nestification is to find a PHI node that tells  /// us how to partition the loops.  static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,                                          AssumptionCache *AC) { @@ -195,7 +195,7 @@ static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,    return nullptr;  } -/// \brief If this loop has multiple backedges, try to pull one of them out into +/// If this loop has multiple backedges, try to pull one of them out into  /// a nested loop.  ///  /// This is important for code that looks like @@ -242,7 +242,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,        OuterLoopPreds.push_back(PN->getIncomingBlock(i));      }    } -  DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); +  LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");    // If ScalarEvolution is around and knows anything about values in    // this loop, tell it to forget them, because we're about to @@ -332,7 +332,7 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,    return NewOuter;  } -/// \brief This method is called when the specified loop has more than one +/// This method is called when the specified loop has more than one  /// backedge in it.  
///  /// If this occurs, revector all of these backedges to target a new basic block @@ -371,8 +371,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,    BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);    BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc()); -  DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " -               << BEBlock->getName() << "\n"); +  LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block " +                    << BEBlock->getName() << "\n");    // Move the new backedge block to right after the last backedge block.    Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator(); @@ -457,7 +457,7 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,    return BEBlock;  } -/// \brief Simplify one loop and queue further loops for simplification. +/// Simplify one loop and queue further loops for simplification.  static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,                              DominatorTree *DT, LoopInfo *LI,                              ScalarEvolution *SE, AssumptionCache *AC, @@ -484,8 +484,8 @@ ReprocessLoop:      // Delete each unique out-of-loop (and thus dead) predecessor.      for (BasicBlock *P : BadPreds) { -      DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " -                   << P->getName() << "\n"); +      LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor " +                        << P->getName() << "\n");        // Zap the dead pred's terminator and replace it with unreachable.        
TerminatorInst *TI = P->getTerminator(); @@ -504,16 +504,13 @@ ReprocessLoop:        if (BI->isConditional()) {          if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) { -          DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in " -                       << ExitingBlock->getName() << "\n"); +          LLVM_DEBUG(dbgs() +                     << "LoopSimplify: Resolving \"br i1 undef\" to exit in " +                     << ExitingBlock->getName() << "\n");            BI->setCondition(ConstantInt::get(Cond->getType(),                                              !L->contains(BI->getSuccessor(0)))); -          // This may make the loop analyzable, force SCEV recomputation. -          if (SE) -            SE->forgetLoop(L); -            Changed = true;          }        } @@ -617,11 +614,8 @@ ReprocessLoop:        // comparison and the branch.        bool AllInvariant = true;        bool AnyInvariant = false; -      for (BasicBlock::iterator I = ExitingBlock->begin(); &*I != BI; ) { +      for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {          Instruction *Inst = &*I++; -        // Skip debug info intrinsics. -        if (isa<DbgInfoIntrinsic>(Inst)) -          continue;          if (Inst == CI)            continue;          if (!L->makeLoopInvariant(Inst, AnyInvariant, @@ -648,15 +642,8 @@ ReprocessLoop:        // Success. The block is now dead, so remove it from the loop,        // update the dominator tree and delete it. -      DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " -                   << ExitingBlock->getName() << "\n"); - -      // Notify ScalarEvolution before deleting this block. Currently assume the -      // parent loop doesn't change (spliting edges doesn't count). If blocks, -      // CFG edges, or other values in the parent loop change, then we need call -      // to forgetLoop() for the parent instead. 
-      if (SE) -        SE->forgetLoop(L); +      LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block " +                        << ExitingBlock->getName() << "\n");        assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));        Changed = true; @@ -679,6 +666,12 @@ ReprocessLoop:      }    } +  // Changing exit conditions for blocks may affect exit counts of this loop and +  // any of its parents, so we must invalidate the entire subtree if we've made +  // any changes. +  if (Changed && SE) +    SE->forgetTopmostLoop(L); +    return Changed;  } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index dc98a39adcc5..04b8c1417e0a 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -23,6 +23,7 @@  #include "llvm/Analysis/LoopIterator.h"  #include "llvm/Analysis/OptimizationRemarkEmitter.h"  #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DebugInfoMetadata.h" @@ -33,7 +34,6 @@  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/LoopSimplify.h"  #include "llvm/Transforms/Utils/LoopUtils.h"  #include "llvm/Transforms/Utils/SimplifyIndVar.h" @@ -63,8 +63,7 @@ UnrollVerifyDomtree("unroll-verify-domtree", cl::Hidden,  /// Convert the instruction operands from referencing the current values into  /// those specified by VMap. 
-static inline void remapInstruction(Instruction *I, -                                    ValueToValueMapTy &VMap) { +void llvm::remapInstruction(Instruction *I, ValueToValueMapTy &VMap) {    for (unsigned op = 0, E = I->getNumOperands(); op != E; ++op) {      Value *Op = I->getOperand(op); @@ -97,16 +96,10 @@ static inline void remapInstruction(Instruction *I,  /// Folds a basic block into its predecessor if it only has one predecessor, and  /// that predecessor only has one successor. -/// The LoopInfo Analysis that is passed will be kept consistent.  If folding is -/// successful references to the containing loop must be removed from -/// ScalarEvolution by calling ScalarEvolution::forgetLoop because SE may have -/// references to the eliminated BB.  The argument ForgottenLoops contains a set -/// of loops that have already been forgotten to prevent redundant, expensive -/// calls to ScalarEvolution::forgetLoop.  Returns the new combined block. -static BasicBlock * -foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE, -                         SmallPtrSetImpl<Loop *> &ForgottenLoops, -                         DominatorTree *DT) { +/// The LoopInfo Analysis that is passed will be kept consistent. +BasicBlock *llvm::foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, +                                           ScalarEvolution *SE, +                                           DominatorTree *DT) {    // Merge basic blocks into their predecessor if there is only one distinct    // pred, and if there is only one distinct successor of the predecessor, and    // if there are no PHI nodes. 
@@ -116,7 +109,8 @@ foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,    if (OnlyPred->getTerminator()->getNumSuccessors() != 1)      return nullptr; -  DEBUG(dbgs() << "Merging: " << *BB << "into: " << *OnlyPred); +  LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into " +                    << OnlyPred->getName() << "\n");    // Resolve any PHI nodes at the start of the block.  They are all    // guaranteed to have exactly one entry if they exist, unless there are @@ -149,13 +143,6 @@ foldBlockIntoPredecessor(BasicBlock *BB, LoopInfo *LI, ScalarEvolution *SE,        DT->eraseNode(BB);      } -  // ScalarEvolution holds references to loop exit blocks. -  if (SE) { -    if (Loop *L = LI->getLoopFor(BB)) { -      if (ForgottenLoops.insert(L).second) -        SE->forgetLoop(L); -    } -  }    LI->removeBlock(BB);    // Inherit predecessor's name if it exists... @@ -258,16 +245,55 @@ static bool isEpilogProfitable(Loop *L) {    BasicBlock *PreHeader = L->getLoopPreheader();    BasicBlock *Header = L->getHeader();    assert(PreHeader && Header); -  for (Instruction &BBI : *Header) { -    PHINode *PN = dyn_cast<PHINode>(&BBI); -    if (!PN) -      break; -    if (isa<ConstantInt>(PN->getIncomingValueForBlock(PreHeader))) +  for (const PHINode &PN : Header->phis()) { +    if (isa<ConstantInt>(PN.getIncomingValueForBlock(PreHeader)))        return true;    }    return false;  } +/// Perform some cleanup and simplifications on loops after unrolling. It is +/// useful to simplify the IV's in the new loop, as well as do a quick +/// simplify/dce pass of the instructions. +void llvm::simplifyLoopAfterUnroll(Loop *L, bool SimplifyIVs, LoopInfo *LI, +                                   ScalarEvolution *SE, DominatorTree *DT, +                                   AssumptionCache *AC) { +  // Simplify any new induction variables in the partially unrolled loop. 
+  if (SE && SimplifyIVs) { +    SmallVector<WeakTrackingVH, 16> DeadInsts; +    simplifyLoopIVs(L, SE, DT, LI, DeadInsts); + +    // Aggressively clean up dead instructions that simplifyLoopIVs already +    // identified. Any remaining should be cleaned up below. +    while (!DeadInsts.empty()) +      if (Instruction *Inst = +              dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) +        RecursivelyDeleteTriviallyDeadInstructions(Inst); +  } + +  // At this point, the code is well formed.  We now do a quick sweep over the +  // inserted code, doing constant propagation and dead code elimination as we +  // go. +  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); +  const std::vector<BasicBlock *> &NewLoopBlocks = L->getBlocks(); +  for (BasicBlock *BB : NewLoopBlocks) { +    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { +      Instruction *Inst = &*I++; + +      if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) +        if (LI->replacementPreservesLCSSAForm(Inst, V)) +          Inst->replaceAllUsesWith(V); +      if (isInstructionTriviallyDead(Inst)) +        BB->getInstList().erase(Inst); +    } +  } + +  // TODO: after peeling or unrolling, previously loop variant conditions are +  // likely to fold to constants, eagerly propagating those here will require +  // fewer cleanup passes to be run.  Alternatively, a LoopEarlyCSE might be +  // appropriate. +} +  /// Unroll the given loop by Count. The loop must be in LCSSA form.  Unrolling  /// can only fail when the loop's latch block is not terminated by a conditional  /// branch instruction. 
However, if the trip count (and multiple) are not known, @@ -313,19 +339,19 @@ LoopUnrollResult llvm::UnrollLoop(    BasicBlock *Preheader = L->getLoopPreheader();    if (!Preheader) { -    DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n"); +    LLVM_DEBUG(dbgs() << "  Can't unroll; loop preheader-insertion failed.\n");      return LoopUnrollResult::Unmodified;    }    BasicBlock *LatchBlock = L->getLoopLatch();    if (!LatchBlock) { -    DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n"); +    LLVM_DEBUG(dbgs() << "  Can't unroll; loop exit-block-insertion failed.\n");      return LoopUnrollResult::Unmodified;    }    // Loops with indirectbr cannot be cloned.    if (!L->isSafeToClone()) { -    DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n"); +    LLVM_DEBUG(dbgs() << "  Can't unroll; Loop body cannot be cloned.\n");      return LoopUnrollResult::Unmodified;    } @@ -338,8 +364,9 @@ LoopUnrollResult llvm::UnrollLoop(    if (!BI || BI->isUnconditional()) {      // The loop-rotate pass can be helpful to avoid this in many cases. -    DEBUG(dbgs() << -             "  Can't unroll; loop not terminated by a conditional branch.\n"); +    LLVM_DEBUG( +        dbgs() +        << "  Can't unroll; loop not terminated by a conditional branch.\n");      return LoopUnrollResult::Unmodified;    } @@ -348,22 +375,22 @@ LoopUnrollResult llvm::UnrollLoop(    };    if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { -    DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" -                    " exiting the loop can be unrolled\n"); +    LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" +                         " exiting the loop can be unrolled\n");      return LoopUnrollResult::Unmodified;    }    if (Header->hasAddressTaken()) {      // The loop-rotate pass can be helpful to avoid this in many cases. 
-    DEBUG(dbgs() << -          "  Won't unroll loop: address of header block is taken.\n"); +    LLVM_DEBUG( +        dbgs() << "  Won't unroll loop: address of header block is taken.\n");      return LoopUnrollResult::Unmodified;    }    if (TripCount != 0) -    DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n"); +    LLVM_DEBUG(dbgs() << "  Trip Count = " << TripCount << "\n");    if (TripMultiple != 1) -    DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n"); +    LLVM_DEBUG(dbgs() << "  Trip Multiple = " << TripMultiple << "\n");    // Effectively "DCE" unrolled iterations that are beyond the tripcount    // and will never be executed. @@ -372,7 +399,7 @@ LoopUnrollResult llvm::UnrollLoop(    // Don't enter the unroll code if there is nothing to do.    if (TripCount == 0 && Count < 2 && PeelCount == 0) { -    DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); +    LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n");      return LoopUnrollResult::Unmodified;    } @@ -406,8 +433,9 @@ LoopUnrollResult llvm::UnrollLoop(           "Did not expect runtime trip-count unrolling "           "and peeling for the same loop"); +  bool Peeled = false;    if (PeelCount) { -    bool Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA); +    Peeled = peelLoop(L, PeelCount, LI, SE, DT, AC, PreserveLCSSA);      // Successful peeling may result in a change in the loop preheader/trip      // counts. If we later unroll the loop, we want these to be updated. @@ -422,7 +450,7 @@ LoopUnrollResult llvm::UnrollLoop(    // Loops containing convergent instructions must have a count that divides    // their TripMultiple. 
-  DEBUG( +  LLVM_DEBUG(        {          bool HasConvergent = false;          for (auto &BB : L->blocks()) @@ -445,18 +473,12 @@ LoopUnrollResult llvm::UnrollLoop(      if (Force)        RuntimeTripCount = false;      else { -      DEBUG( -          dbgs() << "Wont unroll; remainder loop could not be generated" -                    "when assuming runtime trip count\n"); +      LLVM_DEBUG(dbgs() << "Won't unroll; remainder loop could not be " +                           "generated when assuming runtime trip count\n");        return LoopUnrollResult::Unmodified;      }    } -  // Notify ScalarEvolution that the loop will be substantially changed, -  // if not outright eliminated. -  if (SE) -    SE->forgetLoop(L); -    // If we know the trip count, we know the multiple...    unsigned BreakoutTrip = 0;    if (TripCount != 0) { @@ -471,8 +493,8 @@ LoopUnrollResult llvm::UnrollLoop(    using namespace ore;    // Report the unrolling decision.    if (CompletelyUnroll) { -    DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() -                 << " with trip count " << TripCount << "!\n"); +    LLVM_DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName() +                      << " with trip count " << TripCount << "!\n");      if (ORE)        ORE->emit([&]() {          return OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), @@ -481,8 +503,8 @@ LoopUnrollResult llvm::UnrollLoop(                 << NV("UnrollCount", TripCount) << " iterations";        });    } else if (PeelCount) { -    DEBUG(dbgs() << "PEELING loop %" << Header->getName() -                 << " with iteration count " << PeelCount << "!\n"); +    LLVM_DEBUG(dbgs() << "PEELING loop %" << Header->getName() +                      << " with iteration count " << PeelCount << "!\n");      if (ORE)        ORE->emit([&]() {          return OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(), @@ -498,31 +520,42 @@ LoopUnrollResult llvm::UnrollLoop(                 
   << NV("UnrollCount", Count);      }; -    DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() -          << " by " << Count); +    LLVM_DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " +                      << Count);      if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { -      DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); +      LLVM_DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);        if (ORE)          ORE->emit([&]() {            return DiagBuilder() << " with a breakout at trip "                                 << NV("BreakoutTrip", BreakoutTrip);          });      } else if (TripMultiple != 1) { -      DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); +      LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");        if (ORE)          ORE->emit([&]() {            return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple)                                 << " trips per branch";          });      } else if (RuntimeTripCount) { -      DEBUG(dbgs() << " with run-time trip count"); +      LLVM_DEBUG(dbgs() << " with run-time trip count");        if (ORE)          ORE->emit(              [&]() { return DiagBuilder() << " with run-time trip count"; });      } -    DEBUG(dbgs() << "!\n"); +    LLVM_DEBUG(dbgs() << "!\n");    } +  // We are going to make changes to this loop. SCEV may be keeping cached info +  // about it, in particular about backedge taken count. The changes we make +  // are guaranteed to invalidate this information for our loop. It is tempting +  // to only invalidate the loop being unrolled, but it is incorrect as long as +  // all exiting branches from all inner loops have impact on the outer loops, +  // and if something changes inside them then any of outer loops may also +  // change. When we forget outermost loop, we also forget all contained loops +  // and this is what we need here. 
+  if (SE) +    SE->forgetTopmostLoop(L); +    bool ContinueOnTrue = L->contains(BI->getSuccessor(0));    BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); @@ -580,14 +613,9 @@ LoopUnrollResult llvm::UnrollLoop(               "Header should not be in a sub-loop");        // Tell LI about New.        const Loop *OldLoop = addClonedBlockToLoopInfo(*BB, New, LI, NewLoops); -      if (OldLoop) { +      if (OldLoop)          LoopsToSimplify.insert(NewLoops[OldLoop]); -        // Forget the old loop, since its inputs may have changed. -        if (SE) -          SE->forgetLoop(OldLoop); -      } -        if (*BB == Header)          // Loop over all of the PHI nodes in the block, changing them to use          // the incoming values from the previous block. @@ -611,13 +639,12 @@ LoopUnrollResult llvm::UnrollLoop(        for (BasicBlock *Succ : successors(*BB)) {          if (L->contains(Succ))            continue; -        for (BasicBlock::iterator BBI = Succ->begin(); -             PHINode *phi = dyn_cast<PHINode>(BBI); ++BBI) { -          Value *Incoming = phi->getIncomingValueForBlock(*BB); +        for (PHINode &PHI : Succ->phis()) { +          Value *Incoming = PHI.getIncomingValueForBlock(*BB);            ValueToValueMapTy::iterator It = LastValueMap.find(Incoming);            if (It != LastValueMap.end())              Incoming = It->second; -          phi->addIncoming(Incoming, New); +          PHI.addIncoming(Incoming, New);          }        }        // Keep track of new headers and latches as we create them, so that @@ -721,10 +748,8 @@ LoopUnrollResult llvm::UnrollLoop(          for (BasicBlock *Succ: successors(BB)) {            if (Succ == Headers[i])              continue; -          for (BasicBlock::iterator BBI = Succ->begin(); -               PHINode *Phi = dyn_cast<PHINode>(BBI); ++BBI) { -            Phi->removeIncomingValue(BB, false); -          } +          for (PHINode &Phi : Succ->phis()) +            Phi.removeIncomingValue(BB, false);       
   }        }        // Replace the conditional branch with an unconditional one. @@ -775,17 +800,15 @@ LoopUnrollResult llvm::UnrollLoop(      }    } -  if (DT && UnrollVerifyDomtree) -    DT->verifyDomTree(); +  assert(!DT || !UnrollVerifyDomtree || +      DT->verify(DominatorTree::VerificationLevel::Fast));    // Merge adjacent basic blocks, if possible. -  SmallPtrSet<Loop *, 4> ForgottenLoops;    for (BasicBlock *Latch : Latches) {      BranchInst *Term = cast<BranchInst>(Latch->getTerminator());      if (Term->isUnconditional()) {        BasicBlock *Dest = Term->getSuccessor(0); -      if (BasicBlock *Fold = -              foldBlockIntoPredecessor(Dest, LI, SE, ForgottenLoops, DT)) { +      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) {          // Dest has been folded into Fold. Update our worklists accordingly.          std::replace(Latches.begin(), Latches.end(), Dest, Fold);          UnrolledLoopBlocks.erase(std::remove(UnrolledLoopBlocks.begin(), @@ -795,40 +818,10 @@ LoopUnrollResult llvm::UnrollLoop(      }    } -  // Simplify any new induction variables in the partially unrolled loop. -  if (SE && !CompletelyUnroll && Count > 1) { -    SmallVector<WeakTrackingVH, 16> DeadInsts; -    simplifyLoopIVs(L, SE, DT, LI, DeadInsts); - -    // Aggressively clean up dead instructions that simplifyLoopIVs already -    // identified. Any remaining should be cleaned up below. -    while (!DeadInsts.empty()) -      if (Instruction *Inst = -              dyn_cast_or_null<Instruction>(&*DeadInsts.pop_back_val())) -        RecursivelyDeleteTriviallyDeadInstructions(Inst); -  } - -  // At this point, the code is well formed.  We now do a quick sweep over the -  // inserted code, doing constant propagation and dead code elimination as we -  // go. 
-  const DataLayout &DL = Header->getModule()->getDataLayout(); -  const std::vector<BasicBlock*> &NewLoopBlocks = L->getBlocks(); -  for (BasicBlock *BB : NewLoopBlocks) { -    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ) { -      Instruction *Inst = &*I++; - -      if (Value *V = SimplifyInstruction(Inst, {DL, nullptr, DT, AC})) -        if (LI->replacementPreservesLCSSAForm(Inst, V)) -          Inst->replaceAllUsesWith(V); -      if (isInstructionTriviallyDead(Inst)) -        BB->getInstList().erase(Inst); -    } -  } - -  // TODO: after peeling or unrolling, previously loop variant conditions are -  // likely to fold to constants, eagerly propagating those here will require -  // fewer cleanup passes to be run.  Alternatively, a LoopEarlyCSE might be -  // appropriate. +  // At this point, the code is well formed.  We now simplify the unrolled loop, +  // doing constant propagation and dead code elimination as we go. +  simplifyLoopAfterUnroll(L, !CompletelyUnroll && (Count > 1 || Peeled), LI, SE, +                          DT, AC);    NumCompletelyUnrolled += CompletelyUnroll;    ++NumUnrolled; diff --git a/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/lib/Transforms/Utils/LoopUnrollAndJam.cpp new file mode 100644 index 000000000000..b919f73c3817 --- /dev/null +++ b/lib/Transforms/Utils/LoopUnrollAndJam.cpp @@ -0,0 +1,785 @@ +//===-- LoopUnrollAndJam.cpp - Loop unrolling utilities -------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements loop unroll and jam as a routine, much like +// LoopUnroll.cpp implements loop unroll. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Analysis/LoopPass.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/Analysis/Utils/Local.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +#include "llvm/Transforms/Utils/SimplifyIndVar.h" +#include "llvm/Transforms/Utils/UnrollLoop.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-unroll-and-jam" + +STATISTIC(NumUnrolledAndJammed, "Number of loops unroll and jammed"); +STATISTIC(NumCompletelyUnrolledAndJammed, "Number of loops unroll and jammed"); + +typedef SmallPtrSet<BasicBlock *, 4> BasicBlockSet; + +// Partition blocks in an outer/inner loop pair into blocks before and after +// the loop +static bool partitionOuterLoopBlocks(Loop *L, Loop *SubLoop, +                                     BasicBlockSet &ForeBlocks, +                                     BasicBlockSet &SubLoopBlocks, +                                     BasicBlockSet &AftBlocks, +                                     DominatorTree *DT) { +  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); +  SubLoopBlocks.insert(SubLoop->block_begin(), SubLoop->block_end()); + +  
for (BasicBlock *BB : L->blocks()) { +    if (!SubLoop->contains(BB)) { +      if (DT->dominates(SubLoopLatch, BB)) +        AftBlocks.insert(BB); +      else +        ForeBlocks.insert(BB); +    } +  } + +  // Check that all blocks in ForeBlocks together dominate the subloop +  // TODO: This might ideally be done better with a dominator/postdominators. +  BasicBlock *SubLoopPreHeader = SubLoop->getLoopPreheader(); +  for (BasicBlock *BB : ForeBlocks) { +    if (BB == SubLoopPreHeader) +      continue; +    TerminatorInst *TI = BB->getTerminator(); +    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) +      if (!ForeBlocks.count(TI->getSuccessor(i))) +        return false; +  } + +  return true; +} + +// Looks at the phi nodes in Header for values coming from Latch. For these +// instructions and all their operands calls Visit on them, keeping going for +// all the operands in AftBlocks. Returns false if Visit returns false, +// otherwise returns true. This is used to process the instructions in the +// Aft blocks that need to be moved before the subloop. It is used in two +// places. One to check that the required set of instructions can be moved +// before the loop. Then to collect the instructions to actually move in +// moveHeaderPhiOperandsToForeBlocks. 
+template <typename T> +static bool processHeaderPhiOperands(BasicBlock *Header, BasicBlock *Latch, +                                     BasicBlockSet &AftBlocks, T Visit) { +  SmallVector<Instruction *, 8> Worklist; +  for (auto &Phi : Header->phis()) { +    Value *V = Phi.getIncomingValueForBlock(Latch); +    if (Instruction *I = dyn_cast<Instruction>(V)) +      Worklist.push_back(I); +  } + +  while (!Worklist.empty()) { +    Instruction *I = Worklist.back(); +    Worklist.pop_back(); +    if (!Visit(I)) +      return false; + +    if (AftBlocks.count(I->getParent())) +      for (auto &U : I->operands()) +        if (Instruction *II = dyn_cast<Instruction>(U)) +          Worklist.push_back(II); +  } + +  return true; +} + +// Move the phi operands of Header from Latch out of AftBlocks to InsertLoc. +static void moveHeaderPhiOperandsToForeBlocks(BasicBlock *Header, +                                              BasicBlock *Latch, +                                              Instruction *InsertLoc, +                                              BasicBlockSet &AftBlocks) { +  // We need to ensure we move the instructions in the correct order, +  // starting with the earliest required instruction and moving forward. +  std::vector<Instruction *> Visited; +  processHeaderPhiOperands(Header, Latch, AftBlocks, +                           [&Visited, &AftBlocks](Instruction *I) { +                             if (AftBlocks.count(I->getParent())) +                               Visited.push_back(I); +                             return true; +                           }); + +  // Move all instructions in program order to before the InsertLoc +  BasicBlock *InsertLocBB = InsertLoc->getParent(); +  for (Instruction *I : reverse(Visited)) { +    if (I->getParent() != InsertLocBB) +      I->moveBefore(InsertLoc); +  } +} + +/* +  This method performs Unroll and Jam. For a simple loop like: +  for (i = ..) +    Fore(i) +    for (j = ..) 
+      SubLoop(i, j) +    Aft(i) + +  Instead of doing normal inner or outer unrolling, we do: +  for (i = .., i+=2) +    Fore(i) +    Fore(i+1) +    for (j = ..) +      SubLoop(i, j) +      SubLoop(i+1, j) +    Aft(i) +    Aft(i+1) + +  So the outer loop is essentially unrolled and then the inner loops are fused +  ("jammed") together into a single loop. This can increase speed when there +  are loads in SubLoop that are invariant to i, as they become shared between +  the now jammed inner loops. + +  We do this by splitting the blocks in the loop into Fore, Subloop and Aft. +  Fore blocks are those before the inner loop, Aft are those after. Normal +  Unroll code is used to copy each of these sets of blocks and the results are +  combined together into the final form above. + +  isSafeToUnrollAndJam should be used prior to calling this to make sure the +  unrolling will be valid. Checking profitability is also advisable. +*/ +LoopUnrollResult +llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount, +                       unsigned TripMultiple, bool UnrollRemainder, +                       LoopInfo *LI, ScalarEvolution *SE, DominatorTree *DT, +                       AssumptionCache *AC, OptimizationRemarkEmitter *ORE) { + +  // When we enter here we should have already checked that it is safe +  BasicBlock *Header = L->getHeader(); +  assert(L->getSubLoops().size() == 1); +  Loop *SubLoop = *L->begin(); + +  // Don't enter the unroll code if there is nothing to do. +  if (TripCount == 0 && Count < 2) { +    LLVM_DEBUG(dbgs() << "Won't unroll; almost nothing to do\n"); +    return LoopUnrollResult::Unmodified; +  } + +  assert(Count > 0); +  assert(TripMultiple > 0); +  assert(TripCount == 0 || TripCount % TripMultiple == 0); + +  // Are we eliminating the loop control altogether? 
+  bool CompletelyUnroll = (Count == TripCount); + +  // We use the runtime remainder in cases where we don't know trip multiple +  if (TripMultiple == 1 || TripMultiple % Count != 0) { +    if (!UnrollRuntimeLoopRemainder(L, Count, /*AllowExpensiveTripCount*/ false, +                                    /*UseEpilogRemainder*/ true, +                                    UnrollRemainder, LI, SE, DT, AC, true)) { +      LLVM_DEBUG(dbgs() << "Won't unroll-and-jam; remainder loop could not be " +                           "generated when assuming runtime trip count\n"); +      return LoopUnrollResult::Unmodified; +    } +  } + +  // Notify ScalarEvolution that the loop will be substantially changed, +  // if not outright eliminated. +  if (SE) { +    SE->forgetLoop(L); +    SE->forgetLoop(SubLoop); +  } + +  using namespace ore; +  // Report the unrolling decision. +  if (CompletelyUnroll) { +    LLVM_DEBUG(dbgs() << "COMPLETELY UNROLL AND JAMMING loop %" +                      << Header->getName() << " with trip count " << TripCount +                      << "!\n"); +    ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(), +                                 L->getHeader()) +              << "completely unroll and jammed loop with " +              << NV("UnrollCount", TripCount) << " iterations"); +  } else { +    auto DiagBuilder = [&]() { +      OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(), +                              L->getHeader()); +      return Diag << "unroll and jammed loop by a factor of " +                  << NV("UnrollCount", Count); +    }; + +    LLVM_DEBUG(dbgs() << "UNROLL AND JAMMING loop %" << Header->getName() +                      << " by " << Count); +    if (TripMultiple != 1) { +      LLVM_DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); +      ORE->emit([&]() { +        return DiagBuilder() << " with " << NV("TripMultiple", TripMultiple) +                             << " trips 
per branch"; +      }); +    } else { +      LLVM_DEBUG(dbgs() << " with run-time trip count"); +      ORE->emit([&]() { return DiagBuilder() << " with run-time trip count"; }); +    } +    LLVM_DEBUG(dbgs() << "!\n"); +  } + +  BasicBlock *Preheader = L->getLoopPreheader(); +  BasicBlock *LatchBlock = L->getLoopLatch(); +  BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator()); +  assert(Preheader && LatchBlock && Header); +  assert(BI && !BI->isUnconditional()); +  bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); +  BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); +  bool SubLoopContinueOnTrue = SubLoop->contains( +      SubLoop->getLoopLatch()->getTerminator()->getSuccessor(0)); + +  // Partition blocks in an outer/inner loop pair into blocks before and after +  // the loop +  BasicBlockSet SubLoopBlocks; +  BasicBlockSet ForeBlocks; +  BasicBlockSet AftBlocks; +  partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, AftBlocks, +                           DT); + +  // We keep track of the entering/first and exiting/last block of each of +  // Fore/SubLoop/Aft in each iteration. This helps make the stapling up of +  // blocks easier. +  std::vector<BasicBlock *> ForeBlocksFirst; +  std::vector<BasicBlock *> ForeBlocksLast; +  std::vector<BasicBlock *> SubLoopBlocksFirst; +  std::vector<BasicBlock *> SubLoopBlocksLast; +  std::vector<BasicBlock *> AftBlocksFirst; +  std::vector<BasicBlock *> AftBlocksLast; +  ForeBlocksFirst.push_back(Header); +  ForeBlocksLast.push_back(SubLoop->getLoopPreheader()); +  SubLoopBlocksFirst.push_back(SubLoop->getHeader()); +  SubLoopBlocksLast.push_back(SubLoop->getExitingBlock()); +  AftBlocksFirst.push_back(SubLoop->getExitBlock()); +  AftBlocksLast.push_back(L->getExitingBlock()); +  // Maps Blocks[0] -> Blocks[It] +  ValueToValueMapTy LastValueMap; + +  // Move any instructions from fore phi operands from AftBlocks into Fore. 
+  moveHeaderPhiOperandsToForeBlocks( +      Header, LatchBlock, SubLoop->getLoopPreheader()->getTerminator(), +      AftBlocks); + +  // The current on-the-fly SSA update requires blocks to be processed in +  // reverse postorder so that LastValueMap contains the correct value at each +  // exit. +  LoopBlocksDFS DFS(L); +  DFS.perform(LI); +  // Stash the DFS iterators before adding blocks to the loop. +  LoopBlocksDFS::RPOIterator BlockBegin = DFS.beginRPO(); +  LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO(); + +  if (Header->getParent()->isDebugInfoForProfiling()) +    for (BasicBlock *BB : L->getBlocks()) +      for (Instruction &I : *BB) +        if (!isa<DbgInfoIntrinsic>(&I)) +          if (const DILocation *DIL = I.getDebugLoc()) +            I.setDebugLoc(DIL->cloneWithDuplicationFactor(Count)); + +  // Copy all blocks +  for (unsigned It = 1; It != Count; ++It) { +    std::vector<BasicBlock *> NewBlocks; +    // Maps Blocks[It] -> Blocks[It-1] +    DenseMap<Value *, Value *> PrevItValueMap; + +    for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { +      ValueToValueMapTy VMap; +      BasicBlock *New = CloneBasicBlock(*BB, VMap, "." 
+ Twine(It)); +      Header->getParent()->getBasicBlockList().push_back(New); + +      if (ForeBlocks.count(*BB)) { +        L->addBasicBlockToLoop(New, *LI); + +        if (*BB == ForeBlocksFirst[0]) +          ForeBlocksFirst.push_back(New); +        if (*BB == ForeBlocksLast[0]) +          ForeBlocksLast.push_back(New); +      } else if (SubLoopBlocks.count(*BB)) { +        SubLoop->addBasicBlockToLoop(New, *LI); + +        if (*BB == SubLoopBlocksFirst[0]) +          SubLoopBlocksFirst.push_back(New); +        if (*BB == SubLoopBlocksLast[0]) +          SubLoopBlocksLast.push_back(New); +      } else if (AftBlocks.count(*BB)) { +        L->addBasicBlockToLoop(New, *LI); + +        if (*BB == AftBlocksFirst[0]) +          AftBlocksFirst.push_back(New); +        if (*BB == AftBlocksLast[0]) +          AftBlocksLast.push_back(New); +      } else { +        llvm_unreachable("BB being cloned should be in Fore/Sub/Aft"); +      } + +      // Update our running maps of newest clones +      PrevItValueMap[New] = (It == 1 ? *BB : LastValueMap[*BB]); +      LastValueMap[*BB] = New; +      for (ValueToValueMapTy::iterator VI = VMap.begin(), VE = VMap.end(); +           VI != VE; ++VI) { +        PrevItValueMap[VI->second] = +            const_cast<Value *>(It == 1 ? VI->first : LastValueMap[VI->first]); +        LastValueMap[VI->first] = VI->second; +      } + +      NewBlocks.push_back(New); + +      // Update DomTree: +      if (*BB == ForeBlocksFirst[0]) +        DT->addNewBlock(New, ForeBlocksLast[It - 1]); +      else if (*BB == SubLoopBlocksFirst[0]) +        DT->addNewBlock(New, SubLoopBlocksLast[It - 1]); +      else if (*BB == AftBlocksFirst[0]) +        DT->addNewBlock(New, AftBlocksLast[It - 1]); +      else { +        // Each set of blocks (Fore/Sub/Aft) will have the same internal domtree +        // structure. 
+        auto BBDomNode = DT->getNode(*BB); +        auto BBIDom = BBDomNode->getIDom(); +        BasicBlock *OriginalBBIDom = BBIDom->getBlock(); +        assert(OriginalBBIDom); +        assert(LastValueMap[cast<Value>(OriginalBBIDom)]); +        DT->addNewBlock( +            New, cast<BasicBlock>(LastValueMap[cast<Value>(OriginalBBIDom)])); +      } +    } + +    // Remap all instructions in the most recent iteration +    for (BasicBlock *NewBlock : NewBlocks) { +      for (Instruction &I : *NewBlock) { +        ::remapInstruction(&I, LastValueMap); +        if (auto *II = dyn_cast<IntrinsicInst>(&I)) +          if (II->getIntrinsicID() == Intrinsic::assume) +            AC->registerAssumption(II); +      } +    } + +    // Alter the ForeBlocks phi's, pointing them at the latest version of the +    // value from the previous iteration's phis +    for (PHINode &Phi : ForeBlocksFirst[It]->phis()) { +      Value *OldValue = Phi.getIncomingValueForBlock(AftBlocksLast[It]); +      assert(OldValue && "should have incoming edge from Aft[It]"); +      Value *NewValue = OldValue; +      if (Value *PrevValue = PrevItValueMap[OldValue]) +        NewValue = PrevValue; + +      assert(Phi.getNumOperands() == 2); +      Phi.setIncomingBlock(0, ForeBlocksLast[It - 1]); +      Phi.setIncomingValue(0, NewValue); +      Phi.removeIncomingValue(1); +    } +  } + +  // Now that all the basic blocks for the unrolled iterations are in place, +  // finish up connecting the blocks and phi nodes. At this point LastValueMap +  // is the last unrolled iterations values. 
+ +  // Update Phis in BB from OldBB to point to NewBB +  auto updatePHIBlocks = [](BasicBlock *BB, BasicBlock *OldBB, +                            BasicBlock *NewBB) { +    for (PHINode &Phi : BB->phis()) { +      int I = Phi.getBasicBlockIndex(OldBB); +      Phi.setIncomingBlock(I, NewBB); +    } +  }; +  // Update Phis in BB from OldBB to point to NewBB and use the latest value +  // from LastValueMap +  auto updatePHIBlocksAndValues = [](BasicBlock *BB, BasicBlock *OldBB, +                                     BasicBlock *NewBB, +                                     ValueToValueMapTy &LastValueMap) { +    for (PHINode &Phi : BB->phis()) { +      for (unsigned b = 0; b < Phi.getNumIncomingValues(); ++b) { +        if (Phi.getIncomingBlock(b) == OldBB) { +          Value *OldValue = Phi.getIncomingValue(b); +          if (Value *LastValue = LastValueMap[OldValue]) +            Phi.setIncomingValue(b, LastValue); +          Phi.setIncomingBlock(b, NewBB); +          break; +        } +      } +    } +  }; +  // Move all the phis from Src into Dest +  auto movePHIs = [](BasicBlock *Src, BasicBlock *Dest) { +    Instruction *insertPoint = Dest->getFirstNonPHI(); +    while (PHINode *Phi = dyn_cast<PHINode>(Src->begin())) +      Phi->moveBefore(insertPoint); +  }; + +  // Update the PHI values outside the loop to point to the last block +  updatePHIBlocksAndValues(LoopExit, AftBlocksLast[0], AftBlocksLast.back(), +                           LastValueMap); + +  // Update ForeBlocks successors and phi nodes +  BranchInst *ForeTerm = +      cast<BranchInst>(ForeBlocksLast.back()->getTerminator()); +  BasicBlock *Dest = SubLoopBlocksFirst[0]; +  ForeTerm->setSuccessor(0, Dest); + +  if (CompletelyUnroll) { +    while (PHINode *Phi = dyn_cast<PHINode>(ForeBlocksFirst[0]->begin())) { +      Phi->replaceAllUsesWith(Phi->getIncomingValueForBlock(Preheader)); +      Phi->getParent()->getInstList().erase(Phi); +    } +  } else { +    // Update the PHI values to point to the 
last aft block +    updatePHIBlocksAndValues(ForeBlocksFirst[0], AftBlocksLast[0], +                             AftBlocksLast.back(), LastValueMap); +  } + +  for (unsigned It = 1; It != Count; It++) { +    // Remap ForeBlock successors from previous iteration to this +    BranchInst *ForeTerm = +        cast<BranchInst>(ForeBlocksLast[It - 1]->getTerminator()); +    BasicBlock *Dest = ForeBlocksFirst[It]; +    ForeTerm->setSuccessor(0, Dest); +  } + +  // Subloop successors and phis +  BranchInst *SubTerm = +      cast<BranchInst>(SubLoopBlocksLast.back()->getTerminator()); +  SubTerm->setSuccessor(!SubLoopContinueOnTrue, SubLoopBlocksFirst[0]); +  SubTerm->setSuccessor(SubLoopContinueOnTrue, AftBlocksFirst[0]); +  updatePHIBlocks(SubLoopBlocksFirst[0], ForeBlocksLast[0], +                  ForeBlocksLast.back()); +  updatePHIBlocks(SubLoopBlocksFirst[0], SubLoopBlocksLast[0], +                  SubLoopBlocksLast.back()); + +  for (unsigned It = 1; It != Count; It++) { +    // Replace the conditional branch of the previous iteration subloop with an +    // unconditional one to this one +    BranchInst *SubTerm = +        cast<BranchInst>(SubLoopBlocksLast[It - 1]->getTerminator()); +    BranchInst::Create(SubLoopBlocksFirst[It], SubTerm); +    SubTerm->eraseFromParent(); + +    updatePHIBlocks(SubLoopBlocksFirst[It], ForeBlocksLast[It], +                    ForeBlocksLast.back()); +    updatePHIBlocks(SubLoopBlocksFirst[It], SubLoopBlocksLast[It], +                    SubLoopBlocksLast.back()); +    movePHIs(SubLoopBlocksFirst[It], SubLoopBlocksFirst[0]); +  } + +  // Aft blocks successors and phis +  BranchInst *Term = cast<BranchInst>(AftBlocksLast.back()->getTerminator()); +  if (CompletelyUnroll) { +    BranchInst::Create(LoopExit, Term); +    Term->eraseFromParent(); +  } else { +    Term->setSuccessor(!ContinueOnTrue, ForeBlocksFirst[0]); +  } +  updatePHIBlocks(AftBlocksFirst[0], SubLoopBlocksLast[0], +                  SubLoopBlocksLast.back()); + +  for 
(unsigned It = 1; It != Count; It++) {
+    // Replace the conditional branch of the previous iteration aft block with
+    // an unconditional one to this one
+    BranchInst *AftTerm =
+        cast<BranchInst>(AftBlocksLast[It - 1]->getTerminator());
+    BranchInst::Create(AftBlocksFirst[It], AftTerm);
+    AftTerm->eraseFromParent();
+
+    updatePHIBlocks(AftBlocksFirst[It], SubLoopBlocksLast[It],
+                    SubLoopBlocksLast.back());
+    movePHIs(AftBlocksFirst[It], AftBlocksFirst[0]);
+  }
+
+  // Dominator Tree. Remove the old links between Fore, Sub and Aft, adding the
+  // new ones required.
+  if (Count != 1) {
+    SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
+    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete, ForeBlocksLast[0],
+                           SubLoopBlocksFirst[0]);
+    DTUpdates.emplace_back(DominatorTree::UpdateKind::Delete,
+                           SubLoopBlocksLast[0], AftBlocksFirst[0]);
+
+    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
+                           ForeBlocksLast.back(), SubLoopBlocksFirst[0]);
+    DTUpdates.emplace_back(DominatorTree::UpdateKind::Insert,
+                           SubLoopBlocksLast.back(), AftBlocksFirst[0]);
+    DT->applyUpdates(DTUpdates);
+  }
+
+  // Merge adjacent basic blocks, if possible. 
+  SmallPtrSet<BasicBlock *, 16> MergeBlocks; +  MergeBlocks.insert(ForeBlocksLast.begin(), ForeBlocksLast.end()); +  MergeBlocks.insert(SubLoopBlocksLast.begin(), SubLoopBlocksLast.end()); +  MergeBlocks.insert(AftBlocksLast.begin(), AftBlocksLast.end()); +  while (!MergeBlocks.empty()) { +    BasicBlock *BB = *MergeBlocks.begin(); +    BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()); +    if (Term && Term->isUnconditional() && L->contains(Term->getSuccessor(0))) { +      BasicBlock *Dest = Term->getSuccessor(0); +      if (BasicBlock *Fold = foldBlockIntoPredecessor(Dest, LI, SE, DT)) { +        // Don't remove BB and add Fold as they are the same BB +        assert(Fold == BB); +        (void)Fold; +        MergeBlocks.erase(Dest); +      } else +        MergeBlocks.erase(BB); +    } else +      MergeBlocks.erase(BB); +  } + +  // At this point, the code is well formed.  We now do a quick sweep over the +  // inserted code, doing constant propagation and dead code elimination as we +  // go. +  simplifyLoopAfterUnroll(SubLoop, true, LI, SE, DT, AC); +  simplifyLoopAfterUnroll(L, !CompletelyUnroll && Count > 1, LI, SE, DT, AC); + +  NumCompletelyUnrolledAndJammed += CompletelyUnroll; +  ++NumUnrolledAndJammed; + +#ifndef NDEBUG +  // We shouldn't have done anything to break loop simplify form or LCSSA. +  Loop *OuterL = L->getParentLoop(); +  Loop *OutestLoop = OuterL ? OuterL : (!CompletelyUnroll ? L : SubLoop); +  assert(OutestLoop->isRecursivelyLCSSAForm(*DT, *LI)); +  if (!CompletelyUnroll) +    assert(L->isLoopSimplifyForm()); +  assert(SubLoop->isLoopSimplifyForm()); +  assert(DT->verify()); +#endif + +  // Update LoopInfo if the loop is completely removed. +  if (CompletelyUnroll) +    LI->erase(L); + +  return CompletelyUnroll ? 
LoopUnrollResult::FullyUnrolled
+                          : LoopUnrollResult::PartiallyUnrolled;
+}
+
+static bool getLoadsAndStores(BasicBlockSet &Blocks,
+                              SmallVector<Value *, 4> &MemInstr) {
+  // Scan the BBs and collect legal loads and stores.
+  // Returns false if non-simple loads/stores are found.
+  for (BasicBlock *BB : Blocks) {
+    for (Instruction &I : *BB) {
+      if (auto *Ld = dyn_cast<LoadInst>(&I)) {
+        if (!Ld->isSimple())
+          return false;
+        MemInstr.push_back(&I);
+      } else if (auto *St = dyn_cast<StoreInst>(&I)) {
+        if (!St->isSimple())
+          return false;
+        MemInstr.push_back(&I);
+      } else if (I.mayReadOrWriteMemory()) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+static bool checkDependencies(SmallVector<Value *, 4> &Earlier,
+                              SmallVector<Value *, 4> &Later,
+                              unsigned LoopDepth, bool InnerLoop,
+                              DependenceInfo &DI) {
+  // Use DA to check for dependencies between loads and stores that make unroll
+  // and jam invalid
+  for (Value *I : Earlier) {
+    for (Value *J : Later) {
+      Instruction *Src = cast<Instruction>(I);
+      Instruction *Dst = cast<Instruction>(J);
+      if (Src == Dst)
+        continue;
+      // Ignore Input dependencies.
+      if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
+        continue;
+
+      // Track dependencies, and if we find them take a conservative approach
+      // by allowing only = or < (not >), although some > would be safe
+      // (depending upon unroll width). 
+      // For the inner loop, we need to disallow any (> <) dependencies +      // FIXME: Allow > so long as distance is less than unroll width +      if (auto D = DI.depends(Src, Dst, true)) { +        assert(D->isOrdered() && "Expected an output, flow or anti dep."); + +        if (D->isConfused()) +          return false; +        if (!InnerLoop) { +          if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT) +            return false; +        } else { +          assert(LoopDepth + 1 <= D->getLevels()); +          if (D->getDirection(LoopDepth) & Dependence::DVEntry::GT && +              D->getDirection(LoopDepth + 1) & Dependence::DVEntry::LT) +            return false; +        } +      } +    } +  } +  return true; +} + +static bool checkDependencies(Loop *L, BasicBlockSet &ForeBlocks, +                              BasicBlockSet &SubLoopBlocks, +                              BasicBlockSet &AftBlocks, DependenceInfo &DI) { +  // Get all loads/store pairs for each blocks +  SmallVector<Value *, 4> ForeMemInstr; +  SmallVector<Value *, 4> SubLoopMemInstr; +  SmallVector<Value *, 4> AftMemInstr; +  if (!getLoadsAndStores(ForeBlocks, ForeMemInstr) || +      !getLoadsAndStores(SubLoopBlocks, SubLoopMemInstr) || +      !getLoadsAndStores(AftBlocks, AftMemInstr)) +    return false; + +  // Check for dependencies between any blocks that may change order +  unsigned LoopDepth = L->getLoopDepth(); +  return checkDependencies(ForeMemInstr, SubLoopMemInstr, LoopDepth, false, +                           DI) && +         checkDependencies(ForeMemInstr, AftMemInstr, LoopDepth, false, DI) && +         checkDependencies(SubLoopMemInstr, AftMemInstr, LoopDepth, false, +                           DI) && +         checkDependencies(SubLoopMemInstr, SubLoopMemInstr, LoopDepth, true, +                           DI); +} + +bool llvm::isSafeToUnrollAndJam(Loop *L, ScalarEvolution &SE, DominatorTree &DT, +                                DependenceInfo &DI) { +  /* We 
currently handle outer loops like this:
+        |
+    ForeFirst    <----\    }
+     Blocks           |    } ForeBlocks
+    ForeLast          |    }
+        |             |
+    SubLoopFirst  <\  |    }
+     Blocks        |  |    } SubLoopBlocks
+    SubLoopLast   -/  |    }
+        |             |
+    AftFirst          |    }
+     Blocks           |    } AftBlocks
+    AftLast     ------/    }
+        |
+
+    There are (theoretically) any number of blocks in ForeBlocks, SubLoopBlocks
+    and AftBlocks, providing that there is one edge from Fores to SubLoops,
+    one edge from SubLoops to Afts and a single outer loop exit (from Afts).
+    In practice we currently limit Aft blocks to a single block, and limit
+    things further in the profitability checks of the unroll and jam pass.
+
+    Because of the way we rearrange basic blocks, we also require that
+    the Fore blocks on all unrolled iterations are safe to move before the
+    SubLoop blocks of all iterations. So we require that the phi node looping
+    operands of ForeHeader can be moved to at least the end of ForeEnd, so that
+    we can arrange cloned Fore Blocks before the subloop and match up Phi's
+    correctly.
+
+    i.e. The old order of blocks used to be F1 S1_1 S1_2 A1 F2 S2_1 S2_2 A2.
+    It needs to be safe to transform this to F1 F2 S1_1 S2_1 S1_2 S2_2 A1 A2.
+
+    There are then a number of checks along the lines of no calls, no
+    exceptions, inner loop IV is consistent, etc. Note that for loops requiring
+    runtime unrolling, UnrollRuntimeLoopRemainder can also fail in
+    UnrollAndJamLoop if the trip count cannot be easily calculated. 
+  */ + +  if (!L->isLoopSimplifyForm() || L->getSubLoops().size() != 1) +    return false; +  Loop *SubLoop = L->getSubLoops()[0]; +  if (!SubLoop->isLoopSimplifyForm()) +    return false; + +  BasicBlock *Header = L->getHeader(); +  BasicBlock *Latch = L->getLoopLatch(); +  BasicBlock *Exit = L->getExitingBlock(); +  BasicBlock *SubLoopHeader = SubLoop->getHeader(); +  BasicBlock *SubLoopLatch = SubLoop->getLoopLatch(); +  BasicBlock *SubLoopExit = SubLoop->getExitingBlock(); + +  if (Latch != Exit) +    return false; +  if (SubLoopLatch != SubLoopExit) +    return false; + +  if (Header->hasAddressTaken() || SubLoopHeader->hasAddressTaken()) +    return false; + +  // Split blocks into Fore/SubLoop/Aft based on dominators +  BasicBlockSet SubLoopBlocks; +  BasicBlockSet ForeBlocks; +  BasicBlockSet AftBlocks; +  if (!partitionOuterLoopBlocks(L, SubLoop, ForeBlocks, SubLoopBlocks, +                                AftBlocks, &DT)) +    return false; + +  // Aft blocks may need to move instructions to fore blocks, which becomes more +  // difficult if there are multiple (potentially conditionally executed) +  // blocks. For now we just exclude loops with multiple aft blocks. +  if (AftBlocks.size() != 1) +    return false; + +  // Check inner loop IV is consistent between all iterations +  const SCEV *SubLoopBECountSC = SE.getExitCount(SubLoop, SubLoopLatch); +  if (isa<SCEVCouldNotCompute>(SubLoopBECountSC) || +      !SubLoopBECountSC->getType()->isIntegerTy()) +    return false; +  ScalarEvolution::LoopDisposition LD = +      SE.getLoopDisposition(SubLoopBECountSC, L); +  if (LD != ScalarEvolution::LoopInvariant) +    return false; + +  // Check the loop safety info for exceptions. +  LoopSafetyInfo LSI; +  computeLoopSafetyInfo(&LSI, L); +  if (LSI.MayThrow) +    return false; + +  // We've ruled out the easy stuff and now need to check that there are no +  // interdependencies which may prevent us from moving the: +  //  ForeBlocks before Subloop and AftBlocks. 
+  //  Subloop before AftBlocks. +  //  ForeBlock phi operands before the subloop + +  // Make sure we can move all instructions we need to before the subloop +  if (!processHeaderPhiOperands( +          Header, Latch, AftBlocks, [&AftBlocks, &SubLoop](Instruction *I) { +            if (SubLoop->contains(I->getParent())) +              return false; +            if (AftBlocks.count(I->getParent())) { +              // If we hit a phi node in afts we know we are done (probably +              // LCSSA) +              if (isa<PHINode>(I)) +                return false; +              // Can't move instructions with side effects or memory +              // reads/writes +              if (I->mayHaveSideEffects() || I->mayReadOrWriteMemory()) +                return false; +            } +            // Keep going +            return true; +          })) +    return false; + +  // Check for memory dependencies which prohibit the unrolling we are doing. +  // Because of the way we are unrolling Fore/Sub/Aft blocks, we need to check +  // there are no dependencies between Fore-Sub, Fore-Aft, Sub-Aft and Sub-Sub. 
+  if (!checkDependencies(L, ForeBlocks, SubLoopBlocks, AftBlocks, DI)) +    return false; + +  return true; +} diff --git a/lib/Transforms/Utils/LoopUnrollPeel.cpp b/lib/Transforms/Utils/LoopUnrollPeel.cpp index c84ae7d693d7..13794c53f24b 100644 --- a/lib/Transforms/Utils/LoopUnrollPeel.cpp +++ b/lib/Transforms/Utils/LoopUnrollPeel.cpp @@ -20,6 +20,7 @@  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/LoopIterator.h"  #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h"  #include "llvm/Analysis/TargetTransformInfo.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/Dominators.h" @@ -30,6 +31,7 @@  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/MDBuilder.h"  #include "llvm/IR/Metadata.h" +#include "llvm/IR/PatternMatch.h"  #include "llvm/Support/Casting.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h" @@ -46,6 +48,7 @@  #include <limits>  using namespace llvm; +using namespace llvm::PatternMatch;  #define DEBUG_TYPE "loop-unroll" @@ -66,7 +69,7 @@ static const unsigned InfiniteIterationsToInvariance =      std::numeric_limits<unsigned>::max();  // Check whether we are capable of peeling this loop. -static bool canPeel(Loop *L) { +bool llvm::canPeel(Loop *L) {    // Make sure the loop is in simplified form    if (!L->isLoopSimplifyForm())      return false; @@ -136,11 +139,109 @@ static unsigned calculateIterationsToInvariance(    return ToInvariance;  } +// Return the number of iterations to peel off that make conditions in the +// body true/false. For example, if we peel 2 iterations off the loop below, +// the condition i < 2 can be evaluated at compile time. +//  for (i = 0; i < n; i++) +//    if (i < 2) +//      .. +//    else +//      .. 
+//   } +static unsigned countToEliminateCompares(Loop &L, unsigned MaxPeelCount, +                                         ScalarEvolution &SE) { +  assert(L.isLoopSimplifyForm() && "Loop needs to be in loop simplify form"); +  unsigned DesiredPeelCount = 0; + +  for (auto *BB : L.blocks()) { +    auto *BI = dyn_cast<BranchInst>(BB->getTerminator()); +    if (!BI || BI->isUnconditional()) +      continue; + +    // Ignore loop exit condition. +    if (L.getLoopLatch() == BB) +      continue; + +    Value *Condition = BI->getCondition(); +    Value *LeftVal, *RightVal; +    CmpInst::Predicate Pred; +    if (!match(Condition, m_ICmp(Pred, m_Value(LeftVal), m_Value(RightVal)))) +      continue; + +    const SCEV *LeftSCEV = SE.getSCEV(LeftVal); +    const SCEV *RightSCEV = SE.getSCEV(RightVal); + +    // Do not consider predicates that are known to be true or false +    // independently of the loop iteration. +    if (SE.isKnownPredicate(Pred, LeftSCEV, RightSCEV) || +        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), LeftSCEV, +                            RightSCEV)) +      continue; + +    // Check if we have a condition with one AddRec and one non AddRec +    // expression. Normalize LeftSCEV to be the AddRec. +    if (!isa<SCEVAddRecExpr>(LeftSCEV)) { +      if (isa<SCEVAddRecExpr>(RightSCEV)) { +        std::swap(LeftSCEV, RightSCEV); +        Pred = ICmpInst::getSwappedPredicate(Pred); +      } else +        continue; +    } + +    const SCEVAddRecExpr *LeftAR = cast<SCEVAddRecExpr>(LeftSCEV); + +    // Avoid huge SCEV computations in the loop below, make sure we only +    // consider AddRecs of the loop we are trying to peel and avoid +    // non-monotonic predicates, as we will not be able to simplify the loop +    // body. +    // FIXME: For the non-monotonic predicates ICMP_EQ and ICMP_NE we can +    //        simplify the loop, if we peel 1 additional iteration, if there +    //        is no wrapping. 
+    bool Increasing;
+    if (!LeftAR->isAffine() || LeftAR->getLoop() != &L ||
+        !SE.isMonotonicPredicate(LeftAR, Pred, Increasing))
+      continue;
+    (void)Increasing;
+
+    // Check if extending the current DesiredPeelCount lets us evaluate Pred
+    // or !Pred in the loop body statically.
+    unsigned NewPeelCount = DesiredPeelCount;
+
+    const SCEV *IterVal = LeftAR->evaluateAtIteration(
+        SE.getConstant(LeftSCEV->getType(), NewPeelCount), SE);
+
+    // If the original condition is not known, get the negated predicate
+    // (which holds on the else branch) and check if it is known. This allows
+    // us to peel off iterations that make the original condition false.
+    if (!SE.isKnownPredicate(Pred, IterVal, RightSCEV))
+      Pred = ICmpInst::getInversePredicate(Pred);
+
+    const SCEV *Step = LeftAR->getStepRecurrence(SE);
+    while (NewPeelCount < MaxPeelCount &&
+           SE.isKnownPredicate(Pred, IterVal, RightSCEV)) {
+      IterVal = SE.getAddExpr(IterVal, Step);
+      NewPeelCount++;
+    }
+
+    // Only peel the loop if the monotonic predicate !Pred becomes known in the
+    // first iteration of the loop body after peeling.
+    if (NewPeelCount > DesiredPeelCount &&
+        SE.isKnownPredicate(ICmpInst::getInversePredicate(Pred), IterVal,
+                            RightSCEV))
+      DesiredPeelCount = NewPeelCount;
+  }
+
+  return DesiredPeelCount;
+}
+
 // Return the number of iterations we want to peel off.
 void llvm::computePeelCount(Loop *L, unsigned LoopSize,
                             TargetTransformInfo::UnrollingPreferences &UP,
-                            unsigned &TripCount) {
+                            unsigned &TripCount, ScalarEvolution &SE) {
   assert(LoopSize > 0 && "Zero loop size is not allowed!");
+  // Save the UP.PeelCount value set by the target in
+  // TTI.getUnrollingPreferences or by the flag -unroll-peel-count. 
+  unsigned TargetPeelCount = UP.PeelCount;    UP.PeelCount = 0;    if (!canPeel(L))      return; @@ -149,6 +250,19 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,    if (!L->empty())      return; +  // If the user provided a peel count, use that. +  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; +  if (UserPeelCount) { +    LLVM_DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount +                      << " iterations.\n"); +    UP.PeelCount = UnrollForcePeelCount; +    return; +  } + +  // Skip peeling if it's disabled. +  if (!UP.AllowPeeling) +    return; +    // Here we try to get rid of Phis which become invariants after 1, 2, ..., N    // iterations of the loop. For this we compute the number for iterations after    // which every Phi is guaranteed to become an invariant, and try to peel the @@ -160,7 +274,9 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,      SmallDenseMap<PHINode *, unsigned> IterationsToInvariance;      // Now go through all Phis to calculate their the number of iterations they      // need to become invariants. -    unsigned DesiredPeelCount = 0; +    // Start the max computation with the UP.PeelCount value set by the target +    // in TTI.getUnrollingPreferences or by the flag -unroll-peel-count. +    unsigned DesiredPeelCount = TargetPeelCount;      BasicBlock *BackEdge = L->getLoopLatch();      assert(BackEdge && "Loop is not in simplified form?");      for (auto BI = L->getHeader()->begin(); isa<PHINode>(&*BI); ++BI) { @@ -170,15 +286,21 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,        if (ToInvariance != InfiniteIterationsToInvariance)          DesiredPeelCount = std::max(DesiredPeelCount, ToInvariance);      } + +    // Pay respect to limitations implied by loop size and the max peel count. 
+    unsigned MaxPeelCount = UnrollPeelMaxCount; +    MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1); + +    DesiredPeelCount = std::max(DesiredPeelCount, +                                countToEliminateCompares(*L, MaxPeelCount, SE)); +      if (DesiredPeelCount > 0) { -      // Pay respect to limitations implied by loop size and the max peel count. -      unsigned MaxPeelCount = UnrollPeelMaxCount; -      MaxPeelCount = std::min(MaxPeelCount, UP.Threshold / LoopSize - 1);        DesiredPeelCount = std::min(DesiredPeelCount, MaxPeelCount);        // Consider max peel count limitation.        assert(DesiredPeelCount > 0 && "Wrong loop size estimation?"); -      DEBUG(dbgs() << "Peel " << DesiredPeelCount << " iteration(s) to turn" -                   << " some Phis into invariants.\n"); +      LLVM_DEBUG(dbgs() << "Peel " << DesiredPeelCount +                        << " iteration(s) to turn" +                        << " some Phis into invariants.\n");        UP.PeelCount = DesiredPeelCount;        return;      } @@ -189,44 +311,37 @@ void llvm::computePeelCount(Loop *L, unsigned LoopSize,    if (TripCount)      return; -  // If the user provided a peel count, use that. -  bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0; -  if (UserPeelCount) { -    DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount -                 << " iterations.\n"); -    UP.PeelCount = UnrollForcePeelCount; -    return; -  } -    // If we don't know the trip count, but have reason to believe the average    // trip count is low, peeling should be beneficial, since we will usually    // hit the peeled section.    // We only do this in the presence of profile information, since otherwise    // our estimates of the trip count are not reliable enough. 
-  if (UP.AllowPeeling && L->getHeader()->getParent()->hasProfileData()) { +  if (L->getHeader()->getParent()->hasProfileData()) {      Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);      if (!PeelCount)        return; -    DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount -                 << "\n"); +    LLVM_DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount +                      << "\n");      if (*PeelCount) {        if ((*PeelCount <= UnrollPeelMaxCount) &&            (LoopSize * (*PeelCount + 1) <= UP.Threshold)) { -        DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n"); +        LLVM_DEBUG(dbgs() << "Peeling first " << *PeelCount +                          << " iterations.\n");          UP.PeelCount = *PeelCount;          return;        } -      DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); -      DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); -      DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n"); -      DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n"); +      LLVM_DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n"); +      LLVM_DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n"); +      LLVM_DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) +                        << "\n"); +      LLVM_DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");      }    }  } -/// \brief Update the branch weights of the latch of a peeled-off loop +/// Update the branch weights of the latch of a peeled-off loop  /// iteration.  /// This sets the branch weights for the latch of the recently peeled off loop  /// iteration correctly.  
@@ -267,12 +382,12 @@ static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,    }  } -/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p +/// Clones the body of the loop L, putting it between \p InsertTop and \p  /// InsertBot.  /// \param IterNumber The serial number of the iteration currently being  /// peeled off.  /// \param Exit The exit block of the original loop. -/// \param[out] NewBlocks A list of the the blocks in the newly created clone +/// \param[out] NewBlocks A list of the blocks in the newly created clone  /// \param[out] VMap The value map between the loop and the new clone.  /// \param LoopBlocks A helper for DFS-traversal of the loop.  /// \param LVMap A value-map that maps instructions from the original loop to @@ -376,7 +491,7 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,      LVMap[KV.first] = KV.second;  } -/// \brief Peel off the first \p PeelCount iterations of loop \p L. +/// Peel off the first \p PeelCount iterations of loop \p L.  ///  /// Note that this does not peel them off as a single straight-line block.  /// Rather, each iteration is peeled off separately, and needs to check the @@ -388,8 +503,8 @@ static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,  bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,                      ScalarEvolution *SE, DominatorTree *DT,                      AssumptionCache *AC, bool PreserveLCSSA) { -  if (!canPeel(L)) -    return false; +  assert(PeelCount > 0 && "Attempt to peel out zero iterations?"); +  assert(canPeel(L) && "Attempt to peel a loop which is not peelable?");    LoopBlocksDFS LoopBlocks(L);    LoopBlocks.perform(LI); @@ -500,10 +615,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,        // the original loop body.        
if (Iter == 0)          DT->changeImmediateDominator(Exit, cast<BasicBlock>(LVMap[Latch])); -#ifndef NDEBUG -      if (VerifyDomInfo) -        DT->verifyDomTree(); -#endif +      assert(DT->verify(DominatorTree::VerificationLevel::Fast));      }      updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter, diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp index e00541d3c812..0057b4ba7ce1 100644 --- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp +++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp @@ -21,8 +21,8 @@  //  //===----------------------------------------------------------------------===// +#include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallSet.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/LoopIterator.h"  #include "llvm/Analysis/ScalarEvolution.h" @@ -33,7 +33,7 @@  #include "llvm/IR/Module.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include "llvm/Transforms/Utils/Cloning.h"  #include "llvm/Transforms/Utils/LoopUtils.h" @@ -80,25 +80,21 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,    // The new PHI node value is added as an operand of a PHI node in either    // the loop header or the loop exit block.    for (BasicBlock *Succ : successors(Latch)) { -    for (Instruction &BBI : *Succ) { -      PHINode *PN = dyn_cast<PHINode>(&BBI); -      // Exit when we passed all PHI nodes. -      if (!PN) -        break; +    for (PHINode &PN : Succ->phis()) {        // Add a new PHI node to the prolog end block and add the        // appropriate incoming values. 
-      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", +      PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",                                         PrologExit->getFirstNonPHI());        // Adding a value to the new PHI node from the original loop preheader.        // This is the value that skips all the prolog code. -      if (L->contains(PN)) { -        NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), +      if (L->contains(&PN)) { +        NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader),                             PreHeader);        } else { -        NewPN->addIncoming(UndefValue::get(PN->getType()), PreHeader); +        NewPN->addIncoming(UndefValue::get(PN.getType()), PreHeader);        } -      Value *V = PN->getIncomingValueForBlock(Latch); +      Value *V = PN.getIncomingValueForBlock(Latch);        if (Instruction *I = dyn_cast<Instruction>(V)) {          if (L->contains(I)) {            V = VMap.lookup(I); @@ -111,10 +107,10 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,        // Update the existing PHI node operand with the value from the        // new PHI node.  How this is done depends on if the existing        // PHI node is in the original loop block, or the exit block. -      if (L->contains(PN)) { -        PN->setIncomingValue(PN->getBasicBlockIndex(NewPreHeader), NewPN); +      if (L->contains(&PN)) { +        PN.setIncomingValue(PN.getBasicBlockIndex(NewPreHeader), NewPN);        } else { -        PN->addIncoming(NewPN, PrologExit); +        PN.addIncoming(NewPN, PrologExit);        }      }    } @@ -191,11 +187,7 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,    // Exit (EpilogPN)    // Update PHI nodes at NewExit and Exit. -  for (Instruction &BBI : *NewExit) { -    PHINode *PN = dyn_cast<PHINode>(&BBI); -    // Exit when we passed all PHI nodes. 
-    if (!PN) -      break; +  for (PHINode &PN : NewExit->phis()) {      // PN should be used in another PHI located in Exit block as      // Exit was split by SplitBlockPredecessors into Exit and NewExit      // Basicaly it should look like: @@ -207,14 +199,14 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,      //      // There is EpilogPreHeader incoming block instead of NewExit as      // NewExit was spilt 1 more time to get EpilogPreHeader. -    assert(PN->hasOneUse() && "The phi should have 1 use"); -    PHINode *EpilogPN = cast<PHINode> (PN->use_begin()->getUser()); +    assert(PN.hasOneUse() && "The phi should have 1 use"); +    PHINode *EpilogPN = cast<PHINode>(PN.use_begin()->getUser());      assert(EpilogPN->getParent() == Exit && "EpilogPN should be in Exit block");      // Add incoming PreHeader from branch around the Loop -    PN->addIncoming(UndefValue::get(PN->getType()), PreHeader); +    PN.addIncoming(UndefValue::get(PN.getType()), PreHeader); -    Value *V = PN->getIncomingValueForBlock(Latch); +    Value *V = PN.getIncomingValueForBlock(Latch);      Instruction *I = dyn_cast<Instruction>(V);      if (I && L->contains(I))        // If value comes from an instruction in the loop add VMap value. @@ -242,23 +234,19 @@ static void ConnectEpilog(Loop *L, Value *ModVal, BasicBlock *NewExit,      // Skip this as we already updated phis in exit blocks.      if (!L->contains(Succ))        continue; -    for (Instruction &BBI : *Succ) { -      PHINode *PN = dyn_cast<PHINode>(&BBI); -      // Exit when we passed all PHI nodes. -      if (!PN) -        break; +    for (PHINode &PN : Succ->phis()) {        // Add new PHI nodes to the loop exit block and update epilog        // PHIs with the new PHI values. 
-      PHINode *NewPN = PHINode::Create(PN->getType(), 2, PN->getName() + ".unr", +      PHINode *NewPN = PHINode::Create(PN.getType(), 2, PN.getName() + ".unr",                                         NewExit->getFirstNonPHI());        // Adding a value to the new PHI node from the unrolling loop preheader. -      NewPN->addIncoming(PN->getIncomingValueForBlock(NewPreHeader), PreHeader); +      NewPN->addIncoming(PN.getIncomingValueForBlock(NewPreHeader), PreHeader);        // Adding a value to the new PHI node from the unrolling loop latch. -      NewPN->addIncoming(PN->getIncomingValueForBlock(Latch), Latch); +      NewPN->addIncoming(PN.getIncomingValueForBlock(Latch), Latch);        // Update the existing PHI node operand with the value from the new PHI        // node.  Corresponding instruction in epilog loop should be PHI. -      PHINode *VPN = cast<PHINode>(VMap[&BBI]); +      PHINode *VPN = cast<PHINode>(VMap[&PN]);        VPN->setIncomingValue(VPN->getBasicBlockIndex(EpilogPreHeader), NewPN);      }    } @@ -430,8 +418,9 @@ canSafelyUnrollMultiExitLoop(Loop *L, SmallVectorImpl<BasicBlock *> &OtherExits,    // UnrollRuntimeMultiExit is true. This will need updating the logic in    // connectEpilog/connectProlog.    
if (!LatchExit->getSinglePredecessor()) { -    DEBUG(dbgs() << "Bailout for multi-exit handling when latch exit has >1 " -                    "predecessor.\n"); +    LLVM_DEBUG( +        dbgs() << "Bailout for multi-exit handling when latch exit has >1 " +                  "predecessor.\n");      return false;    }    // FIXME: We bail out of multi-exit unrolling when epilog loop is generated @@ -540,14 +529,14 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,                                        LoopInfo *LI, ScalarEvolution *SE,                                        DominatorTree *DT, AssumptionCache *AC,                                        bool PreserveLCSSA) { -  DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); -  DEBUG(L->dump()); -  DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" : -        dbgs() << "Using prolog remainder.\n"); +  LLVM_DEBUG(dbgs() << "Trying runtime unrolling on Loop: \n"); +  LLVM_DEBUG(L->dump()); +  LLVM_DEBUG(UseEpilogRemainder ? dbgs() << "Using epilog remainder.\n" +                                : dbgs() << "Using prolog remainder.\n");    // Make sure the loop is in canonical form.    if (!L->isLoopSimplifyForm()) { -    DEBUG(dbgs() << "Not in simplify form!\n"); +    LLVM_DEBUG(dbgs() << "Not in simplify form!\n");      return false;    } @@ -573,7 +562,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,    // Support only single exit and exiting block unless multi-exit loop unrolling is enabled.    
if (!isMultiExitUnrollingEnabled &&        (!L->getExitingBlock() || OtherExits.size())) { -    DEBUG( +    LLVM_DEBUG(          dbgs()          << "Multiple exit/exiting blocks in loop and multi-exit unrolling not "             "enabled!\n"); @@ -593,7 +582,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,    const SCEV *BECountSC = SE->getExitCount(L, Latch);    if (isa<SCEVCouldNotCompute>(BECountSC) ||        !BECountSC->getType()->isIntegerTy()) { -    DEBUG(dbgs() << "Could not compute exit block SCEV\n"); +    LLVM_DEBUG(dbgs() << "Could not compute exit block SCEV\n");      return false;    } @@ -603,7 +592,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,    const SCEV *TripCountSC =        SE->getAddExpr(BECountSC, SE->getConstant(BECountSC->getType(), 1));    if (isa<SCEVCouldNotCompute>(TripCountSC)) { -    DEBUG(dbgs() << "Could not compute trip count SCEV.\n"); +    LLVM_DEBUG(dbgs() << "Could not compute trip count SCEV.\n");      return false;    } @@ -613,15 +602,16 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,    SCEVExpander Expander(*SE, DL, "loop-unroll");    if (!AllowExpensiveTripCount &&        Expander.isHighCostExpansion(TripCountSC, L, PreHeaderBR)) { -    DEBUG(dbgs() << "High cost for expanding trip count scev!\n"); +    LLVM_DEBUG(dbgs() << "High cost for expanding trip count scev!\n");      return false;    }    // This constraint lets us deal with an overflowing trip count easily; see the    // comment on ModVal below.    if (Log2_32(Count) > BEWidth) { -    DEBUG(dbgs() -          << "Count failed constraint on overflow trip count calculation.\n"); +    LLVM_DEBUG( +        dbgs() +        << "Count failed constraint on overflow trip count calculation.\n");      return false;    } @@ -775,7 +765,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,    // values from the cloned region. 
Also update the dominator info for    // OtherExits and their immediate successors, since we have new edges into    // OtherExits. -  SmallSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks; +  SmallPtrSet<BasicBlock*, 8> ImmediateSuccessorsOfExitBlocks;    for (auto *BB : OtherExits) {     for (auto &II : *BB) { @@ -890,10 +880,9 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,                    NewPreHeader, VMap, DT, LI, PreserveLCSSA);    } -  // If this loop is nested, then the loop unroller changes the code in the -  // parent loop, so the Scalar Evolution pass needs to be run again. -  if (Loop *ParentLoop = L->getParentLoop()) -    SE->forgetLoop(ParentLoop); +  // If this loop is nested, then the loop unroller changes the code in the any +  // of its parent loops, so the Scalar Evolution pass needs to be run again. +  SE->forgetTopmostLoop(L);    // Canonicalize to LoopSimplifyForm both original and remainder loops. We    // cannot rely on the LoopUnrollPass to do this because it only does @@ -909,7 +898,7 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,    }    if (remainderLoop && UnrollRemainder) { -    DEBUG(dbgs() << "Unrolling remainder loop\n"); +    LLVM_DEBUG(dbgs() << "Unrolling remainder loop\n");      UnrollLoop(remainderLoop, /*Count*/ Count - 1, /*TripCount*/ Count - 1,                 /*Force*/ false, /*AllowRuntime*/ false,                 /*AllowExpensiveTripCount*/ false, /*PreserveCondBr*/ true, diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index fe106e33bca1..46af120a428b 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -16,13 +16,16 @@  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/BasicAliasAnalysis.h"  #include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/LoopPass.h" +#include 
"llvm/Analysis/MustExecute.h"  #include "llvm/Analysis/ScalarEvolution.h"  #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"  #include "llvm/Analysis/ScalarEvolutionExpander.h"  #include "llvm/Analysis/ScalarEvolutionExpressions.h"  #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/Module.h" @@ -30,6 +33,7 @@  #include "llvm/IR/ValueHandle.h"  #include "llvm/Pass.h"  #include "llvm/Support/Debug.h" +#include "llvm/Support/KnownBits.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  using namespace llvm; @@ -77,10 +81,13 @@ bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {    return false;  } -Instruction * -RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT, -                                     SmallPtrSetImpl<Instruction *> &Visited, -                                     SmallPtrSetImpl<Instruction *> &CI) { +/// Determines if Phi may have been type-promoted. If Phi has a single user +/// that ANDs the Phi with a type mask, return the user. RT is updated to +/// account for the narrower bit width represented by the mask, and the AND +/// instruction is added to CI. +static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT, +                                   SmallPtrSetImpl<Instruction *> &Visited, +                                   SmallPtrSetImpl<Instruction *> &CI) {    if (!Phi->hasOneUse())      return Phi; @@ -101,70 +108,92 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,    return Phi;  } -bool RecurrenceDescriptor::getSourceExtensionKind( -    Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned, -    SmallPtrSetImpl<Instruction *> &Visited, -    SmallPtrSetImpl<Instruction *> &CI) { +/// Compute the minimal bit width needed to represent a reduction whose exit +/// instruction is given by Exit. 
+static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit, +                                                     DemandedBits *DB, +                                                     AssumptionCache *AC, +                                                     DominatorTree *DT) { +  bool IsSigned = false; +  const DataLayout &DL = Exit->getModule()->getDataLayout(); +  uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType()); + +  if (DB) { +    // Use the demanded bits analysis to determine the bits that are live out +    // of the exit instruction, rounding up to the nearest power of two. If the +    // use of demanded bits results in a smaller bit width, we know the value +    // must be positive (i.e., IsSigned = false), because if this were not the +    // case, the sign bit would have been demanded. +    auto Mask = DB->getDemandedBits(Exit); +    MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros(); +  } + +  if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) { +    // If demanded bits wasn't able to limit the bit width, we can try to use +    // value tracking instead. This can be the case, for example, if the value +    // may be negative. +    auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT); +    auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType()); +    MaxBitWidth = NumTypeBits - NumSignBits; +    KnownBits Bits = computeKnownBits(Exit, DL); +    if (!Bits.isNonNegative()) { +      // If the value is not known to be non-negative, we set IsSigned to true, +      // meaning that we will use sext instructions instead of zext +      // instructions to restore the original type. +      IsSigned = true; +      if (!Bits.isNegative()) +        // If the value is not known to be negative, we don't known what the +        // upper bit is, and therefore, we don't know what kind of extend we +        // will need. In this case, just increase the bit width by one bit and +        // use sext. 
+        ++MaxBitWidth; +    } +  } +  if (!isPowerOf2_64(MaxBitWidth)) +    MaxBitWidth = NextPowerOf2(MaxBitWidth); + +  return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth), +                        IsSigned); +} + +/// Collect cast instructions that can be ignored in the vectorizer's cost +/// model, given a reduction exit value and the minimal type in which the +/// reduction can be represented. +static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit, +                                 Type *RecurrenceType, +                                 SmallPtrSetImpl<Instruction *> &Casts) {    SmallVector<Instruction *, 8> Worklist; -  bool FoundOneOperand = false; -  unsigned DstSize = RT->getPrimitiveSizeInBits(); +  SmallPtrSet<Instruction *, 8> Visited;    Worklist.push_back(Exit); -  // Traverse the instructions in the reduction expression, beginning with the -  // exit value.    while (!Worklist.empty()) { -    Instruction *I = Worklist.pop_back_val(); -    for (Use &U : I->operands()) { - -      // Terminate the traversal if the operand is not an instruction, or we -      // reach the starting value. -      Instruction *J = dyn_cast<Instruction>(U.get()); -      if (!J || J == Start) -        continue; - -      // Otherwise, investigate the operation if it is also in the expression. -      if (Visited.count(J)) { -        Worklist.push_back(J); +    Instruction *Val = Worklist.pop_back_val(); +    Visited.insert(Val); +    if (auto *Cast = dyn_cast<CastInst>(Val)) +      if (Cast->getSrcTy() == RecurrenceType) { +        // If the source type of a cast instruction is equal to the recurrence +        // type, it will be eliminated, and should be ignored in the vectorizer +        // cost model. +        Casts.insert(Cast);          continue;        } -      // If the operand is not in Visited, it is not a reduction operation, but -      // it does feed into one. 
Make sure it is either a single-use sign- or -      // zero-extend instruction. -      CastInst *Cast = dyn_cast<CastInst>(J); -      bool IsSExtInst = isa<SExtInst>(J); -      if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst)) -        return false; - -      // Ensure the source type of the extend is no larger than the reduction -      // type. It is not necessary for the types to be identical. -      unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits(); -      if (SrcSize > DstSize) -        return false; - -      // Furthermore, ensure that all such extends are of the same kind. -      if (FoundOneOperand) { -        if (IsSigned != IsSExtInst) -          return false; -      } else { -        FoundOneOperand = true; -        IsSigned = IsSExtInst; -      } - -      // Lastly, if the source type of the extend matches the reduction type, -      // add the extend to CI so that we can avoid accounting for it in the -      // cost model. -      if (SrcSize == DstSize) -        CI.insert(Cast); -    } +    // Add all operands to the work list if they are loop-varying values that +    // we haven't yet visited. 
+    for (Value *O : cast<User>(Val)->operands()) +      if (auto *I = dyn_cast<Instruction>(O)) +        if (TheLoop->contains(I) && !Visited.count(I)) +          Worklist.push_back(I);    } -  return true;  }  bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,                                             Loop *TheLoop, bool HasFunNoNaNAttr, -                                           RecurrenceDescriptor &RedDes) { +                                           RecurrenceDescriptor &RedDes, +                                           DemandedBits *DB, +                                           AssumptionCache *AC, +                                           DominatorTree *DT) {    if (Phi->getNumIncomingValues() != 2)      return false; @@ -353,14 +382,49 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,    if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)      return false; -  // If we think Phi may have been type-promoted, we also need to ensure that -  // all source operands of the reduction are either SExtInsts or ZEstInsts. If -  // so, we will be able to evaluate the reduction in the narrower bit width. -  if (Start != Phi) -    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType, -                                IsSigned, VisitedInsts, CastInsts)) +  if (Start != Phi) { +    // If the starting value is not the same as the phi node, we speculatively +    // looked through an 'and' instruction when evaluating a potential +    // arithmetic reduction to determine if it may have been type-promoted. +    // +    // We now compute the minimal bit width that is required to represent the +    // reduction. If this is the same width that was indicated by the 'and', we +    // can represent the reduction in the smaller type. The 'and' instruction +    // will be eliminated since it will essentially be a cast instruction that +    // can be ignore in the cost model. 
If we compute a different type than we +    // did when evaluating the 'and', the 'and' will not be eliminated, and we +    // will end up with different kinds of operations in the recurrence +    // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is +    // the case. +    // +    // The vectorizer relies on InstCombine to perform the actual +    // type-shrinking. It does this by inserting instructions to truncate the +    // exit value of the reduction to the width indicated by RecurrenceType and +    // then extend this value back to the original width. If IsSigned is false, +    // a 'zext' instruction will be generated; otherwise, a 'sext' will be +    // used. +    // +    // TODO: We should not rely on InstCombine to rewrite the reduction in the +    //       smaller type. We should just generate a correctly typed expression +    //       to begin with. +    Type *ComputedType; +    std::tie(ComputedType, IsSigned) = +        computeRecurrenceType(ExitInstruction, DB, AC, DT); +    if (ComputedType != RecurrenceType)        return false; +    // The recurrence expression will be represented in a narrower type. If +    // there are any cast instructions that will be unnecessary, collect them +    // in CastInsts. Note that the 'and' instruction was already included in +    // this list. +    // +    // TODO: A better way to represent this may be to tag in some way all the +    //       instructions that are a part of the reduction. The vectorizer cost +    //       model could then apply the recurrence type to these instructions, +    //       without needing a white list of instructions to ignore. +    collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts); +  } +    // We found a reduction var if we have reached the original phi node and we    // only have a single instruction with out-of-loop users. 
@@ -480,48 +544,59 @@ bool RecurrenceDescriptor::hasMultipleUsesOf(    return false;  }  bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop, -                                          RecurrenceDescriptor &RedDes) { +                                          RecurrenceDescriptor &RedDes, +                                          DemandedBits *DB, AssumptionCache *AC, +                                          DominatorTree *DT) {    BasicBlock *Header = TheLoop->getHeader();    Function &F = *Header->getParent();    bool HasFunNoNaNAttr =        F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; -  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { -    DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n"); +  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, +                      AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");      return true;    } -  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) { -    DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n"); +  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, +                      AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");      return true;    } -  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) { -    DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n"); +  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB, +                      AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");      return true;    } -  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) { -    DEBUG(dbgs() << "Found an AND reduction PHI." 
<< *Phi << "\n"); +  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB, +                      AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");      return true;    } -  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) { -    DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n"); +  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB, +                      AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");      return true;    } -  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, -                      RedDes)) { -    DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n"); +  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes, +                      DB, AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");      return true;    } -  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) { -    DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n"); +  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB, +                      AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");      return true;    } -  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) { -    DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n"); +  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB, +                      AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");      return true;    } -  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) { -    DEBUG(dbgs() << "Found an float MINMAX reduction PHI." 
<< *Phi << "\n"); +  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB, +                      AC, DT)) { +    LLVM_DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi +                      << "\n");      return true;    }    // Not a reduction of known type. @@ -849,13 +924,13 @@ bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,  }  /// This function is called when we suspect that the update-chain of a phi node -/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,  -/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime  -/// predicate P under which the SCEV expression for the phi can be the  -/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the  -/// cast instructions that are involved in the update-chain of this induction.  -/// A caller that adds the required runtime predicate can be free to drop these  -/// cast instructions, and compute the phi using \p AR (instead of some scev  +/// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts, +/// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime +/// predicate P under which the SCEV expression for the phi can be the +/// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the +/// cast instructions that are involved in the update-chain of this induction. +/// A caller that adds the required runtime predicate can be free to drop these +/// cast instructions, and compute the phi using \p AR (instead of some scev  /// expression with casts).  
///  /// For example, without a predicate the scev expression can take the following @@ -890,7 +965,7 @@ static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,    assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");    const Loop *L = AR->getLoop(); -  // Find any cast instructions that participate in the def-use chain of  +  // Find any cast instructions that participate in the def-use chain of    // PhiScev in the loop.    // FORNOW/TODO: We currently expect the def-use chain to include only    // two-operand instructions, where one of the operands is an invariant. @@ -978,7 +1053,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,      AR = PSE.getAsAddRec(Phi);    if (!AR) { -    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); +    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");      return false;    } @@ -1012,14 +1087,15 @@ bool InductionDescriptor::isInductionPHI(    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);    if (!AR) { -    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n"); +    LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");      return false;    }    if (AR->getLoop() != TheLoop) {      // FIXME: We should treat this as a uniform. Unfortunately, we      // don't currently know how to handled uniform PHIs. 
-    DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n"); +    LLVM_DEBUG( +        dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");      return false;    } @@ -1100,11 +1176,12 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,          BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);      if (!NewExitBB) -      DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: " -                   << *L << "\n"); +      LLVM_DEBUG( +          dbgs() << "WARNING: Can't create a dedicated exit block for loop: " +                 << *L << "\n");      else -      DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " -                   << NewExitBB->getName() << "\n"); +      LLVM_DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block " +                        << NewExitBB->getName() << "\n");      return true;    }; @@ -1127,7 +1204,7 @@ bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,    return Changed;  } -/// \brief Returns the instructions that use values defined in the loop. +/// Returns the instructions that use values defined in the loop.  SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {    SmallVector<Instruction *, 8> UsedOutside; @@ -1204,7 +1281,7 @@ void llvm::initializeLoopPassPass(PassRegistry &Registry) {    INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)  } -/// \brief Find string metadata for loop +/// Find string metadata for loop  ///  /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an  /// operand or null otherwise.  If the string metadata is not found return @@ -1321,13 +1398,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,    // Rewrite phis in the exit block to get their inputs from the Preheader    // instead of the exiting block. 
-  BasicBlock::iterator BI = ExitBlock->begin(); -  while (PHINode *P = dyn_cast<PHINode>(BI)) { +  for (PHINode &P : ExitBlock->phis()) {      // Set the zero'th element of Phi to be from the preheader and remove all      // other incoming values. Given the loop has dedicated exits, all other      // incoming values must be from the exiting blocks.      int PredIndex = 0; -    P->setIncomingBlock(PredIndex, Preheader); +    P.setIncomingBlock(PredIndex, Preheader);      // Removes all incoming values from all other exiting blocks (including      // duplicate values from an exiting block).      // Nuke all entries except the zero'th entry which is the preheader entry. @@ -1335,13 +1411,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,      // below, to keep the indices valid for deletion (removeIncomingValues      // updates getNumIncomingValues and shifts all values down into the operand      // being deleted). -    for (unsigned i = 0, e = P->getNumIncomingValues() - 1; i != e; ++i) -      P->removeIncomingValue(e - i, false); +    for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i) +      P.removeIncomingValue(e - i, false); -    assert((P->getNumIncomingValues() == 1 && -            P->getIncomingBlock(PredIndex) == Preheader) && +    assert((P.getNumIncomingValues() == 1 && +            P.getIncomingBlock(PredIndex) == Preheader) &&             "Should have exactly one value and that's from the preheader!"); -    ++BI;    }    // Disconnect the loop body by branching directly to its exit. @@ -1358,6 +1433,32 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,      DT->deleteEdge(Preheader, L->getHeader());    } +  // Given LCSSA form is satisfied, we should not have users of instructions +  // within the dead loop outside of the loop. However, LCSSA doesn't take +  // unreachable uses into account. We handle them here. 
+  // We could do it after drop all references (in this case all users in the +  // loop will be already eliminated and we have less work to do but according +  // to API doc of User::dropAllReferences only valid operation after dropping +  // references, is deletion. So let's substitute all usages of +  // instruction from the loop with undef value of corresponding type first. +  for (auto *Block : L->blocks()) +    for (Instruction &I : *Block) { +      auto *Undef = UndefValue::get(I.getType()); +      for (Value::use_iterator UI = I.use_begin(), E = I.use_end(); UI != E;) { +        Use &U = *UI; +        ++UI; +        if (auto *Usr = dyn_cast<Instruction>(U.getUser())) +          if (L->contains(Usr->getParent())) +            continue; +        // If we have a DT then we can check that uses outside a loop only in +        // unreachable block. +        if (DT) +          assert(!DT->isReachableFromEntry(U) && +                 "Unexpected user in reachable block"); +        U.set(Undef); +      } +    } +    // Remove the block from the reference counting scheme, so that we can    // delete it freely later.    for (auto *Block : L->blocks()) @@ -1385,54 +1486,12 @@ void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,    }  } -/// Returns true if the instruction in a loop is guaranteed to execute at least -/// once. -bool llvm::isGuaranteedToExecute(const Instruction &Inst, -                                 const DominatorTree *DT, const Loop *CurLoop, -                                 const LoopSafetyInfo *SafetyInfo) { -  // We have to check to make sure that the instruction dominates all -  // of the exit blocks.  If it doesn't, then there is a path out of the loop -  // which does not execute this instruction, so we can't hoist it. - -  // If the instruction is in the header block for the loop (which is very -  // common), it is always guaranteed to dominate the exit blocks.  
Since this -  // is a common case, and can save some work, check it now. -  if (Inst.getParent() == CurLoop->getHeader()) -    // If there's a throw in the header block, we can't guarantee we'll reach -    // Inst. -    return !SafetyInfo->HeaderMayThrow; - -  // Somewhere in this loop there is an instruction which may throw and make us -  // exit the loop. -  if (SafetyInfo->MayThrow) -    return false; - -  // Get the exit blocks for the current loop. -  SmallVector<BasicBlock *, 8> ExitBlocks; -  CurLoop->getExitBlocks(ExitBlocks); - -  // Verify that the block dominates each of the exit blocks of the loop. -  for (BasicBlock *ExitBlock : ExitBlocks) -    if (!DT->dominates(Inst.getParent(), ExitBlock)) -      return false; - -  // As a degenerate case, if the loop is statically infinite then we haven't -  // proven anything since there are no exit blocks. -  if (ExitBlocks.empty()) -    return false; - -  // FIXME: In general, we have to prove that the loop isn't an infinite loop. -  // See http::llvm.org/PR24078 .  (The "ExitBlocks.empty()" check above is -  // just a special case of this.) -  return true; -} -  Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {    // Only support loops with a unique exiting block, and a latch.    if (!L->getExitingBlock())      return None; -  // Get the branch weights for the the loop's backedge. +  // Get the branch weights for the loop's backedge.    BranchInst *LatchBR =        dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());    if (!LatchBR || LatchBR->getNumSuccessors() != 2) @@ -1460,7 +1519,7 @@ Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {      return (FalseVal + (TrueVal / 2)) / TrueVal;  } -/// \brief Adds a 'fast' flag to floating point operations. +/// Adds a 'fast' flag to floating point operations.  
static Value *addFastMathFlag(Value *V) {    if (isa<FPMathOperator>(V)) {      FastMathFlags Flags; @@ -1470,6 +1529,38 @@ static Value *addFastMathFlag(Value *V) {    return V;  } +// Helper to generate an ordered reduction. +Value * +llvm::getOrderedReduction(IRBuilder<> &Builder, Value *Acc, Value *Src, +                          unsigned Op, +                          RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind, +                          ArrayRef<Value *> RedOps) { +  unsigned VF = Src->getType()->getVectorNumElements(); + +  // Extract and apply reduction ops in ascending order: +  // e.g. ((((Acc + Scl[0]) + Scl[1]) + Scl[2]) + ) ... + Scl[VF-1] +  Value *Result = Acc; +  for (unsigned ExtractIdx = 0; ExtractIdx != VF; ++ExtractIdx) { +    Value *Ext = +        Builder.CreateExtractElement(Src, Builder.getInt32(ExtractIdx)); + +    if (Op != Instruction::ICmp && Op != Instruction::FCmp) { +      Result = Builder.CreateBinOp((Instruction::BinaryOps)Op, Result, Ext, +                                   "bin.rdx"); +    } else { +      assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid && +             "Invalid min/max"); +      Result = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, Result, +                                                    Ext); +    } + +    if (!RedOps.empty()) +      propagateIRFlags(Result, RedOps); +  } + +  return Result; +} +  // Helper to generate a log2 shuffle reduction.  
Value *  llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op, diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp index 29756d9dab7f..abbcd5f9e3b8 100644 --- a/lib/Transforms/Utils/LoopVersioning.cpp +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -140,9 +140,12 @@ void LoopVersioning::addPHINodes(      if (!PN) {        PN = PHINode::Create(Inst->getType(), 2, Inst->getName() + ".lver",                             &PHIBlock->front()); -      for (auto *User : Inst->users()) -        if (!VersionedLoop->contains(cast<Instruction>(User)->getParent())) -          User->replaceUsesOfWith(Inst, PN); +      SmallVector<User*, 8> UsersToUpdate; +      for (User *U : Inst->users()) +        if (!VersionedLoop->contains(cast<Instruction>(U)->getParent())) +          UsersToUpdate.push_back(U); +      for (User *U : UsersToUpdate) +        U->replaceUsesOfWith(Inst, PN);        PN->addIncoming(Inst, VersionedLoop->getExitingBlock());      }    } @@ -248,7 +251,7 @@ void LoopVersioning::annotateInstWithNoAlias(Instruction *VersionedInst,  }  namespace { -/// \brief Also expose this is a pass.  Currently this is only used for +/// Also expose this is a pass.  Currently this is only used for  /// unit-testing.  It adds all memchecks necessary to remove all may-aliasing  /// array accesses from the loop.  
class LoopVersioningPass : public FunctionPass { diff --git a/lib/Transforms/Utils/LowerInvoke.cpp b/lib/Transforms/Utils/LowerInvoke.cpp index ee84541e526d..c852d538b0d1 100644 --- a/lib/Transforms/Utils/LowerInvoke.cpp +++ b/lib/Transforms/Utils/LowerInvoke.cpp @@ -21,7 +21,7 @@  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Module.h"  #include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  using namespace llvm;  #define DEBUG_TYPE "lowerinvoke" @@ -48,10 +48,12 @@ static bool runImpl(Function &F) {    bool Changed = false;    for (BasicBlock &BB : F)      if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) { -      SmallVector<Value *, 16> CallArgs(II->op_begin(), II->op_end() - 3); +      SmallVector<Value *, 16> CallArgs(II->arg_begin(), II->arg_end()); +      SmallVector<OperandBundleDef, 1> OpBundles; +      II->getOperandBundlesAsDefs(OpBundles);        // Insert a normal call instruction...        CallInst *NewCall = -          CallInst::Create(II->getCalledValue(), CallArgs, "", II); +          CallInst::Create(II->getCalledValue(), CallArgs, OpBundles, "", II);        NewCall->takeName(II);        NewCall->setCallingConv(II->getCallingConv());        NewCall->setAttributes(II->getAttributes()); diff --git a/lib/Transforms/Utils/LowerMemIntrinsics.cpp b/lib/Transforms/Utils/LowerMemIntrinsics.cpp index 57dc225e9dab..03006ef3a2d3 100644 --- a/lib/Transforms/Utils/LowerMemIntrinsics.cpp +++ b/lib/Transforms/Utils/LowerMemIntrinsics.cpp @@ -409,8 +409,8 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,                                /* SrcAddr */ Memcpy->getRawSource(),                                /* DstAddr */ Memcpy->getRawDest(),                                /* CopyLen */ CI, -                              /* SrcAlign */ Memcpy->getAlignment(), -                              /* DestAlign */ Memcpy->getAlignment(), +                              /* SrcAlign */ Memcpy->getSourceAlignment(), + 
                             /* DestAlign */ Memcpy->getDestAlignment(),                                /* SrcIsVolatile */ Memcpy->isVolatile(),                                /* DstIsVolatile */ Memcpy->isVolatile(),                                /* TargetTransformInfo */ TTI); @@ -419,8 +419,8 @@ void llvm::expandMemCpyAsLoop(MemCpyInst *Memcpy,                                  /* SrcAddr */ Memcpy->getRawSource(),                                  /* DstAddr */ Memcpy->getRawDest(),                                  /* CopyLen */ Memcpy->getLength(), -                                /* SrcAlign */ Memcpy->getAlignment(), -                                /* DestAlign */ Memcpy->getAlignment(), +                                /* SrcAlign */ Memcpy->getSourceAlignment(), +                                /* DestAlign */ Memcpy->getDestAlignment(),                                  /* SrcIsVolatile */ Memcpy->isVolatile(),                                  /* DstIsVolatile */ Memcpy->isVolatile(),                                  /* TargetTransfomrInfo */ TTI); @@ -432,8 +432,8 @@ void llvm::expandMemMoveAsLoop(MemMoveInst *Memmove) {                      /* SrcAddr */ Memmove->getRawSource(),                      /* DstAddr */ Memmove->getRawDest(),                      /* CopyLen */ Memmove->getLength(), -                    /* SrcAlign */ Memmove->getAlignment(), -                    /* DestAlign */ Memmove->getAlignment(), +                    /* SrcAlign */ Memmove->getSourceAlignment(), +                    /* DestAlign */ Memmove->getDestAlignment(),                      /* SrcIsVolatile */ Memmove->isVolatile(),                      /* DstIsVolatile */ Memmove->isVolatile());  } @@ -443,6 +443,6 @@ void llvm::expandMemSetAsLoop(MemSetInst *Memset) {                     /* DstAddr */ Memset->getRawDest(),                     /* CopyLen */ Memset->getLength(),                     /* SetValue */ Memset->getValue(), -                   /* Alignment */ 
Memset->getAlignment(), +                   /* Alignment */ Memset->getDestAlignment(),                     Memset->isVolatile());  } diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp index 344cb35df986..e99ecfef19cd 100644 --- a/lib/Transforms/Utils/LowerSwitch.cpp +++ b/lib/Transforms/Utils/LowerSwitch.cpp @@ -29,7 +29,7 @@  #include "llvm/Support/Compiler.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h"  #include <algorithm>  #include <cassert> @@ -74,7 +74,7 @@ namespace {      LowerSwitch() : FunctionPass(ID) {        initializeLowerSwitchPass(*PassRegistry::getPassRegistry()); -    }  +    }      bool runOnFunction(Function &F) override; @@ -155,11 +155,8 @@ bool LowerSwitch::runOnFunction(Function &F) {  }  /// Used for debugging purposes. -static raw_ostream& operator<<(raw_ostream &O, -                               const LowerSwitch::CaseVector &C) -    LLVM_ATTRIBUTE_USED; - -static raw_ostream& operator<<(raw_ostream &O, +LLVM_ATTRIBUTE_USED +static raw_ostream &operator<<(raw_ostream &O,                                 const LowerSwitch::CaseVector &C) {    O << "["; @@ -172,7 +169,7 @@ static raw_ostream& operator<<(raw_ostream &O,    return O << "]";  } -/// \brief Update the first occurrence of the "switch statement" BB in the PHI +/// Update the first occurrence of the "switch statement" BB in the PHI  /// node with the "new" BB. The other occurrences will:  ///  /// 1) Be updated by subsequent calls to this function.  
Switch statements may @@ -245,14 +242,13 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,    unsigned Mid = Size / 2;    std::vector<CaseRange> LHS(Begin, Begin + Mid); -  DEBUG(dbgs() << "LHS: " << LHS << "\n"); +  LLVM_DEBUG(dbgs() << "LHS: " << LHS << "\n");    std::vector<CaseRange> RHS(Begin + Mid, End); -  DEBUG(dbgs() << "RHS: " << RHS << "\n"); +  LLVM_DEBUG(dbgs() << "RHS: " << RHS << "\n");    CaseRange &Pivot = *(Begin + Mid); -  DEBUG(dbgs() << "Pivot ==> " -               << Pivot.Low->getValue() -               << " -" << Pivot.High->getValue() << "\n"); +  LLVM_DEBUG(dbgs() << "Pivot ==> " << Pivot.Low->getValue() << " -" +                    << Pivot.High->getValue() << "\n");    // NewLowerBound here should never be the integer minimal value.    // This is because it is computed from a case range that is never @@ -274,20 +270,14 @@ LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, ConstantInt *LowerBound,        NewUpperBound = LHS.back().High;    } -  DEBUG(dbgs() << "LHS Bounds ==> "; -        if (LowerBound) { -          dbgs() << LowerBound->getSExtValue(); -        } else { -          dbgs() << "NONE"; -        } -        dbgs() << " - " << NewUpperBound->getSExtValue() << "\n"; -        dbgs() << "RHS Bounds ==> "; -        dbgs() << NewLowerBound->getSExtValue() << " - "; -        if (UpperBound) { -          dbgs() << UpperBound->getSExtValue() << "\n"; -        } else { -          dbgs() << "NONE\n"; -        }); +  LLVM_DEBUG(dbgs() << "LHS Bounds ==> "; if (LowerBound) { +    dbgs() << LowerBound->getSExtValue(); +  } else { dbgs() << "NONE"; } dbgs() << " - " +                                      << NewUpperBound->getSExtValue() << "\n"; +             dbgs() << "RHS Bounds ==> "; +             dbgs() << NewLowerBound->getSExtValue() << " - "; if (UpperBound) { +               dbgs() << UpperBound->getSExtValue() << "\n"; +             } else { dbgs() << "NONE\n"; });    // Create a new node that 
checks if the value is < pivot. Go to the    // left branch if it is and right branch if not. @@ -337,7 +327,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,      } else if (Leaf.Low->isZero()) {        // Val >= 0 && Val <= Hi --> Val <=u Hi        Comp = new ICmpInst(*NewLeaf, ICmpInst::ICMP_ULE, Val, Leaf.High, -                          "SwitchLeaf");       +                          "SwitchLeaf");      } else {        // Emit V-Lo <=u Hi-Lo        Constant* NegLo = ConstantExpr::getNeg(Leaf.Low); @@ -364,7 +354,7 @@ BasicBlock* LowerSwitch::newLeafBlock(CaseRange& Leaf, Value* Val,      for (uint64_t j = 0; j < Range; ++j) {        PN->removeIncomingValue(OrigBlock);      } -     +      int BlockIdx = PN->getBasicBlockIndex(OrigBlock);      assert(BlockIdx != -1 && "Switch didn't go to this successor??");      PN->setIncomingBlock((unsigned)BlockIdx, NewLeaf); @@ -382,7 +372,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {      Cases.push_back(CaseRange(Case.getCaseValue(), Case.getCaseValue(),                                Case.getCaseSuccessor())); -  std::sort(Cases.begin(), Cases.end(), CaseCmp()); +  llvm::sort(Cases.begin(), Cases.end(), CaseCmp());    // Merge case into clusters    if (Cases.size() >= 2) { @@ -443,9 +433,9 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,    // Prepare cases vector.    CaseVector Cases;    unsigned numCmps = Clusterify(Cases, SI); -  DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() -               << ". Total compares: " << numCmps << "\n"); -  DEBUG(dbgs() << "Cases: " << Cases << "\n"); +  LLVM_DEBUG(dbgs() << "Clusterify finished. Total clusters: " << Cases.size() +                    << ". 
Total compares: " << numCmps << "\n"); +  LLVM_DEBUG(dbgs() << "Cases: " << Cases << "\n");    (void)numCmps;    ConstantInt *LowerBound = nullptr; @@ -505,6 +495,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,      }  #endif +    // As the default block in the switch is unreachable, update the PHI nodes +    // (remove the entry to the default block) to reflect this. +    Default->removePredecessor(OrigBlock); +      // Use the most popular block as the new default, reducing the number of      // cases.      assert(MaxPop > 0 && PopSucc); @@ -518,29 +512,33 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,      if (Cases.empty()) {        BranchInst::Create(Default, CurBlock);        SI->eraseFromParent(); +      // As all the cases have been replaced with a single branch, only keep +      // one entry in the PHI nodes. +      for (unsigned I = 0 ; I < (MaxPop - 1) ; ++I) +        PopSucc->removePredecessor(OrigBlock);        return;      }    } +  unsigned NrOfDefaults = (SI->getDefaultDest() == Default) ? 1 : 0; +  for (const auto &Case : SI->cases()) +    if (Case.getCaseSuccessor() == Default) +      NrOfDefaults++; +    // Create a new, empty default block so that the new hierarchy of    // if-then statements go to this and the PHI nodes are happy.    BasicBlock *NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");    F->getBasicBlockList().insert(Default->getIterator(), NewDefault);    BranchInst::Create(Default, NewDefault); -  // If there is an entry in any PHI nodes for the default edge, make sure -  // to update them as well. 
-  for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) { -    PHINode *PN = cast<PHINode>(I); -    int BlockIdx = PN->getBasicBlockIndex(OrigBlock); -    assert(BlockIdx != -1 && "Switch didn't go to this successor??"); -    PN->setIncomingBlock((unsigned)BlockIdx, NewDefault); -  } -    BasicBlock *SwitchBlock =        switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,                      OrigBlock, OrigBlock, NewDefault, UnreachableRanges); +  // If there are entries in any PHI nodes for the default edge, make sure +  // to update them as well. +  fixPhis(Default, OrigBlock, NewDefault, NrOfDefaults); +    // Branch to our shiny new if-then stuff...    BranchInst::Create(SwitchBlock, OrigBlock); diff --git a/lib/Transforms/Utils/Mem2Reg.cpp b/lib/Transforms/Utils/Mem2Reg.cpp index 29f289b62da0..23145e584751 100644 --- a/lib/Transforms/Utils/Mem2Reg.cpp +++ b/lib/Transforms/Utils/Mem2Reg.cpp @@ -22,7 +22,7 @@  #include "llvm/IR/PassManager.h"  #include "llvm/Pass.h"  #include "llvm/Support/Casting.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include "llvm/Transforms/Utils/PromoteMemToReg.h"  #include <vector> diff --git a/lib/Transforms/Utils/MetaRenamer.cpp b/lib/Transforms/Utils/MetaRenamer.cpp index 0f7bd76c03ca..323f2552ca80 100644 --- a/lib/Transforms/Utils/MetaRenamer.cpp +++ b/lib/Transforms/Utils/MetaRenamer.cpp @@ -29,7 +29,7 @@  #include "llvm/IR/Type.h"  #include "llvm/IR/TypeFinder.h"  #include "llvm/Pass.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils.h"  using namespace llvm; diff --git a/lib/Transforms/Utils/OrderedInstructions.cpp b/lib/Transforms/Utils/OrderedInstructions.cpp index dc780542ce68..6d0b96f6aa8a 100644 --- a/lib/Transforms/Utils/OrderedInstructions.cpp +++ b/lib/Transforms/Utils/OrderedInstructions.cpp @@ -14,19 +14,38 @@  #include "llvm/Transforms/Utils/OrderedInstructions.h"  using namespace llvm; +bool 
OrderedInstructions::localDominates(const Instruction *InstA, +                                         const Instruction *InstB) const { +  assert(InstA->getParent() == InstB->getParent() && +         "Instructions must be in the same basic block"); + +  const BasicBlock *IBB = InstA->getParent(); +  auto OBB = OBBMap.find(IBB); +  if (OBB == OBBMap.end()) +    OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first; +  return OBB->second->dominates(InstA, InstB); +} +  /// Given 2 instructions, use OrderedBasicBlock to check for dominance relation  /// if the instructions are in the same basic block, Otherwise, use dominator  /// tree.  bool OrderedInstructions::dominates(const Instruction *InstA,                                      const Instruction *InstB) const { -  const BasicBlock *IBB = InstA->getParent();    // Use ordered basic block to do dominance check in case the 2 instructions    // are in the same basic block. -  if (IBB == InstB->getParent()) { -    auto OBB = OBBMap.find(IBB); -    if (OBB == OBBMap.end()) -      OBB = OBBMap.insert({IBB, make_unique<OrderedBasicBlock>(IBB)}).first; -    return OBB->second->dominates(InstA, InstB); -  } +  if (InstA->getParent() == InstB->getParent()) +    return localDominates(InstA, InstB);    return DT->dominates(InstA->getParent(), InstB->getParent());  } + +bool OrderedInstructions::dfsBefore(const Instruction *InstA, +                                    const Instruction *InstB) const { +  // Use ordered basic block in case the 2 instructions are in the same basic +  // block. 
+  if (InstA->getParent() == InstB->getParent()) +    return localDominates(InstA, InstB); + +  DomTreeNode *DA = DT->getNode(InstA->getParent()); +  DomTreeNode *DB = DT->getNode(InstB->getParent()); +  return DA->getDFSNumIn() < DB->getDFSNumIn(); +} diff --git a/lib/Transforms/Utils/PredicateInfo.cpp b/lib/Transforms/Utils/PredicateInfo.cpp index d47be6ea566b..2923977b791a 100644 --- a/lib/Transforms/Utils/PredicateInfo.cpp +++ b/lib/Transforms/Utils/PredicateInfo.cpp @@ -17,6 +17,7 @@  #include "llvm/ADT/STLExtras.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h"  #include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/CFG.h"  #include "llvm/IR/AssemblyAnnotationWriter.h" @@ -24,6 +25,7 @@  #include "llvm/IR/Dominators.h"  #include "llvm/IR/GlobalVariable.h"  #include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h"  #include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Metadata.h" @@ -32,7 +34,7 @@  #include "llvm/Support/Debug.h"  #include "llvm/Support/DebugCounter.h"  #include "llvm/Support/FormattedStream.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  #include "llvm/Transforms/Utils/OrderedInstructions.h"  #include <algorithm>  #define DEBUG_TYPE "predicateinfo" @@ -118,7 +120,7 @@ static bool valueComesBefore(OrderedInstructions &OI, const Value *A,      return false;    if (ArgA && ArgB)      return ArgA->getArgNo() < ArgB->getArgNo(); -  return OI.dominates(cast<Instruction>(A), cast<Instruction>(B)); +  return OI.dfsBefore(cast<Instruction>(A), cast<Instruction>(B));  }  // This compares ValueDFS structures, creating OrderedBasicBlocks where @@ -479,6 +481,19 @@ void PredicateInfo::buildPredicateInfo() {    renameUses(OpsToRename);  } +// Create a ssa_copy declaration with custom mangling, because +// Intrinsic::getDeclaration does not handle overloaded unnamed types properly: +// all unnamed types get mangled to 
the same string. We use the pointer +// to the type as name here, as it guarantees unique names for different +// types and we remove the declarations when destroying PredicateInfo. +// It is a workaround for PR38117, because solving it in a fully general way is +// tricky (FIXME). +static Function *getCopyDeclaration(Module *M, Type *Ty) { +  std::string Name = "llvm.ssa.copy." + utostr((uintptr_t) Ty); +  return cast<Function>(M->getOrInsertFunction( +      Name, getType(M->getContext(), Intrinsic::ssa_copy, Ty))); +} +  // Given the renaming stack, make all the operands currently on the stack real  // by inserting them into the IR.  Return the last operation's value.  Value *PredicateInfo::materializeStack(unsigned int &Counter, @@ -507,8 +522,9 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,      // order in the case of multiple predicateinfo in the same block.      if (isa<PredicateWithEdge>(ValInfo)) {        IRBuilder<> B(getBranchTerminator(ValInfo)); -      Function *IF = Intrinsic::getDeclaration( -          F.getParent(), Intrinsic::ssa_copy, Op->getType()); +      Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); +      if (IF->user_begin() == IF->user_end()) +        CreatedDeclarations.insert(IF);        CallInst *PIC =            B.CreateCall(IF, Op, Op->getName() + "." 
+ Twine(Counter++));        PredicateMap.insert({PIC, ValInfo}); @@ -518,8 +534,9 @@ Value *PredicateInfo::materializeStack(unsigned int &Counter,        assert(PAssume &&               "Should not have gotten here without it being an assume");        IRBuilder<> B(PAssume->AssumeInst); -      Function *IF = Intrinsic::getDeclaration( -          F.getParent(), Intrinsic::ssa_copy, Op->getType()); +      Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); +      if (IF->user_begin() == IF->user_end()) +        CreatedDeclarations.insert(IF);        CallInst *PIC = B.CreateCall(IF, Op);        PredicateMap.insert({PIC, ValInfo});        Result.Def = PIC; @@ -553,10 +570,11 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {    auto Comparator = [&](const Value *A, const Value *B) {      return valueComesBefore(OI, A, B);    }; -  std::sort(OpsToRename.begin(), OpsToRename.end(), Comparator); +  llvm::sort(OpsToRename.begin(), OpsToRename.end(), Comparator);    ValueDFS_Compare Compare(OI);    // Compute liveness, and rename in O(uses) per Op.    for (auto *Op : OpsToRename) { +    LLVM_DEBUG(dbgs() << "Visiting " << *Op << "\n");      unsigned Counter = 0;      SmallVector<ValueDFS, 16> OrderedUses;      const auto &ValueInfo = getValueInfo(Op); @@ -625,15 +643,15 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {        // we want to.        
bool PossibleCopy = VD.PInfo != nullptr;        if (RenameStack.empty()) { -        DEBUG(dbgs() << "Rename Stack is empty\n"); +        LLVM_DEBUG(dbgs() << "Rename Stack is empty\n");        } else { -        DEBUG(dbgs() << "Rename Stack Top DFS numbers are (" -                     << RenameStack.back().DFSIn << "," -                     << RenameStack.back().DFSOut << ")\n"); +        LLVM_DEBUG(dbgs() << "Rename Stack Top DFS numbers are (" +                          << RenameStack.back().DFSIn << "," +                          << RenameStack.back().DFSOut << ")\n");        } -      DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << "," -                   << VD.DFSOut << ")\n"); +      LLVM_DEBUG(dbgs() << "Current DFS numbers are (" << VD.DFSIn << "," +                        << VD.DFSOut << ")\n");        bool ShouldPush = (VD.Def || PossibleCopy);        bool OutOfScope = !stackIsInScope(RenameStack, VD); @@ -652,7 +670,7 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {        if (VD.Def || PossibleCopy)          continue;        if (!DebugCounter::shouldExecute(RenameCounter)) { -        DEBUG(dbgs() << "Skipping execution due to debug counter\n"); +        LLVM_DEBUG(dbgs() << "Skipping execution due to debug counter\n");          continue;        }        ValueDFS &Result = RenameStack.back(); @@ -663,8 +681,9 @@ void PredicateInfo::renameUses(SmallPtrSetImpl<Value *> &OpSet) {        if (!Result.Def)          Result.Def = materializeStack(Counter, RenameStack, Op); -      DEBUG(dbgs() << "Found replacement " << *Result.Def << " for " -                   << *VD.U->get() << " in " << *(VD.U->getUser()) << "\n"); +      LLVM_DEBUG(dbgs() << "Found replacement " << *Result.Def << " for " +                        << *VD.U->get() << " in " << *(VD.U->getUser()) +                        << "\n");        assert(DT.dominates(cast<Instruction>(Result.Def), *VD.U) &&               "Predicateinfo def should have dominated this use");    
    VD.U->set(Result.Def); @@ -702,7 +721,22 @@ PredicateInfo::PredicateInfo(Function &F, DominatorTree &DT,    buildPredicateInfo();  } -PredicateInfo::~PredicateInfo() {} +// Remove all declarations we created . The PredicateInfo consumers are +// responsible for remove the ssa_copy calls created. +PredicateInfo::~PredicateInfo() { +  // Collect function pointers in set first, as SmallSet uses a SmallVector +  // internally and we have to remove the asserting value handles first. +  SmallPtrSet<Function *, 20> FunctionPtrs; +  for (auto &F : CreatedDeclarations) +    FunctionPtrs.insert(&*F); +  CreatedDeclarations.clear(); + +  for (Function *F : FunctionPtrs) { +    assert(F->user_begin() == F->user_end() && +           "PredicateInfo consumer did not remove all SSA copies."); +    F->eraseFromParent(); +  } +}  void PredicateInfo::verifyPredicateInfo() const {} @@ -720,6 +754,20 @@ void PredicateInfoPrinterLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addRequired<AssumptionCacheTracker>();  } +// Replace ssa_copy calls created by PredicateInfo with their operand. 
+static void replaceCreatedSSACopys(PredicateInfo &PredInfo, Function &F) { +  for (auto I = inst_begin(F), E = inst_end(F); I != E;) { +    Instruction *Inst = &*I++; +    const auto *PI = PredInfo.getPredicateInfoFor(Inst); +    auto *II = dyn_cast<IntrinsicInst>(Inst); +    if (!PI || !II || II->getIntrinsicID() != Intrinsic::ssa_copy) +      continue; + +    Inst->replaceAllUsesWith(II->getOperand(0)); +    Inst->eraseFromParent(); +  } +} +  bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();    auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); @@ -727,6 +775,8 @@ bool PredicateInfoPrinterLegacyPass::runOnFunction(Function &F) {    PredInfo->print(dbgs());    if (VerifyPredicateInfo)      PredInfo->verifyPredicateInfo(); + +  replaceCreatedSSACopys(*PredInfo, F);    return false;  } @@ -735,12 +785,14 @@ PreservedAnalyses PredicateInfoPrinterPass::run(Function &F,    auto &DT = AM.getResult<DominatorTreeAnalysis>(F);    auto &AC = AM.getResult<AssumptionAnalysis>(F);    OS << "PredicateInfo for function: " << F.getName() << "\n"; -  make_unique<PredicateInfo>(F, DT, AC)->print(OS); +  auto PredInfo = make_unique<PredicateInfo>(F, DT, AC); +  PredInfo->print(OS); +  replaceCreatedSSACopys(*PredInfo, F);    return PreservedAnalyses::all();  } -/// \brief An assembly annotator class to print PredicateInfo information in +/// An assembly annotator class to print PredicateInfo information in  /// comments.  
class PredicateInfoAnnotatedWriter : public AssemblyAnnotationWriter {    friend class PredicateInfo; diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp index fcd3bd08482a..86e15bbd7f22 100644 --- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp +++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp @@ -26,6 +26,7 @@  #include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/CFG.h" @@ -45,7 +46,6 @@  #include "llvm/IR/Type.h"  #include "llvm/IR/User.h"  #include "llvm/Support/Casting.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/PromoteMemToReg.h"  #include <algorithm>  #include <cassert> @@ -164,26 +164,27 @@ struct AllocaInfo {    }  }; -// Data package used by RenamePass() -class RenamePassData { -public: +/// Data package used by RenamePass(). +struct RenamePassData {    using ValVector = std::vector<Value *>; +  using LocationVector = std::vector<DebugLoc>; -  RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V) -      : BB(B), Pred(P), Values(std::move(V)) {} +  RenamePassData(BasicBlock *B, BasicBlock *P, ValVector V, LocationVector L) +      : BB(B), Pred(P), Values(std::move(V)), Locations(std::move(L)) {}    BasicBlock *BB;    BasicBlock *Pred;    ValVector Values; +  LocationVector Locations;  }; -/// \brief This assigns and keeps a per-bb relative ordering of load/store +/// This assigns and keeps a per-bb relative ordering of load/store  /// instructions in the block that directly load or store an alloca.  ///  /// This functionality is important because it avoids scanning large basic  /// blocks multiple times when promoting many allocas in the same block.  
class LargeBlockInfo { -  /// \brief For each instruction that we track, keep the index of the +  /// For each instruction that we track, keep the index of the    /// instruction.    ///    /// The index starts out as the number of the instruction from the start of @@ -242,7 +243,7 @@ struct PromoteMem2Reg {    /// Reverse mapping of Allocas.    DenseMap<AllocaInst *, unsigned> AllocaLookup; -  /// \brief The PhiNodes we're adding. +  /// The PhiNodes we're adding.    ///    /// That map is used to simplify some Phi nodes as we iterate over it, so    /// it should have deterministic iterators.  We could use a MapVector, but @@ -294,7 +295,7 @@ private:    unsigned getNumPreds(const BasicBlock *BB) {      unsigned &NP = BBNumPreds[BB];      if (NP == 0) -      NP = std::distance(pred_begin(BB), pred_end(BB)) + 1; +      NP = pred_size(BB) + 1;      return NP - 1;    } @@ -303,6 +304,7 @@ private:                             SmallPtrSetImpl<BasicBlock *> &LiveInBlocks);    void RenamePass(BasicBlock *BB, BasicBlock *Pred,                    RenamePassData::ValVector &IncVals, +                  RenamePassData::LocationVector &IncLocs,                    std::vector<RenamePassData> &Worklist);    bool QueuePhiNode(BasicBlock *BB, unsigned AllocaIdx, unsigned &Version);  }; @@ -345,7 +347,7 @@ static void removeLifetimeIntrinsicUsers(AllocaInst *AI) {    }  } -/// \brief Rewrite as many loads as possible given a single store. +/// Rewrite as many loads as possible given a single store.  ///  /// When there is only a single store, we can use the domtree to trivially  /// replace all of the dominated loads with the stored value. Do so, and return @@ -475,7 +477,7 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,    // Sort the stores by their index, making it efficient to do a lookup with a    // binary search. 
-  std::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first()); +  llvm::sort(StoresByIndex.begin(), StoresByIndex.end(), less_first());    // Walk all of the loads from this alloca, replacing them with the nearest    // store above them, if any. @@ -509,6 +511,11 @@ static bool promoteSingleBlockAlloca(AllocaInst *AI, const AllocaInfo &Info,            !isKnownNonZero(ReplVal, DL, 0, AC, LI, &DT))          addAssumeNonNull(AC, LI); +      // If the replacement value is the load, this must occur in unreachable +      // code. +      if (ReplVal == LI) +        ReplVal = UndefValue::get(LI->getType()); +        LI->replaceAllUsesWith(ReplVal);      } @@ -631,10 +638,10 @@ void PromoteMem2Reg::run() {      SmallVector<BasicBlock *, 32> PHIBlocks;      IDF.calculate(PHIBlocks);      if (PHIBlocks.size() > 1) -      std::sort(PHIBlocks.begin(), PHIBlocks.end(), -                [this](BasicBlock *A, BasicBlock *B) { -                  return BBNumbers.lookup(A) < BBNumbers.lookup(B); -                }); +      llvm::sort(PHIBlocks.begin(), PHIBlocks.end(), +                 [this](BasicBlock *A, BasicBlock *B) { +                   return BBNumbers.lookup(A) < BBNumbers.lookup(B); +                 });      unsigned CurrentVersion = 0;      for (BasicBlock *BB : PHIBlocks) @@ -653,15 +660,20 @@ void PromoteMem2Reg::run() {    for (unsigned i = 0, e = Allocas.size(); i != e; ++i)      Values[i] = UndefValue::get(Allocas[i]->getAllocatedType()); +  // When handling debug info, treat all incoming values as if they have unknown +  // locations until proven otherwise. 
+  RenamePassData::LocationVector Locations(Allocas.size()); +    // Walks all basic blocks in the function performing the SSA rename algorithm    // and inserting the phi nodes we marked as necessary    std::vector<RenamePassData> RenamePassWorkList; -  RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values)); +  RenamePassWorkList.emplace_back(&F.front(), nullptr, std::move(Values), +                                  std::move(Locations));    do {      RenamePassData RPD = std::move(RenamePassWorkList.back());      RenamePassWorkList.pop_back();      // RenamePass may add new worklist entries. -    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RenamePassWorkList); +    RenamePass(RPD.BB, RPD.Pred, RPD.Values, RPD.Locations, RenamePassWorkList);    } while (!RenamePassWorkList.empty());    // The renamer uses the Visited set to avoid infinite loops.  Clear it now. @@ -740,7 +752,7 @@ void PromoteMem2Reg::run() {      // Ok, now we know that all of the PHI nodes are missing entries for some      // basic blocks.  Start by sorting the incoming predecessors for efficient      // access. -    std::sort(Preds.begin(), Preds.end()); +    llvm::sort(Preds.begin(), Preds.end());      // Now we loop through all BB's which have entries in SomePHI and remove      // them from the Preds list. @@ -772,7 +784,7 @@ void PromoteMem2Reg::run() {    NewPhiNodes.clear();  } -/// \brief Determine which blocks the value is live in. +/// Determine which blocks the value is live in.  ///  /// These are blocks which lead to uses.  Knowing this allows us to avoid  /// inserting PHI nodes into blocks which don't lead to uses (thus, the @@ -846,7 +858,7 @@ void PromoteMem2Reg::ComputeLiveInBlocks(    }  } -/// \brief Queue a phi-node to be added to a basic-block for a specific Alloca. +/// Queue a phi-node to be added to a basic-block for a specific Alloca.  
///  /// Returns true if there wasn't already a phi-node for that variable  bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo, @@ -868,13 +880,24 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,    return true;  } -/// \brief Recursively traverse the CFG of the function, renaming loads and +/// Update the debug location of a phi. \p ApplyMergedLoc indicates whether to +/// create a merged location incorporating \p DL, or to set \p DL directly. +static void updateForIncomingValueLocation(PHINode *PN, DebugLoc DL, +                                           bool ApplyMergedLoc) { +  if (ApplyMergedLoc) +    PN->applyMergedLocation(PN->getDebugLoc(), DL); +  else +    PN->setDebugLoc(DL); +} + +/// Recursively traverse the CFG of the function, renaming loads and  /// stores to the allocas which we are promoting.  ///  /// IncomingVals indicates what value each Alloca contains on exit from the  /// predecessor block Pred.  void PromoteMem2Reg::RenamePass(BasicBlock *BB, BasicBlock *Pred,                                  RenamePassData::ValVector &IncomingVals, +                                RenamePassData::LocationVector &IncomingLocs,                                  std::vector<RenamePassData> &Worklist) {  NextIteration:    // If we are inserting any phi nodes into this BB, they will already be in the @@ -899,6 +922,10 @@ NextIteration:        do {          unsigned AllocaNo = PhiToAllocaMap[APN]; +        // Update the location of the phi node. +        updateForIncomingValueLocation(APN, IncomingLocs[AllocaNo], +                                       APN->getNumIncomingValues() > 0); +          // Add N incoming values to the PHI node.          for (unsigned i = 0; i != NumEdges; ++i)            APN->addIncoming(IncomingVals[AllocaNo], Pred); @@ -960,8 +987,11 @@ NextIteration:          continue;        // what value were we writing? 
-      IncomingVals[ai->second] = SI->getOperand(0); +      unsigned AllocaNo = ai->second; +      IncomingVals[AllocaNo] = SI->getOperand(0); +        // Record debuginfo for the store before removing it. +      IncomingLocs[AllocaNo] = SI->getDebugLoc();        for (DbgInfoIntrinsic *DII : AllocaDbgDeclares[ai->second])          ConvertDebugDeclareToDebugValue(DII, SI, DIB);        BB->getInstList().erase(SI); @@ -984,7 +1014,7 @@ NextIteration:    for (; I != E; ++I)      if (VisitedSuccs.insert(*I).second) -      Worklist.emplace_back(*I, Pred, IncomingVals); +      Worklist.emplace_back(*I, Pred, IncomingVals, IncomingLocs);    goto NextIteration;  } diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index e4b20b0faa15..ca184ed7c4e3 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -147,11 +147,9 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {    if (isa<PHINode>(BB->begin())) {      SmallDenseMap<BasicBlock *, Value *, 8> ValueMapping(PredValues.begin(),                                                           PredValues.end()); -    PHINode *SomePHI; -    for (BasicBlock::iterator It = BB->begin(); -         (SomePHI = dyn_cast<PHINode>(It)); ++It) { -      if (IsEquivalentPHI(SomePHI, ValueMapping)) -        return SomePHI; +    for (PHINode &SomePHI : BB->phis()) { +      if (IsEquivalentPHI(&SomePHI, ValueMapping)) +        return &SomePHI;      }    } @@ -180,7 +178,7 @@ Value *SSAUpdater::GetValueInMiddleOfBlock(BasicBlock *BB) {    // If the client wants to know about all new instructions, tell it.    
if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI); -  DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n"); +  LLVM_DEBUG(dbgs() << "  Inserted PHI: " << *InsertedPHI << "\n");    return InsertedPHI;  } diff --git a/lib/Transforms/Utils/SSAUpdaterBulk.cpp b/lib/Transforms/Utils/SSAUpdaterBulk.cpp new file mode 100644 index 000000000000..397bac2940a4 --- /dev/null +++ b/lib/Transforms/Utils/SSAUpdaterBulk.cpp @@ -0,0 +1,191 @@ +//===- SSAUpdaterBulk.cpp - Unstructured SSA Update Tool ------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SSAUpdaterBulk class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/SSAUpdaterBulk.h" +#include "llvm/Analysis/IteratedDominanceFrontier.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" + +using namespace llvm; + +#define DEBUG_TYPE "ssaupdaterbulk" + +/// Helper function for finding a block which should have a value for the given +/// user. For PHI-nodes this block is the corresponding predecessor, for other +/// instructions it's their parent block. +static BasicBlock *getUserBB(Use *U) { +  auto *User = cast<Instruction>(U->getUser()); + +  if (auto *UserPN = dyn_cast<PHINode>(User)) +    return UserPN->getIncomingBlock(*U); +  else +    return User->getParent(); +} + +/// Add a new variable to the SSA rewriter. This needs to be called before +/// AddAvailableValue or AddUse calls. 
+unsigned SSAUpdaterBulk::AddVariable(StringRef Name, Type *Ty) { +  unsigned Var = Rewrites.size(); +  LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": initialized with Ty = " +                    << *Ty << ", Name = " << Name << "\n"); +  RewriteInfo RI(Name, Ty); +  Rewrites.push_back(RI); +  return Var; +} + +/// Indicate that a rewritten value is available in the specified block with the +/// specified value. +void SSAUpdaterBulk::AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V) { +  assert(Var < Rewrites.size() && "Variable not found!"); +  LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var +                    << ": added new available value" << *V << " in " +                    << BB->getName() << "\n"); +  Rewrites[Var].Defines[BB] = V; +} + +/// Record a use of the symbolic value. This use will be updated with a +/// rewritten value when RewriteAllUses is called. +void SSAUpdaterBulk::AddUse(unsigned Var, Use *U) { +  assert(Var < Rewrites.size() && "Variable not found!"); +  LLVM_DEBUG(dbgs() << "SSAUpdater: Var=" << Var << ": added a use" << *U->get() +                    << " in " << getUserBB(U)->getName() << "\n"); +  Rewrites[Var].Uses.push_back(U); +} + +/// Return true if the SSAUpdater already has a value for the specified variable +/// in the specified block. +bool SSAUpdaterBulk::HasValueForBlock(unsigned Var, BasicBlock *BB) { +  return (Var < Rewrites.size()) ? Rewrites[Var].Defines.count(BB) : false; +} + +// Compute value at the given block BB. We either should already know it, or we +// should be able to recursively reach it going up dominator tree. 
+Value *SSAUpdaterBulk::computeValueAt(BasicBlock *BB, RewriteInfo &R, +                                      DominatorTree *DT) { +  if (!R.Defines.count(BB)) { +    if (DT->isReachableFromEntry(BB) && PredCache.get(BB).size()) { +      BasicBlock *IDom = DT->getNode(BB)->getIDom()->getBlock(); +      Value *V = computeValueAt(IDom, R, DT); +      R.Defines[BB] = V; +    } else +      R.Defines[BB] = UndefValue::get(R.Ty); +  } +  return R.Defines[BB]; +} + +/// Given sets of UsingBlocks and DefBlocks, compute the set of LiveInBlocks. +/// This is basically a subgraph limited by DefBlocks and UsingBlocks. +static void +ComputeLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &UsingBlocks, +                    const SmallPtrSetImpl<BasicBlock *> &DefBlocks, +                    SmallPtrSetImpl<BasicBlock *> &LiveInBlocks, +                    PredIteratorCache &PredCache) { +  // To determine liveness, we must iterate through the predecessors of blocks +  // where the def is live.  Blocks are added to the worklist if we need to +  // check their predecessors.  Start with all the using blocks. +  SmallVector<BasicBlock *, 64> LiveInBlockWorklist(UsingBlocks.begin(), +                                                    UsingBlocks.end()); + +  // Now that we have a set of blocks where the phi is live-in, recursively add +  // their predecessors until we find the full region the value is live. +  while (!LiveInBlockWorklist.empty()) { +    BasicBlock *BB = LiveInBlockWorklist.pop_back_val(); + +    // The block really is live in here, insert it into the set.  If already in +    // the set, then it has already been processed. +    if (!LiveInBlocks.insert(BB).second) +      continue; + +    // Since the value is live into BB, it is either defined in a predecessor or +    // live into it to.  Add the preds to the worklist unless they are a +    // defining block. 
+    for (BasicBlock *P : PredCache.get(BB)) { +      // The value is not live into a predecessor if it defines the value. +      if (DefBlocks.count(P)) +        continue; + +      // Otherwise it is, add to the worklist. +      LiveInBlockWorklist.push_back(P); +    } +  } +} + +/// Perform all the necessary updates, including new PHI-nodes insertion and the +/// requested uses update. +void SSAUpdaterBulk::RewriteAllUses(DominatorTree *DT, +                                    SmallVectorImpl<PHINode *> *InsertedPHIs) { +  for (auto &R : Rewrites) { +    // Compute locations for new phi-nodes. +    // For that we need to initialize DefBlocks from definitions in R.Defines, +    // UsingBlocks from uses in R.Uses, then compute LiveInBlocks, and then use +    // this set for computing iterated dominance frontier (IDF). +    // The IDF blocks are the blocks where we need to insert new phi-nodes. +    ForwardIDFCalculator IDF(*DT); +    LLVM_DEBUG(dbgs() << "SSAUpdater: rewriting " << R.Uses.size() +                      << " use(s)\n"); + +    SmallPtrSet<BasicBlock *, 2> DefBlocks; +    for (auto &Def : R.Defines) +      DefBlocks.insert(Def.first); +    IDF.setDefiningBlocks(DefBlocks); + +    SmallPtrSet<BasicBlock *, 2> UsingBlocks; +    for (Use *U : R.Uses) +      UsingBlocks.insert(getUserBB(U)); + +    SmallVector<BasicBlock *, 32> IDFBlocks; +    SmallPtrSet<BasicBlock *, 32> LiveInBlocks; +    ComputeLiveInBlocks(UsingBlocks, DefBlocks, LiveInBlocks, PredCache); +    IDF.resetLiveInBlocks(); +    IDF.setLiveInBlocks(LiveInBlocks); +    IDF.calculate(IDFBlocks); + +    // We've computed IDF, now insert new phi-nodes there. 
+    SmallVector<PHINode *, 4> InsertedPHIsForVar; +    for (auto *FrontierBB : IDFBlocks) { +      IRBuilder<> B(FrontierBB, FrontierBB->begin()); +      PHINode *PN = B.CreatePHI(R.Ty, 0, R.Name); +      R.Defines[FrontierBB] = PN; +      InsertedPHIsForVar.push_back(PN); +      if (InsertedPHIs) +        InsertedPHIs->push_back(PN); +    } + +    // Fill in arguments of the inserted PHIs. +    for (auto *PN : InsertedPHIsForVar) { +      BasicBlock *PBB = PN->getParent(); +      for (BasicBlock *Pred : PredCache.get(PBB)) +        PN->addIncoming(computeValueAt(Pred, R, DT), Pred); +    } + +    // Rewrite actual uses with the inserted definitions. +    SmallPtrSet<Use *, 4> ProcessedUses; +    for (Use *U : R.Uses) { +      if (!ProcessedUses.insert(U).second) +        continue; +      Value *V = computeValueAt(getUserBB(U), R, DT); +      Value *OldVal = U->get(); +      assert(OldVal && "Invalid use!"); +      // Notify that users of the existing value that it is being replaced. +      if (OldVal != V && OldVal->hasValueHandle()) +        ValueHandleBase::ValueIsRAUWd(OldVal, V); +      LLVM_DEBUG(dbgs() << "SSAUpdater: replacing " << *OldVal << " with " << *V +                        << "\n"); +      U->set(V); +    } +  } +} diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index e7358dbcb624..c87b5c16ffce 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -19,7 +19,6 @@  #include "llvm/ADT/SetOperations.h"  #include "llvm/ADT/SetVector.h"  #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h"  #include "llvm/ADT/SmallVector.h"  #include "llvm/ADT/Statistic.h"  #include "llvm/ADT/StringRef.h" @@ -28,6 +27,7 @@  #include "llvm/Analysis/EHPersonalities.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/Attributes.h"  
#include "llvm/IR/BasicBlock.h" @@ -66,7 +66,6 @@  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Transforms/Utils/ValueMapper.h"  #include <algorithm>  #include <cassert> @@ -283,12 +282,8 @@ isProfitableToFoldUnconditional(BranchInst *SI1, BranchInst *SI2,  /// of Succ.  static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,                                    BasicBlock *ExistPred) { -  if (!isa<PHINode>(Succ->begin())) -    return; // Quick exit if nothing to do - -  PHINode *PN; -  for (BasicBlock::iterator I = Succ->begin(); (PN = dyn_cast<PHINode>(I)); ++I) -    PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred); +  for (PHINode &PN : Succ->phis()) +    PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);  }  /// Compute an abstract "cost" of speculating the given instruction, @@ -692,9 +687,7 @@ Value *SimplifyCFGOpt::isValueEqualityComparison(TerminatorInst *TI) {    if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {      // Do not permit merging of large switch instructions into their      // predecessors unless there is only one predecessor. -    if (SI->getNumSuccessors() * std::distance(pred_begin(SI->getParent()), -                                               pred_end(SI->getParent())) <= -        128) +    if (SI->getNumSuccessors() * pred_size(SI->getParent()) <= 128)        CV = SI->getCondition();    } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))      if (BI->isConditional() && BI->getCondition()->hasOneUse()) @@ -851,9 +844,9 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(        // Remove PHI node entries for the dead edge.        
ThisCases[0].Dest->removePredecessor(TI->getParent()); -      DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() -                   << "Through successor TI: " << *TI << "Leaving: " << *NI -                   << "\n"); +      LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() +                        << "Through successor TI: " << *TI << "Leaving: " << *NI +                        << "\n");        EraseTerminatorInstAndDCECond(TI);        return true; @@ -865,8 +858,8 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(      for (unsigned i = 0, e = PredCases.size(); i != e; ++i)        DeadCases.insert(PredCases[i].Value); -    DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() -                 << "Through successor TI: " << *TI); +    LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() +                      << "Through successor TI: " << *TI);      // Collect branch weights into a vector.      SmallVector<uint32_t, 8> Weights; @@ -892,7 +885,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(      if (HasWeight && Weights.size() >= 2)        setBranchWeights(SI, Weights); -    DEBUG(dbgs() << "Leaving: " << *TI << "\n"); +    LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");      return true;    } @@ -933,9 +926,9 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(    Instruction *NI = Builder.CreateBr(TheRealDest);    (void)NI; -  DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() -               << "Through successor TI: " << *TI << "Leaving: " << *NI -               << "\n"); +  LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator() +                    << "Through successor TI: " << *TI << "Leaving: " << *NI +                    << "\n");    EraseTerminatorInstAndDCECond(TI);    return true; @@ -1228,11 +1221,9 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,  
static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,                                  Instruction *I1, Instruction *I2) {    for (BasicBlock *Succ : successors(BB1)) { -    PHINode *PN; -    for (BasicBlock::iterator BBI = Succ->begin(); -         (PN = dyn_cast<PHINode>(BBI)); ++BBI) { -      Value *BB1V = PN->getIncomingValueForBlock(BB1); -      Value *BB2V = PN->getIncomingValueForBlock(BB2); +    for (const PHINode &PN : Succ->phis()) { +      Value *BB1V = PN.getIncomingValueForBlock(BB1); +      Value *BB2V = PN.getIncomingValueForBlock(BB2);        if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {          return false;        } @@ -1282,34 +1273,58 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,      if (isa<TerminatorInst>(I1))        goto HoistTerminator; +    // If we're going to hoist a call, make sure that the two instructions we're +    // commoning/hoisting are both marked with musttail, or neither of them is +    // marked as such. Otherwise, we might end up in a situation where we hoist +    // from a block where the terminator is a `ret` to a block where the terminator +    // is a `br`, and `musttail` calls expect to be followed by a return. +    auto *C1 = dyn_cast<CallInst>(I1); +    auto *C2 = dyn_cast<CallInst>(I2); +    if (C1 && C2) +      if (C1->isMustTailCall() != C2->isMustTailCall()) +        return Changed; +      if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))        return Changed; -    // For a normal instruction, we just move one to right before the branch, -    // then replace all uses of the other with the first.  Finally, we remove -    // the now redundant second instruction. 
-    BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1); -    if (!I2->use_empty()) -      I2->replaceAllUsesWith(I1); -    I1->andIRFlags(I2); -    unsigned KnownIDs[] = {LLVMContext::MD_tbaa, -                           LLVMContext::MD_range, -                           LLVMContext::MD_fpmath, -                           LLVMContext::MD_invariant_load, -                           LLVMContext::MD_nonnull, -                           LLVMContext::MD_invariant_group, -                           LLVMContext::MD_align, -                           LLVMContext::MD_dereferenceable, -                           LLVMContext::MD_dereferenceable_or_null, -                           LLVMContext::MD_mem_parallel_loop_access}; -    combineMetadata(I1, I2, KnownIDs); - -    // I1 and I2 are being combined into a single instruction.  Its debug -    // location is the merged locations of the original instructions. -    I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); - -    I2->eraseFromParent(); -    Changed = true; +    if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) { +      assert (isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2)); +      // The debug location is an integral part of a debug info intrinsic +      // and can't be separated from it or replaced.  Instead of attempting +      // to merge locations, simply hoist both copies of the intrinsic. +      BIParent->getInstList().splice(BI->getIterator(), +                                     BB1->getInstList(), I1); +      BIParent->getInstList().splice(BI->getIterator(), +                                     BB2->getInstList(), I2); +      Changed = true; +    } else { +      // For a normal instruction, we just move one to right before the branch, +      // then replace all uses of the other with the first.  Finally, we remove +      // the now redundant second instruction. 
+      BIParent->getInstList().splice(BI->getIterator(), +                                     BB1->getInstList(), I1); +      if (!I2->use_empty()) +        I2->replaceAllUsesWith(I1); +      I1->andIRFlags(I2); +      unsigned KnownIDs[] = {LLVMContext::MD_tbaa, +                             LLVMContext::MD_range, +                             LLVMContext::MD_fpmath, +                             LLVMContext::MD_invariant_load, +                             LLVMContext::MD_nonnull, +                             LLVMContext::MD_invariant_group, +                             LLVMContext::MD_align, +                             LLVMContext::MD_dereferenceable, +                             LLVMContext::MD_dereferenceable_or_null, +                             LLVMContext::MD_mem_parallel_loop_access}; +      combineMetadata(I1, I2, KnownIDs); + +      // I1 and I2 are being combined into a single instruction.  Its debug +      // location is the merged locations of the original instructions. +      I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()); + +      I2->eraseFromParent(); +      Changed = true; +    }      I1 = &*BB1_Itr++;      I2 = &*BB2_Itr++; @@ -1332,18 +1347,16 @@ HoistTerminator:      return Changed;    for (BasicBlock *Succ : successors(BB1)) { -    PHINode *PN; -    for (BasicBlock::iterator BBI = Succ->begin(); -         (PN = dyn_cast<PHINode>(BBI)); ++BBI) { -      Value *BB1V = PN->getIncomingValueForBlock(BB1); -      Value *BB2V = PN->getIncomingValueForBlock(BB2); +    for (PHINode &PN : Succ->phis()) { +      Value *BB1V = PN.getIncomingValueForBlock(BB1); +      Value *BB2V = PN.getIncomingValueForBlock(BB2);        if (BB1V == BB2V)          continue;        // Check for passingValueIsAlwaysUndefined here because we would rather        // eliminate undefined control flow then converting it to a select. 
-      if (passingValueIsAlwaysUndefined(BB1V, PN) || -          passingValueIsAlwaysUndefined(BB2V, PN)) +      if (passingValueIsAlwaysUndefined(BB1V, &PN) || +          passingValueIsAlwaysUndefined(BB2V, &PN))          return Changed;        if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) @@ -1369,11 +1382,9 @@ HoistTerminator:    // nodes, so we insert select instruction to compute the final result.    std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;    for (BasicBlock *Succ : successors(BB1)) { -    PHINode *PN; -    for (BasicBlock::iterator BBI = Succ->begin(); -         (PN = dyn_cast<PHINode>(BBI)); ++BBI) { -      Value *BB1V = PN->getIncomingValueForBlock(BB1); -      Value *BB2V = PN->getIncomingValueForBlock(BB2); +    for (PHINode &PN : Succ->phis()) { +      Value *BB1V = PN.getIncomingValueForBlock(BB1); +      Value *BB2V = PN.getIncomingValueForBlock(BB2);        if (BB1V == BB2V)          continue; @@ -1386,9 +1397,9 @@ HoistTerminator:                                   BB1V->getName() + "." 
+ BB2V->getName(), BI));        // Make the PHI node use the select for all incoming values for BB1/BB2 -      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) -        if (PN->getIncomingBlock(i) == BB1 || PN->getIncomingBlock(i) == BB2) -          PN->setIncomingValue(i, SI); +      for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) +        if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2) +          PN.setIncomingValue(i, SI);      }    } @@ -1727,7 +1738,8 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {    LockstepReverseIterator LRI(UnconditionalPreds);    while (LRI.isValid() &&           canSinkInstructions(*LRI, PHIOperands)) { -    DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n"); +    LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] +                      << "\n");      InstructionsToSink.insert((*LRI).begin(), (*LRI).end());      ++ScanIdx;      --LRI; @@ -1739,7 +1751,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {        for (auto *V : PHIOperands[I])          if (InstructionsToSink.count(V) == 0)            ++NumPHIdValues; -    DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n"); +    LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");      unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();      if ((NumPHIdValues % UnconditionalPreds.size()) != 0)          NumPHIInsts++; @@ -1767,7 +1779,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {      if (!Profitable)        return false; -    DEBUG(dbgs() << "SINK: Splitting edge\n"); +    LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");      // We have a conditional edge and we're going to sink some instructions.      // Insert a new block postdominating all blocks we're going to sink from.      
if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split")) @@ -1789,16 +1801,17 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {    // and never actually sink it which means we produce more PHIs than intended.    // This is unlikely in practice though.    for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) { -    DEBUG(dbgs() << "SINK: Sink: " -                 << *UnconditionalPreds[0]->getTerminator()->getPrevNode() -                 << "\n"); +    LLVM_DEBUG(dbgs() << "SINK: Sink: " +                      << *UnconditionalPreds[0]->getTerminator()->getPrevNode() +                      << "\n");      // Because we've sunk every instruction in turn, the current instruction to      // sink is always at index 0.      LRI.reset();      if (!ProfitableToSinkInstruction(LRI)) {        // Too many PHIs would be created. -      DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n"); +      LLVM_DEBUG( +          dbgs() << "SINK: stopping here, too many PHIs would be created!\n");        break;      } @@ -1810,7 +1823,7 @@ static bool SinkCommonCodeFromPredecessors(BasicBlock *BB) {    return Changed;  } -/// \brief Determine if we can hoist sink a sole store instruction out of a +/// Determine if we can hoist sink a sole store instruction out of a  /// conditional block.  ///  /// We are looking for code like the following: @@ -1850,12 +1863,9 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,    // Look for a store to the same pointer in BrBB.    unsigned MaxNumInstToLookAt = 9; -  for (Instruction &CurI : reverse(*BrBB)) { +  for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug())) {      if (!MaxNumInstToLookAt)        break; -    // Skip debug info. -    if (isa<DbgInfoIntrinsic>(CurI)) -      continue;      --MaxNumInstToLookAt;      // Could be calling an instruction that affects memory like free(). 
@@ -1874,7 +1884,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,    return nullptr;  } -/// \brief Speculate a conditional basic block flattening the CFG. +/// Speculate a conditional basic block flattening the CFG.  ///  /// Note that this is a very risky transform currently. Speculating  /// instructions like this is most often not desirable. Instead, there is an MI @@ -1999,10 +2009,9 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,    // Check that the PHI nodes can be converted to selects.    bool HaveRewritablePHIs = false; -  for (BasicBlock::iterator I = EndBB->begin(); -       PHINode *PN = dyn_cast<PHINode>(I); ++I) { -    Value *OrigV = PN->getIncomingValueForBlock(BB); -    Value *ThenV = PN->getIncomingValueForBlock(ThenBB); +  for (PHINode &PN : EndBB->phis()) { +    Value *OrigV = PN.getIncomingValueForBlock(BB); +    Value *ThenV = PN.getIncomingValueForBlock(ThenBB);      // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.      // Skip PHIs which are trivial. @@ -2010,8 +2019,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,        continue;      // Don't convert to selects if we could remove undefined behavior instead. -    if (passingValueIsAlwaysUndefined(OrigV, PN) || -        passingValueIsAlwaysUndefined(ThenV, PN)) +    if (passingValueIsAlwaysUndefined(OrigV, &PN) || +        passingValueIsAlwaysUndefined(ThenV, &PN))        return false;      HaveRewritablePHIs = true; @@ -2045,7 +2054,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,      return false;    // If we get here, we can hoist the instruction and if-convert. -  DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";); +  LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);    // Insert a select of the value of the speculated store.    
if (SpeculatedStoreValue) { @@ -2072,12 +2081,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,    // Insert selects and rewrite the PHI operands.    IRBuilder<NoFolder> Builder(BI); -  for (BasicBlock::iterator I = EndBB->begin(); -       PHINode *PN = dyn_cast<PHINode>(I); ++I) { -    unsigned OrigI = PN->getBasicBlockIndex(BB); -    unsigned ThenI = PN->getBasicBlockIndex(ThenBB); -    Value *OrigV = PN->getIncomingValue(OrigI); -    Value *ThenV = PN->getIncomingValue(ThenI); +  for (PHINode &PN : EndBB->phis()) { +    unsigned OrigI = PN.getBasicBlockIndex(BB); +    unsigned ThenI = PN.getBasicBlockIndex(ThenBB); +    Value *OrigV = PN.getIncomingValue(OrigI); +    Value *ThenV = PN.getIncomingValue(ThenI);      // Skip PHIs which are trivial.      if (OrigV == ThenV) @@ -2091,8 +2099,8 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,        std::swap(TrueV, FalseV);      Value *V = Builder.CreateSelect(          BrCond, TrueV, FalseV, "spec.select", BI); -    PN->setIncomingValue(OrigI, V); -    PN->setIncomingValue(ThenI, V); +    PN.setIncomingValue(OrigI, V); +    PN.setIncomingValue(ThenI, V);    }    // Remove speculated dbg intrinsics. @@ -2107,19 +2115,16 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,  /// Return true if we can thread a branch across this block.  static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) { -  BranchInst *BI = cast<BranchInst>(BB->getTerminator());    unsigned Size = 0; -  for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) { -    if (isa<DbgInfoIntrinsic>(BBI)) -      continue; +  for (Instruction &I : BB->instructionsWithoutDebug()) {      if (Size > 10)        return false; // Don't clone large BB's.      ++Size;      // We can only support instructions that do not define values that are      // live outside of the current basic block. 
-    for (User *U : BBI->users()) { +    for (User *U : I.users()) {        Instruction *UI = cast<Instruction>(U);        if (UI->getParent() != BB || isa<PHINode>(UI))          return false; @@ -2261,6 +2266,10 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,    // dependence information for this check, but simplifycfg can't keep it up    // to date, and this catches most of the cases we care about anyway.    BasicBlock *BB = PN->getParent(); +  const Function *Fn = BB->getParent(); +  if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing)) +    return false; +    BasicBlock *IfTrue, *IfFalse;    Value *IfCond = GetIfCondition(BB, IfTrue, IfFalse);    if (!IfCond || @@ -2351,8 +2360,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,        }    } -  DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond << "  T: " -               << IfTrue->getName() << "  F: " << IfFalse->getName() << "\n"); +  LLVM_DEBUG(dbgs() << "FOUND IF CONDITION!  " << *IfCond +                    << "  T: " << IfTrue->getName() +                    << "  F: " << IfFalse->getName() << "\n");    // If we can still promote the PHI nodes after this gauntlet of tests,    // do all of the PHI's now. @@ -2476,9 +2486,9 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,    (void)RI; -  DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" -               << "\n  " << *BI << "NewRet = " << *RI -               << "TRUEBLOCK: " << *TrueSucc << "FALSEBLOCK: " << *FalseSucc); +  LLVM_DEBUG(dbgs() << "\nCHANGING BRANCH TO TWO RETURNS INTO SELECT:" +                    << "\n  " << *BI << "NewRet = " << *RI << "TRUEBLOCK: " +                    << *TrueSucc << "FALSEBLOCK: " << *FalseSucc);    EraseTerminatorInstAndDCECond(BI); @@ -2487,7 +2497,7 @@ static bool SimplifyCondBranchToTwoReturns(BranchInst *BI,  /// Return true if the given instruction is available  /// in its predecessor block. 
If yes, the instruction will be removed. -static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { +static bool tryCSEWithPredecessor(Instruction *Inst, BasicBlock *PB) {    if (!isa<BinaryOperator>(Inst) && !isa<CmpInst>(Inst))      return false;    for (Instruction &I : *PB) { @@ -2544,14 +2554,16 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {          if (PBI->isConditional() &&              (BI->getSuccessor(0) == PBI->getSuccessor(0) ||               BI->getSuccessor(0) == PBI->getSuccessor(1))) { -          for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) { +          for (auto I = BB->instructionsWithoutDebug().begin(), +                    E = BB->instructionsWithoutDebug().end(); +               I != E;) {              Instruction *Curr = &*I++;              if (isa<CmpInst>(Curr)) {                Cond = Curr;                break;              }              // Quit if we can't remove this instruction. -            if (!checkCSEInPredecessor(Curr, PB)) +            if (!tryCSEWithPredecessor(Curr, PB))                return false;            }          } @@ -2651,7 +2663,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {          continue;      } -    DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB); +    LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);      IRBuilder<> Builder(PBI);      // If we need to invert the condition in the pred block to match, do so now. @@ -2861,7 +2873,7 @@ static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,        if (!AlternativeV)          break; -      assert(std::distance(pred_begin(Succ), pred_end(Succ)) == 2); +      assert(pred_size(Succ) == 2);        auto PredI = pred_begin(Succ);        BasicBlock *OtherPredBB = *PredI == BB ? 
*++PredI : *PredI;        if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV) @@ -2904,14 +2916,13 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,      // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to      // thread this store.      unsigned N = 0; -    for (auto &I : *BB) { +    for (auto &I : BB->instructionsWithoutDebug()) {        // Cheap instructions viable for folding.        if (isa<BinaryOperator>(I) || isa<GetElementPtrInst>(I) ||            isa<StoreInst>(I))          ++N;        // Free instructions. -      else if (isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || -               IsaBitcastOfPointerType(I)) +      else if (isa<TerminatorInst>(I) || IsaBitcastOfPointerType(I))          continue;        else          return false; @@ -2966,6 +2977,21 @@ static bool mergeConditionalStoreToAddress(BasicBlock *PTB, BasicBlock *PFB,      if (&*I != PStore && I->mayReadOrWriteMemory())        return false; +  // If PostBB has more than two predecessors, we need to split it so we can +  // sink the store. +  if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) { +    // We know that QFB's only successor is PostBB. And QFB has a single +    // predecessor. If QTB exists, then its only successor is also PostBB. +    // If QTB does not exist, then QFB's only predecessor has a conditional +    // branch to QFB and PostBB. +    BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor(); +    BasicBlock *NewBB = SplitBlockPredecessors(PostBB, { QFB, TruePred}, +                                               "condstore.split"); +    if (!NewBB) +      return false; +    PostBB = NewBB; +  } +    // OK, we're going to sink the stores to PostBB. The store has to be    // conditional though, so first create the predicate.    
Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator()) @@ -3101,7 +3127,7 @@ static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,    if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||        (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))      return false; -  if (!PostBB->hasNUses(2) || !QBI->getParent()->hasNUses(2)) +  if (!QBI->getParent()->hasNUses(2))      return false;    // OK, this is a sequence of two diamonds or triangles. @@ -3201,11 +3227,9 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,    // If this is a conditional branch in an empty block, and if any    // predecessors are a conditional branch to one of our destinations,    // fold the conditions into logical ops and one cond br. -  BasicBlock::iterator BBI = BB->begin(); +    // Ignore dbg intrinsics. -  while (isa<DbgInfoIntrinsic>(BBI)) -    ++BBI; -  if (&*BBI != BI) +  if (&*BB->instructionsWithoutDebug().begin() != BI)      return false;    int PBIOp, BIOp; @@ -3262,8 +3286,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,    // Finally, if everything is ok, fold the branches to logical ops.    BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); -  DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() -               << "AND: " << *BI->getParent()); +  LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() +                    << "AND: " << *BI->getParent());    // If OtherDest *is* BB, then BB is a basic block with a single conditional    // branch in it, where one edge (OtherDest) goes back to itself but the other @@ -3281,7 +3305,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,      OtherDest = InfLoopBlock;    } -  DEBUG(dbgs() << *PBI->getParent()->getParent()); +  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());    // BI may have other predecessors.  Because of this, we leave    // it alone, but modify PBI. 
@@ -3335,17 +3359,15 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,    // it.  If it has PHIs though, the PHIs may have different    // entries for BB and PBI's BB.  If so, insert a select to make    // them agree. -  PHINode *PN; -  for (BasicBlock::iterator II = CommonDest->begin(); -       (PN = dyn_cast<PHINode>(II)); ++II) { -    Value *BIV = PN->getIncomingValueForBlock(BB); -    unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); -    Value *PBIV = PN->getIncomingValue(PBBIdx); +  for (PHINode &PN : CommonDest->phis()) { +    Value *BIV = PN.getIncomingValueForBlock(BB); +    unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent()); +    Value *PBIV = PN.getIncomingValue(PBBIdx);      if (BIV != PBIV) {        // Insert a select in PBI to pick the right value.        SelectInst *NV = cast<SelectInst>(            Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux")); -      PN->setIncomingValue(PBBIdx, NV); +      PN.setIncomingValue(PBBIdx, NV);        // Although the select has the same condition as PBI, the original branch        // weights for PBI do not apply to the new select because the select's        // 'logical' edges are incoming edges of the phi that is eliminated, not @@ -3367,8 +3389,8 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,      }    } -  DEBUG(dbgs() << "INTO: " << *PBI->getParent()); -  DEBUG(dbgs() << *PBI->getParent()->getParent()); +  LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent()); +  LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());    // This basic block is probably dead.  We know it has at least    // one fewer predecessor. @@ -3668,9 +3690,9 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,    BasicBlock *BB = BI->getParent(); -  DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() -               << " cases into SWITCH.  
BB is:\n" -               << *BB); +  LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size() +                    << " cases into SWITCH.  BB is:\n" +                    << *BB);    // If there are any extra values that couldn't be folded into the switch    // then we evaluate them with an explicit branch first.  Split the block @@ -3693,8 +3715,8 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,      // for the edge we just added.      AddPredecessorToBlock(EdgeBB, BB, NewBB); -    DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase -                 << "\nEXTRABB = " << *BB); +    LLVM_DEBUG(dbgs() << "  ** 'icmp' chain unhandled condition: " << *ExtraCase +                      << "\nEXTRABB = " << *BB);      BB = NewBB;    } @@ -3725,7 +3747,7 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,    // Erase the old branch instruction.    EraseTerminatorInstAndDCECond(BI); -  DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n'); +  LLVM_DEBUG(dbgs() << "  ** 'icmp' chain result is:\n" << *BB << '\n');    return true;  } @@ -3876,6 +3898,7 @@ static bool removeEmptyCleanup(CleanupReturnInst *RI) {      switch (IntrinsicID) {      case Intrinsic::dbg_declare:      case Intrinsic::dbg_value: +    case Intrinsic::dbg_label:      case Intrinsic::lifetime_end:        break;      default: @@ -4052,8 +4075,8 @@ bool SimplifyCFGOpt::SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder) {    if (!UncondBranchPreds.empty() && DupRet) {      while (!UncondBranchPreds.empty()) {        BasicBlock *Pred = UncondBranchPreds.pop_back_val(); -      DEBUG(dbgs() << "FOLDING: " << *BB -                   << "INTO UNCOND BRANCH PRED: " << *Pred); +      LLVM_DEBUG(dbgs() << "FOLDING: " << *BB +                        << "INTO UNCOND BRANCH PRED: " << *Pred);        (void)FoldReturnIntoUncondBranch(RI, BB, Pred);      } @@ -4377,7 +4400,8 @@ static bool 
eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,      if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||          (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {        DeadCases.push_back(Case.getCaseValue()); -      DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal << " is dead.\n"); +      LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseVal +                        << " is dead.\n");      }    } @@ -4393,7 +4417,7 @@ static bool eliminateDeadSwitchCases(SwitchInst *SI, AssumptionCache *AC,    if (HasDefault && DeadCases.empty() &&        NumUnknownBits < 64 /* avoid overflow */ &&        SI->getNumCases() == (1ULL << NumUnknownBits)) { -    DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n"); +    LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n");      BasicBlock *NewDefault =          SplitBlockPredecessors(SI->getDefaultDest(), SI->getParent(), "");      SI->setDefaultDest(&*NewDefault); @@ -4451,17 +4475,16 @@ static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,    BasicBlock *Succ = Branch->getSuccessor(0); -  BasicBlock::iterator I = Succ->begin(); -  while (PHINode *PHI = dyn_cast<PHINode>(I++)) { -    int Idx = PHI->getBasicBlockIndex(BB); +  for (PHINode &PHI : Succ->phis()) { +    int Idx = PHI.getBasicBlockIndex(BB);      assert(Idx >= 0 && "PHI has no entry for predecessor?"); -    Value *InValue = PHI->getIncomingValue(Idx); +    Value *InValue = PHI.getIncomingValue(Idx);      if (InValue != CaseValue)        continue;      *PhiIndex = Idx; -    return PHI; +    return &PHI;    }    return nullptr; @@ -4491,19 +4514,16 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {      // -->      //     %r = phi i32 ... [ %x, %switchbb ] ... 
-    for (Instruction &InstInCaseDest : *CaseDest) { -      auto *Phi = dyn_cast<PHINode>(&InstInCaseDest); -      if (!Phi) break; - +    for (PHINode &Phi : CaseDest->phis()) {        // This only works if there is exactly 1 incoming edge from the switch to        // a phi. If there is >1, that means multiple cases of the switch map to 1        // value in the phi, and that phi value is not the switch condition. Thus,        // this transform would not make sense (the phi would be invalid because        // a phi can't have different incoming values from the same block). -      int SwitchBBIdx = Phi->getBasicBlockIndex(SwitchBlock); -      if (Phi->getIncomingValue(SwitchBBIdx) == CaseValue && -          count(Phi->blocks(), SwitchBlock) == 1) { -        Phi->setIncomingValue(SwitchBBIdx, SI->getCondition()); +      int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock); +      if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue && +          count(Phi.blocks(), SwitchBlock) == 1) { +        Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());          Changed = true;        }      } @@ -4614,24 +4634,20 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,    // which we can constant-propagate the CaseVal, continue to its successor.    SmallDenseMap<Value *, Constant *> ConstantPool;    ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal)); -  for (BasicBlock::iterator I = CaseDest->begin(), E = CaseDest->end(); I != E; -       ++I) { -    if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) { +  for (Instruction &I :CaseDest->instructionsWithoutDebug()) { +    if (TerminatorInst *T = dyn_cast<TerminatorInst>(&I)) {        // If the terminator is a simple branch, continue to the next block.        if (T->getNumSuccessors() != 1 || T->isExceptional())          return false;        Pred = CaseDest;        CaseDest = T->getSuccessor(0); -    } else if (isa<DbgInfoIntrinsic>(I)) { -      // Skip debug intrinsic. 
-      continue; -    } else if (Constant *C = ConstantFold(&*I, DL, ConstantPool)) { +    } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {        // Instruction is side-effect free and constant.        // If the instruction has uses outside this block or a phi node slot for        // the block, it is not safe to bypass the instruction since it would then        // no longer dominate all its uses. -      for (auto &Use : I->uses()) { +      for (auto &Use : I.uses()) {          User *User = Use.getUser();          if (Instruction *I = dyn_cast<Instruction>(User))            if (I->getParent() == CaseDest) @@ -4642,7 +4658,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,          return false;        } -      ConstantPool.insert(std::make_pair(&*I, C)); +      ConstantPool.insert(std::make_pair(&I, C));      } else {        break;      } @@ -4656,14 +4672,13 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,      return false;    // Get the values for this case from phi nodes in the destination block. -  BasicBlock::iterator I = (*CommonDest)->begin(); -  while (PHINode *PHI = dyn_cast<PHINode>(I++)) { -    int Idx = PHI->getBasicBlockIndex(Pred); +  for (PHINode &PHI : (*CommonDest)->phis()) { +    int Idx = PHI.getBasicBlockIndex(Pred);      if (Idx == -1)        continue;      Constant *ConstVal = -        LookupConstant(PHI->getIncomingValue(Idx), ConstantPool); +        LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);      if (!ConstVal)        return false; @@ -4671,37 +4686,38 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,      if (!ValidLookupTableConstant(ConstVal, TTI))        return false; -    Res.push_back(std::make_pair(PHI, ConstVal)); +    Res.push_back(std::make_pair(&PHI, ConstVal));    }    return Res.size() > 0;  }  // Helper function used to add CaseVal to the list of cases that generate -// Result. 
-static void MapCaseToResult(ConstantInt *CaseVal, -                            SwitchCaseResultVectorTy &UniqueResults, -                            Constant *Result) { +// Result. Returns the updated number of cases that generate this result. +static uintptr_t MapCaseToResult(ConstantInt *CaseVal, +                                 SwitchCaseResultVectorTy &UniqueResults, +                                 Constant *Result) {    for (auto &I : UniqueResults) {      if (I.first == Result) {        I.second.push_back(CaseVal); -      return; +      return I.second.size();      }    }    UniqueResults.push_back(        std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal))); +  return 1;  }  // Helper function that initializes a map containing  // results for the PHI node of the common destination block for a switch  // instruction. Returns false if multiple PHI nodes have been found or if  // there is not a common destination block for the switch. -static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, -                                  BasicBlock *&CommonDest, -                                  SwitchCaseResultVectorTy &UniqueResults, -                                  Constant *&DefaultResult, -                                  const DataLayout &DL, -                                  const TargetTransformInfo &TTI) { +static bool +InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest, +                      SwitchCaseResultVectorTy &UniqueResults, +                      Constant *&DefaultResult, const DataLayout &DL, +                      const TargetTransformInfo &TTI, +                      uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {    for (auto &I : SI->cases()) {      ConstantInt *CaseVal = I.getCaseValue(); @@ -4711,10 +4727,21 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,                          DL, TTI))        return false; -    // Only one value per case is permitted +    
// Only one value per case is permitted.      if (Results.size() > 1)        return false; -    MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + +    // Add the case->result mapping to UniqueResults. +    const uintptr_t NumCasesForResult = +        MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second); + +    // Early out if there are too many cases for this result. +    if (NumCasesForResult > MaxCasesPerResult) +      return false; + +    // Early out if there are too many unique results. +    if (UniqueResults.size() > MaxUniqueResults) +      return false;      // Check the PHI consistency.      if (!PHI) @@ -4814,7 +4841,7 @@ static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,    SwitchCaseResultVectorTy UniqueResults;    // Collect all the cases that will deliver the same value from the switch.    if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult, -                             DL, TTI)) +                             DL, TTI, 2, 1))      return false;    // Selects choose between maximum two values.    if (UniqueResults.size() != 2) @@ -5392,8 +5419,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,    }    bool ReturnedEarly = false; -  for (size_t I = 0, E = PHIs.size(); I != E; ++I) { -    PHINode *PHI = PHIs[I]; +  for (PHINode *PHI : PHIs) {      const ResultListTy &ResultList = ResultLists[PHI];      // If using a bitmask, use any value to fill the lookup table holes. @@ -5483,7 +5509,7 @@ static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,    SmallVector<int64_t,4> Values;    for (auto &C : SI->cases())      Values.push_back(C.getCaseValue()->getValue().getSExtValue()); -  std::sort(Values.begin(), Values.end()); +  llvm::sort(Values.begin(), Values.end());    // If the switch is already dense, there's nothing useful to do here.    
if (isSwitchDense(Values)) @@ -5566,11 +5592,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {      // If the block only contains the switch, see if we can fold the block      // away into any preds. -    BasicBlock::iterator BBI = BB->begin(); -    // Ignore dbg intrinsics. -    while (isa<DbgInfoIntrinsic>(BBI)) -      ++BBI; -    if (SI == &*BBI) +    if (SI == &*BB->instructionsWithoutDebug().begin())        if (FoldValueComparisonIntoPredecessors(SI, Builder))          return simplifyCFG(BB, TTI, Options) | true;    } @@ -5657,7 +5679,7 @@ bool SimplifyCFGOpt::SimplifyIndirectBr(IndirectBrInst *IBI) {  /// any transform which might inhibit optimization (such as our ability to  /// specialize a particular handler via tail commoning).  We do this by not  /// merging any blocks which require us to introduce a phi.  Since the same -/// values are flowing through both blocks, we don't loose any ability to +/// values are flowing through both blocks, we don't lose any ability to  /// specialize.  If anything, we make such specialization more likely.  ///  /// TODO - This transformation could remove entries from a phi in the target @@ -5687,7 +5709,7 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,      // We've found an identical block.  Update our predecessors to take that      // path instead and make ourselves dead. 
-    SmallSet<BasicBlock *, 16> Preds; +    SmallPtrSet<BasicBlock *, 16> Preds;      Preds.insert(pred_begin(BB), pred_end(BB));      for (BasicBlock *Pred : Preds) {        InvokeInst *II = cast<InvokeInst>(Pred->getTerminator()); @@ -5705,7 +5727,7 @@ static bool TryToMergeLandingPad(LandingPadInst *LPad, BranchInst *BI,          Inst.eraseFromParent();      } -    SmallSet<BasicBlock *, 16> Succs; +    SmallPtrSet<BasicBlock *, 16> Succs;      Succs.insert(succ_begin(BB), succ_end(BB));      for (BasicBlock *Succ : Succs) {        Succ->removePredecessor(BB); @@ -5729,9 +5751,12 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,    // header. (This is for early invocations before loop simplify and    // vectorization to keep canonical loop forms for nested loops. These blocks    // can be eliminated when the pass is invoked later in the back-end.) +  // Note that if BB has only one predecessor then we do not introduce new +  // backedge, so we can eliminate BB.    bool NeedCanonicalLoop =        Options.NeedCanonicalLoop && -      (LoopHeaders && (LoopHeaders->count(BB) || LoopHeaders->count(Succ))); +      (LoopHeaders && pred_size(BB) > 1 && +       (LoopHeaders->count(BB) || LoopHeaders->count(Succ)));    BasicBlock::iterator I = BB->getFirstNonPHIOrDbg()->getIterator();    if (I->isTerminator() && BB != &BB->getParent()->getEntryBlock() &&        !NeedCanonicalLoop && TryToSimplifyUncondBranchFromEmptyBlock(BB)) @@ -5779,6 +5804,9 @@ static BasicBlock *allPredecessorsComeFromSameSource(BasicBlock *BB) {  bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {    BasicBlock *BB = BI->getParent(); +  const Function *Fn = BB->getParent(); +  if (Fn && Fn->hasFnAttribute(Attribute::OptForFuzzing)) +    return false;    // Conditional branch    if (isValueEqualityComparison(BI)) { @@ -5791,18 +5819,12 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) {      // This block must be empty, except for 
the setcond inst, if it exists.      // Ignore dbg intrinsics. -    BasicBlock::iterator I = BB->begin(); -    // Ignore dbg intrinsics. -    while (isa<DbgInfoIntrinsic>(I)) -      ++I; +    auto I = BB->instructionsWithoutDebug().begin();      if (&*I == BI) {        if (FoldValueComparisonIntoPredecessors(BI, Builder))          return simplifyCFG(BB, TTI, Options) | true;      } else if (&*I == cast<Instruction>(BI->getCondition())) {        ++I; -      // Ignore dbg intrinsics. -      while (isa<DbgInfoIntrinsic>(I)) -        ++I;        if (&*I == BI && FoldValueComparisonIntoPredecessors(BI, Builder))          return simplifyCFG(BB, TTI, Options) | true;      } @@ -5928,17 +5950,20 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {      // Load from null is undefined.      if (LoadInst *LI = dyn_cast<LoadInst>(Use))        if (!LI->isVolatile()) -        return LI->getPointerAddressSpace() == 0; +        return !NullPointerIsDefined(LI->getFunction(), +                                     LI->getPointerAddressSpace());      // Store to null is undefined.      if (StoreInst *SI = dyn_cast<StoreInst>(Use))        if (!SI->isVolatile()) -        return SI->getPointerAddressSpace() == 0 && +        return (!NullPointerIsDefined(SI->getFunction(), +                                      SI->getPointerAddressSpace())) &&                 SI->getPointerOperand() == I;      // A call to null is undefined.      if (auto CS = CallSite(Use)) -      return CS.getCalledValue() == I; +      return !NullPointerIsDefined(CS->getFunction()) && +             CS.getCalledValue() == I;    }    return false;  } @@ -5946,14 +5971,13 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {  /// If BB has an incoming value that will always trigger undefined behavior  /// (eg. null pointer dereference), remove the branch leading here.  
static bool removeUndefIntroducingPredecessor(BasicBlock *BB) { -  for (BasicBlock::iterator i = BB->begin(); -       PHINode *PHI = dyn_cast<PHINode>(i); ++i) -    for (unsigned i = 0, e = PHI->getNumIncomingValues(); i != e; ++i) -      if (passingValueIsAlwaysUndefined(PHI->getIncomingValue(i), PHI)) { -        TerminatorInst *T = PHI->getIncomingBlock(i)->getTerminator(); +  for (PHINode &PHI : BB->phis()) +    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) +      if (passingValueIsAlwaysUndefined(PHI.getIncomingValue(i), &PHI)) { +        TerminatorInst *T = PHI.getIncomingBlock(i)->getTerminator();          IRBuilder<> Builder(T);          if (BranchInst *BI = dyn_cast<BranchInst>(T)) { -          BB->removePredecessor(PHI->getIncomingBlock(i)); +          BB->removePredecessor(PHI.getIncomingBlock(i));            // Turn unconditional branches into unreachables and remove the dead            // destination from conditional branches.            if (BI->isUnconditional()) @@ -5980,7 +6004,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {    // or that just have themselves as a predecessor.  These are unreachable.    
if ((pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) ||        BB->getSinglePredecessor() == BB) { -    DEBUG(dbgs() << "Removing BB: \n" << *BB); +    LLVM_DEBUG(dbgs() << "Removing BB: \n" << *BB);      DeleteDeadBlock(BB);      return true;    } diff --git a/lib/Transforms/Utils/SimplifyIndVar.cpp b/lib/Transforms/Utils/SimplifyIndVar.cpp index ad1faea0a7ae..e381fbc34ab4 100644 --- a/lib/Transforms/Utils/SimplifyIndVar.cpp +++ b/lib/Transforms/Utils/SimplifyIndVar.cpp @@ -26,6 +26,7 @@  #include "llvm/IR/PatternMatch.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/Local.h"  using namespace llvm; @@ -80,6 +81,7 @@ namespace {      bool replaceIVUserWithLoopInvariant(Instruction *UseInst);      bool eliminateOverflowIntrinsic(CallInst *CI); +    bool eliminateTrunc(TruncInst *TI);      bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);      bool makeIVComparisonInvariant(ICmpInst *ICmp, Value *IVOperand);      void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand); @@ -147,8 +149,8 @@ Value *SimplifyIndvar::foldIVUser(Instruction *UseInst, Instruction *IVOperand)    if (SE->getSCEV(UseInst) != FoldedExpr)      return nullptr; -  DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand -        << " -> " << *UseInst << '\n'); +  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated IV operand: " << *IVOperand +                    << " -> " << *UseInst << '\n');    UseInst->setOperand(OperIdx, IVSrc);    assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper"); @@ -221,7 +223,7 @@ bool SimplifyIndvar::makeIVComparisonInvariant(ICmpInst *ICmp,      // for now.      
return false; -  DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n'); +  LLVM_DEBUG(dbgs() << "INDVARS: Simplified comparison: " << *ICmp << '\n');    ICmp->setPredicate(InvariantPredicate);    ICmp->setOperand(0, NewLHS);    ICmp->setOperand(1, NewRHS); @@ -252,11 +254,11 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {    if (SE->isKnownPredicate(Pred, S, X)) {      ICmp->replaceAllUsesWith(ConstantInt::getTrue(ICmp->getContext()));      DeadInsts.emplace_back(ICmp); -    DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); +    LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');    } else if (SE->isKnownPredicate(ICmpInst::getInversePredicate(Pred), S, X)) {      ICmp->replaceAllUsesWith(ConstantInt::getFalse(ICmp->getContext()));      DeadInsts.emplace_back(ICmp); -    DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n'); +    LLVM_DEBUG(dbgs() << "INDVARS: Eliminated comparison: " << *ICmp << '\n');    } else if (makeIVComparisonInvariant(ICmp, IVOperand)) {      // fallthrough to end of function    } else if (ICmpInst::isSigned(OriginalPred) && @@ -267,7 +269,8 @@ void SimplifyIndvar::eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand) {      // we turn the instruction's predicate to its unsigned version. Note that      // we cannot rely on Pred here unless we check if we have swapped it.      
assert(ICmp->getPredicate() == OriginalPred && "Predicate changed?"); -    DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp << '\n'); +    LLVM_DEBUG(dbgs() << "INDVARS: Turn to unsigned comparison: " << *ICmp +                      << '\n');      ICmp->setPredicate(ICmpInst::getUnsignedPredicate(OriginalPred));    } else      return; @@ -293,7 +296,7 @@ bool SimplifyIndvar::eliminateSDiv(BinaryOperator *SDiv) {          SDiv->getName() + ".udiv", SDiv);      UDiv->setIsExact(SDiv->isExact());      SDiv->replaceAllUsesWith(UDiv); -    DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n'); +    LLVM_DEBUG(dbgs() << "INDVARS: Simplified sdiv: " << *SDiv << '\n');      ++NumSimplifiedSDiv;      Changed = true;      DeadInsts.push_back(SDiv); @@ -309,7 +312,7 @@ void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {    auto *URem = BinaryOperator::Create(BinaryOperator::URem, N, D,                                        Rem->getName() + ".urem", Rem);    Rem->replaceAllUsesWith(URem); -  DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n'); +  LLVM_DEBUG(dbgs() << "INDVARS: Simplified srem: " << *Rem << '\n');    ++NumSimplifiedSRem;    Changed = true;    DeadInsts.emplace_back(Rem); @@ -318,7 +321,7 @@ void SimplifyIndvar::replaceSRemWithURem(BinaryOperator *Rem) {  // i % n  -->  i  if i is in [0,n).  
void SimplifyIndvar::replaceRemWithNumerator(BinaryOperator *Rem) {    Rem->replaceAllUsesWith(Rem->getOperand(0)); -  DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); +  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');    ++NumElimRem;    Changed = true;    DeadInsts.emplace_back(Rem); @@ -332,7 +335,7 @@ void SimplifyIndvar::replaceRemWithNumeratorOrZero(BinaryOperator *Rem) {    SelectInst *Sel =        SelectInst::Create(ICmp, ConstantInt::get(T, 0), N, "iv.rem", Rem);    Rem->replaceAllUsesWith(Sel); -  DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n'); +  LLVM_DEBUG(dbgs() << "INDVARS: Simplified rem: " << *Rem << '\n');    ++NumElimRem;    Changed = true;    DeadInsts.emplace_back(Rem); @@ -492,6 +495,118 @@ bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {    return true;  } +bool SimplifyIndvar::eliminateTrunc(TruncInst *TI) { +  // It is always legal to replace +  //   icmp <pred> i32 trunc(iv), n +  // with +  //   icmp <pred> i64 sext(trunc(iv)), sext(n), if pred is signed predicate. +  // Or with +  //   icmp <pred> i64 zext(trunc(iv)), zext(n), if pred is unsigned predicate. +  // Or with either of these if pred is an equality predicate. +  // +  // If we can prove that iv == sext(trunc(iv)) or iv == zext(trunc(iv)) for +  // every comparison which uses trunc, it means that we can replace each of +  // them with comparison of iv against sext/zext(n). We no longer need trunc +  // after that. +  // +  // TODO: Should we do this if we can widen *some* comparisons, but not all +  // of them? Sometimes it is enough to enable other optimizations, but the +  // trunc instruction will stay in the loop. +  Value *IV = TI->getOperand(0); +  Type *IVTy = IV->getType(); +  const SCEV *IVSCEV = SE->getSCEV(IV); +  const SCEV *TISCEV = SE->getSCEV(TI); + +  // Check if iv == zext(trunc(iv)) and if iv == sext(trunc(iv)). 
If so, we can +  // get rid of trunc +  bool DoesSExtCollapse = false; +  bool DoesZExtCollapse = false; +  if (IVSCEV == SE->getSignExtendExpr(TISCEV, IVTy)) +    DoesSExtCollapse = true; +  if (IVSCEV == SE->getZeroExtendExpr(TISCEV, IVTy)) +    DoesZExtCollapse = true; + +  // If neither sext nor zext does collapse, it is not profitable to do any +  // transform. Bail. +  if (!DoesSExtCollapse && !DoesZExtCollapse) +    return false; + +  // Collect users of the trunc that look like comparisons against invariants. +  // Bail if we find something different. +  SmallVector<ICmpInst *, 4> ICmpUsers; +  for (auto *U : TI->users()) { +    // We don't care about users in unreachable blocks. +    if (isa<Instruction>(U) && +        !DT->isReachableFromEntry(cast<Instruction>(U)->getParent())) +      continue; +    if (ICmpInst *ICI = dyn_cast<ICmpInst>(U)) { +      if (ICI->getOperand(0) == TI && L->isLoopInvariant(ICI->getOperand(1))) { +        assert(L->contains(ICI->getParent()) && "LCSSA form broken?"); +        // If we cannot get rid of trunc, bail. +        if (ICI->isSigned() && !DoesSExtCollapse) +          return false; +        if (ICI->isUnsigned() && !DoesZExtCollapse) +          return false; +        // For equality, either signed or unsigned works. +        ICmpUsers.push_back(ICI); +      } else +        return false; +    } else +      return false; +  } + +  auto CanUseZExt = [&](ICmpInst *ICI) { +    // Unsigned comparison can be widened as unsigned. +    if (ICI->isUnsigned()) +      return true; +    // Is it profitable to do zext? +    if (!DoesZExtCollapse) +      return false; +    // For equality, we can safely zext both parts. +    if (ICI->isEquality()) +      return true; +    // Otherwise we can only use zext when comparing two non-negative or two +    // negative values. But in practice, we will never pass DoesZExtCollapse +    // check for a negative value, because zext(trunc(x)) is non-negative. 
So +    // it only make sense to check for non-negativity here. +    const SCEV *SCEVOP1 = SE->getSCEV(ICI->getOperand(0)); +    const SCEV *SCEVOP2 = SE->getSCEV(ICI->getOperand(1)); +    return SE->isKnownNonNegative(SCEVOP1) && SE->isKnownNonNegative(SCEVOP2); +  }; +  // Replace all comparisons against trunc with comparisons against IV. +  for (auto *ICI : ICmpUsers) { +    auto *Op1 = ICI->getOperand(1); +    Instruction *Ext = nullptr; +    // For signed/unsigned predicate, replace the old comparison with comparison +    // of immediate IV against sext/zext of the invariant argument. If we can +    // use either sext or zext (i.e. we are dealing with equality predicate), +    // then prefer zext as a more canonical form. +    // TODO: If we see a signed comparison which can be turned into unsigned, +    // we can do it here for canonicalization purposes. +    ICmpInst::Predicate Pred = ICI->getPredicate(); +    if (CanUseZExt(ICI)) { +      assert(DoesZExtCollapse && "Unprofitable zext?"); +      Ext = new ZExtInst(Op1, IVTy, "zext", ICI); +      Pred = ICmpInst::getUnsignedPredicate(Pred); +    } else { +      assert(DoesSExtCollapse && "Unprofitable sext?"); +      Ext = new SExtInst(Op1, IVTy, "sext", ICI); +      assert(Pred == ICmpInst::getSignedPredicate(Pred) && "Must be signed!"); +    } +    bool Changed; +    L->makeLoopInvariant(Ext, Changed); +    (void)Changed; +    ICmpInst *NewICI = new ICmpInst(ICI, Pred, IV, Ext); +    ICI->replaceAllUsesWith(NewICI); +    DeadInsts.emplace_back(ICI); +  } + +  // Trunc no longer needed. +  TI->replaceAllUsesWith(UndefValue::get(TI->getType())); +  DeadInsts.emplace_back(TI); +  return true; +} +  /// Eliminate an operation that consumes a simple IV and has no observable  /// side-effect given the range of IV values.  IVOperand is guaranteed SCEVable,  /// but UseInst may not be. 
@@ -516,6 +631,10 @@ bool SimplifyIndvar::eliminateIVUser(Instruction *UseInst,      if (eliminateOverflowIntrinsic(CI))        return true; +  if (auto *TI = dyn_cast<TruncInst>(UseInst)) +    if (eliminateTrunc(TI)) +      return true; +    if (eliminateIdentitySCEV(UseInst, IVOperand))      return true; @@ -548,8 +667,8 @@ bool SimplifyIndvar::replaceIVUserWithLoopInvariant(Instruction *I) {    auto *Invariant = Rewriter.expandCodeFor(S, I->getType(), IP);    I->replaceAllUsesWith(Invariant); -  DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I -               << " with loop invariant: " << *S << '\n'); +  LLVM_DEBUG(dbgs() << "INDVARS: Replace IV user: " << *I +                    << " with loop invariant: " << *S << '\n');    ++NumFoldedUser;    Changed = true;    DeadInsts.emplace_back(I); @@ -589,7 +708,7 @@ bool SimplifyIndvar::eliminateIdentitySCEV(Instruction *UseInst,    if (!LI->replacementPreservesLCSSAForm(UseInst, IVOperand))      return false; -  DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n'); +  LLVM_DEBUG(dbgs() << "INDVARS: Eliminated identity: " << *UseInst << '\n');    UseInst->replaceAllUsesWith(IVOperand);    ++NumElimIdentity; @@ -771,6 +890,15 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {        SimpleIVUsers.pop_back_val();      Instruction *UseInst = UseOper.first; +    // If a user of the IndVar is trivially dead, we prefer just to mark it dead +    // rather than try to do some complex analysis or transformation (such as +    // widening) basing on it. +    // TODO: Propagate TLI and pass it here to handle more cases. +    if (isInstructionTriviallyDead(UseInst, /* TLI */ nullptr)) { +      DeadInsts.emplace_back(UseInst); +      continue; +    } +      // Bypass back edges to avoid extra work.      
if (UseInst == CurrIV) continue; @@ -783,7 +911,7 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {      for (unsigned N = 0; IVOperand; ++N) {        assert(N <= Simplified.size() && "runaway iteration"); -      Value *NewOper = foldIVUser(UseOper.first, IVOperand); +      Value *NewOper = foldIVUser(UseInst, IVOperand);        if (!NewOper)          break; // done folding        IVOperand = dyn_cast<Instruction>(NewOper); @@ -791,12 +919,12 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {      if (!IVOperand)        continue; -    if (eliminateIVUser(UseOper.first, IVOperand)) { +    if (eliminateIVUser(UseInst, IVOperand)) {        pushIVUsers(IVOperand, L, Simplified, SimpleIVUsers);        continue;      } -    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseOper.first)) { +    if (BinaryOperator *BO = dyn_cast<BinaryOperator>(UseInst)) {        if ((isa<OverflowingBinaryOperator>(BO) &&             strengthenOverflowingOperation(BO, IVOperand)) ||            (isa<ShlOperator>(BO) && strengthenRightShift(BO, IVOperand))) { @@ -806,13 +934,13 @@ void SimplifyIndvar::simplifyUsers(PHINode *CurrIV, IVVisitor *V) {        }      } -    CastInst *Cast = dyn_cast<CastInst>(UseOper.first); +    CastInst *Cast = dyn_cast<CastInst>(UseInst);      if (V && Cast) {        V->visitCast(Cast);        continue;      } -    if (isSimpleIVUser(UseOper.first, L, SE)) { -      pushIVUsers(UseOper.first, L, Simplified, SimpleIVUsers); +    if (isSimpleIVUser(UseInst, L, SE)) { +      pushIVUsers(UseInst, L, Simplified, SimpleIVUsers);      }    }  } diff --git a/lib/Transforms/Utils/SimplifyInstructions.cpp b/lib/Transforms/Utils/SimplifyInstructions.cpp deleted file mode 100644 index f3d4f2ef38d7..000000000000 --- a/lib/Transforms/Utils/SimplifyInstructions.cpp +++ /dev/null @@ -1,152 +0,0 @@ -//===------ SimplifyInstructions.cpp - Remove redundant instructions ------===// -// -//                     The LLVM Compiler 
Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This is a utility pass used for testing the InstructionSimplify analysis. -// The analysis is applied to every instruction, and if it simplifies then the -// instruction is replaced by the simplification.  If you are looking for a pass -// that performs serious instruction folding, use the instcombine pass instead. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Transforms/Utils/SimplifyInstructions.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AssumptionCache.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/OptimizationRemarkEmitter.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Type.h" -#include "llvm/Pass.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -using namespace llvm; - -#define DEBUG_TYPE "instsimplify" - -STATISTIC(NumSimplified, "Number of redundant instructions removed"); - -static bool runImpl(Function &F, const SimplifyQuery &SQ, -                    OptimizationRemarkEmitter *ORE) { -  SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2; -  bool Changed = false; - -  do { -    for (BasicBlock *BB : depth_first(&F.getEntryBlock())) { -      // Here be subtlety: the iterator must be incremented before the loop -      // body (not sure why), so a range-for loop won't work here. 
-      for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) { -        Instruction *I = &*BI++; -        // The first time through the loop ToSimplify is empty and we try to -        // simplify all instructions.  On later iterations ToSimplify is not -        // empty and we only bother simplifying instructions that are in it. -        if (!ToSimplify->empty() && !ToSimplify->count(I)) -          continue; - -        // Don't waste time simplifying unused instructions. -        if (!I->use_empty()) { -          if (Value *V = SimplifyInstruction(I, SQ, ORE)) { -            // Mark all uses for resimplification next time round the loop. -            for (User *U : I->users()) -              Next->insert(cast<Instruction>(U)); -            I->replaceAllUsesWith(V); -            ++NumSimplified; -            Changed = true; -          } -        } -        if (RecursivelyDeleteTriviallyDeadInstructions(I, SQ.TLI)) { -          // RecursivelyDeleteTriviallyDeadInstruction can remove more than one -          // instruction, so simply incrementing the iterator does not work. -          // When instructions get deleted re-iterate instead. -          BI = BB->begin(); -          BE = BB->end(); -          Changed = true; -        } -      } -    } - -    // Place the list of instructions to simplify on the next loop iteration -    // into ToSimplify. 
-    std::swap(ToSimplify, Next); -    Next->clear(); -  } while (!ToSimplify->empty()); - -  return Changed; -} - -namespace { -  struct InstSimplifier : public FunctionPass { -    static char ID; // Pass identification, replacement for typeid -    InstSimplifier() : FunctionPass(ID) { -      initializeInstSimplifierPass(*PassRegistry::getPassRegistry()); -    } - -    void getAnalysisUsage(AnalysisUsage &AU) const override { -      AU.setPreservesCFG(); -      AU.addRequired<DominatorTreeWrapperPass>(); -      AU.addRequired<AssumptionCacheTracker>(); -      AU.addRequired<TargetLibraryInfoWrapperPass>(); -      AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); -    } - -    /// runOnFunction - Remove instructions that simplify. -    bool runOnFunction(Function &F) override { -      if (skipFunction(F)) -        return false; - -      const DominatorTree *DT = -          &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); -      const TargetLibraryInfo *TLI = -          &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); -      AssumptionCache *AC = -          &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); -      OptimizationRemarkEmitter *ORE = -          &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); -      const DataLayout &DL = F.getParent()->getDataLayout(); -      const SimplifyQuery SQ(DL, TLI, DT, AC); -      return runImpl(F, SQ, ORE); -    } -  }; -} - -char InstSimplifier::ID = 0; -INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify", -                      "Remove redundant instructions", false, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_END(InstSimplifier, "instsimplify", -                    "Remove redundant instructions", false, false) -char &llvm::InstructionSimplifierID = InstSimplifier::ID; - 
-// Public interface to the simplify instructions pass. -FunctionPass *llvm::createInstructionSimplifierPass() { -  return new InstSimplifier(); -} - -PreservedAnalyses InstSimplifierPass::run(Function &F, -                                      FunctionAnalysisManager &AM) { -  auto &DT = AM.getResult<DominatorTreeAnalysis>(F); -  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F); -  auto &AC = AM.getResult<AssumptionAnalysis>(F); -  auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F); -  const DataLayout &DL = F.getParent()->getDataLayout(); -  const SimplifyQuery SQ(DL, &TLI, &DT, &AC); -  bool Changed = runImpl(F, SQ, &ORE); -  if (!Changed) -    return PreservedAnalyses::all(); - -  PreservedAnalyses PA; -  PA.preserveSet<CFGAnalyses>(); -  return PA; -} diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 03a1d55ddc30..8c48597fc2e4 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -7,10 +7,8 @@  //  //===----------------------------------------------------------------------===//  // -// This is a utility pass used for testing the InstructionSimplify analysis. -// The analysis is applied to every instruction, and if it simplifies then the -// instruction is replaced by the simplification.  If you are looking for a pass -// that performs serious instruction folding, use the instcombine pass instead. +// This file implements the library calls simplifier. It does not implement +// any pass, but can be used by other passes to do simplifications.  
//  //===----------------------------------------------------------------------===// @@ -21,7 +19,9 @@  #include "llvm/Analysis/ConstantFolding.h"  #include "llvm/Analysis/OptimizationRemarkEmitter.h"  #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Transforms/Utils/Local.h"  #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/CaptureTracking.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/IRBuilder.h" @@ -33,7 +33,6 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/KnownBits.h"  #include "llvm/Transforms/Utils/BuildLibCalls.h" -#include "llvm/Transforms/Utils/Local.h"  using namespace llvm;  using namespace PatternMatch; @@ -104,19 +103,51 @@ static bool callHasFloatingPointArgument(const CallInst *CI) {    });  } -/// \brief Check whether the overloaded unary floating point function -/// corresponding to \a Ty is available. -static bool hasUnaryFloatFn(const TargetLibraryInfo *TLI, Type *Ty, -                            LibFunc DoubleFn, LibFunc FloatFn, -                            LibFunc LongDoubleFn) { -  switch (Ty->getTypeID()) { -  case Type::FloatTyID: -    return TLI->has(FloatFn); -  case Type::DoubleTyID: -    return TLI->has(DoubleFn); -  default: -    return TLI->has(LongDoubleFn); -  } +static Value *convertStrToNumber(CallInst *CI, StringRef &Str, int64_t Base) { +  if (Base < 2 || Base > 36) +    // handle special zero base +    if (Base != 0) +      return nullptr; + +  char *End; +  std::string nptr = Str.str(); +  errno = 0; +  long long int Result = strtoll(nptr.c_str(), &End, Base); +  if (errno) +    return nullptr; + +  // if we assume all possible target locales are ASCII supersets, +  // then if strtoll successfully parses a number on the host, +  // it will also successfully parse the same way on the target +  if (*End != '\0') +    return nullptr; + +  if (!isIntN(CI->getType()->getPrimitiveSizeInBits(), Result)) +    return nullptr; + +  return 
ConstantInt::get(CI->getType(), Result); +} + +static bool isLocallyOpenedFile(Value *File, CallInst *CI, IRBuilder<> &B, +                                const TargetLibraryInfo *TLI) { +  CallInst *FOpen = dyn_cast<CallInst>(File); +  if (!FOpen) +    return false; + +  Function *InnerCallee = FOpen->getCalledFunction(); +  if (!InnerCallee) +    return false; + +  LibFunc Func; +  if (!TLI->getLibFunc(*InnerCallee, Func) || !TLI->has(Func) || +      Func != LibFunc_fopen) +    return false; + +  inferLibFuncAttributes(*CI->getCalledFunction(), *TLI); +  if (PointerMayBeCaptured(File, true, true)) +    return false; + +  return true;  }  //===----------------------------------------------------------------------===// @@ -156,9 +187,8 @@ Value *LibCallSimplifier::emitStrLenMemCpy(Value *Src, Value *Dst, uint64_t Len,    // We have enough information to now generate the memcpy call to do the    // concatenation for us.  Make a memcpy to copy the nul byte with align = 1. -  B.CreateMemCpy(CpyDst, Src, -                 ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1), -                 1); +  B.CreateMemCpy(CpyDst, 1, Src, 1, +                 ConstantInt::get(DL.getIntPtrType(Src->getContext()), Len + 1));    return Dst;  } @@ -346,8 +376,8 @@ Value *LibCallSimplifier::optimizeStrCpy(CallInst *CI, IRBuilder<> &B) {    // We have enough information to now generate the memcpy call to do the    // copy for us.  Make a memcpy to copy the nul byte with align = 1. -  B.CreateMemCpy(Dst, Src, -                 ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len), 1); +  B.CreateMemCpy(Dst, 1, Src, 1, +                 ConstantInt::get(DL.getIntPtrType(CI->getContext()), Len));    return Dst;  } @@ -371,7 +401,7 @@ Value *LibCallSimplifier::optimizeStpCpy(CallInst *CI, IRBuilder<> &B) {    // We have enough information to now generate the memcpy call to do the    // copy for us.  Make a memcpy to copy the nul byte with align = 1. 
-  B.CreateMemCpy(Dst, Src, LenV, 1); +  B.CreateMemCpy(Dst, 1, Src, 1, LenV);    return DstEnd;  } @@ -388,7 +418,7 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {    --SrcLen;    if (SrcLen == 0) { -    // strncpy(x, "", y) -> memset(x, '\0', y, 1) +    // strncpy(x, "", y) -> memset(align 1 x, '\0', y)      B.CreateMemSet(Dst, B.getInt8('\0'), LenOp, 1);      return Dst;    } @@ -407,8 +437,8 @@ Value *LibCallSimplifier::optimizeStrNCpy(CallInst *CI, IRBuilder<> &B) {      return nullptr;    Type *PT = Callee->getFunctionType()->getParamType(0); -  // strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant] -  B.CreateMemCpy(Dst, Src, ConstantInt::get(DL.getIntPtrType(PT), Len), 1); +  // strncpy(x, s, c) -> memcpy(align 1 x, align 1 s, c) [s and c are constant] +  B.CreateMemCpy(Dst, 1, Src, 1, ConstantInt::get(DL.getIntPtrType(PT), Len));    return Dst;  } @@ -508,7 +538,7 @@ Value *LibCallSimplifier::optimizeStrLen(CallInst *CI, IRBuilder<> &B) {  }  Value *LibCallSimplifier::optimizeWcslen(CallInst *CI, IRBuilder<> &B) { -  Module &M = *CI->getParent()->getParent()->getParent(); +  Module &M = *CI->getModule();    unsigned WCharSize = TLI->getWCharSize(M) * 8;    // We cannot perform this optimization without wchar_size metadata.    
if (WCharSize == 0) @@ -816,40 +846,19 @@ Value *LibCallSimplifier::optimizeMemCmp(CallInst *CI, IRBuilder<> &B) {  }  Value *LibCallSimplifier::optimizeMemCpy(CallInst *CI, IRBuilder<> &B) { -  // memcpy(x, y, n) -> llvm.memcpy(x, y, n, 1) -  B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), -                 CI->getArgOperand(2), 1); +  // memcpy(x, y, n) -> llvm.memcpy(align 1 x, align 1 y, n) +  B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, +                 CI->getArgOperand(2));    return CI->getArgOperand(0);  }  Value *LibCallSimplifier::optimizeMemMove(CallInst *CI, IRBuilder<> &B) { -  // memmove(x, y, n) -> llvm.memmove(x, y, n, 1) -  B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), -                  CI->getArgOperand(2), 1); +  // memmove(x, y, n) -> llvm.memmove(align 1 x, align 1 y, n) +  B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, +                  CI->getArgOperand(2));    return CI->getArgOperand(0);  } -// TODO: Does this belong in BuildLibCalls or should all of those similar -// functions be moved here? -static Value *emitCalloc(Value *Num, Value *Size, const AttributeList &Attrs, -                         IRBuilder<> &B, const TargetLibraryInfo &TLI) { -  LibFunc Func; -  if (!TLI.getLibFunc("calloc", Func) || !TLI.has(Func)) -    return nullptr; - -  Module *M = B.GetInsertBlock()->getModule(); -  const DataLayout &DL = M->getDataLayout(); -  IntegerType *PtrType = DL.getIntPtrType((B.GetInsertBlock()->getContext())); -  Value *Calloc = M->getOrInsertFunction("calloc", Attrs, B.getInt8PtrTy(), -                                         PtrType, PtrType); -  CallInst *CI = B.CreateCall(Calloc, { Num, Size }, "calloc"); - -  if (const auto *F = dyn_cast<Function>(Calloc->stripPointerCasts())) -    CI->setCallingConv(F->getCallingConv()); - -  return CI; -} -  /// Fold memset[_chk](malloc(n), 0, n) --> calloc(1, n).  
static Value *foldMallocMemset(CallInst *Memset, IRBuilder<> &B,                                 const TargetLibraryInfo &TLI) { @@ -901,12 +910,19 @@ Value *LibCallSimplifier::optimizeMemSet(CallInst *CI, IRBuilder<> &B) {    if (auto *Calloc = foldMallocMemset(CI, B, *TLI))      return Calloc; -  // memset(p, v, n) -> llvm.memset(p, v, n, 1) +  // memset(p, v, n) -> llvm.memset(align 1 p, v, n)    Value *Val = B.CreateIntCast(CI->getArgOperand(1), B.getInt8Ty(), false);    B.CreateMemSet(CI->getArgOperand(0), Val, CI->getArgOperand(2), 1);    return CI->getArgOperand(0);  } +Value *LibCallSimplifier::optimizeRealloc(CallInst *CI, IRBuilder<> &B) { +  if (isa<ConstantPointerNull>(CI->getArgOperand(0))) +    return emitMalloc(CI->getArgOperand(1), B, DL, TLI); + +  return nullptr; +} +  //===----------------------------------------------------------------------===//  // Math Library Optimizations  //===----------------------------------------------------------------------===// @@ -1666,12 +1682,12 @@ Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {  }  Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) { -  // abs(x) -> x >s -1 ? x : -x -  Value *Op = CI->getArgOperand(0); -  Value *Pos = -      B.CreateICmpSGT(Op, Constant::getAllOnesValue(Op->getType()), "ispos"); -  Value *Neg = B.CreateNeg(Op, "neg"); -  return B.CreateSelect(Pos, Op, Neg); +  // abs(x) -> x <s 0 ? -x : x +  // The negation has 'nsw' because abs of INT_MIN is undefined. 
+  Value *X = CI->getArgOperand(0); +  Value *IsNeg = B.CreateICmpSLT(X, Constant::getNullValue(X->getType())); +  Value *NegX = B.CreateNSWNeg(X, "neg"); +  return B.CreateSelect(IsNeg, NegX, X);  }  Value *LibCallSimplifier::optimizeIsDigit(CallInst *CI, IRBuilder<> &B) { @@ -1695,6 +1711,29 @@ Value *LibCallSimplifier::optimizeToAscii(CallInst *CI, IRBuilder<> &B) {                       ConstantInt::get(CI->getType(), 0x7F));  } +Value *LibCallSimplifier::optimizeAtoi(CallInst *CI, IRBuilder<> &B) { +  StringRef Str; +  if (!getConstantStringInfo(CI->getArgOperand(0), Str)) +    return nullptr; + +  return convertStrToNumber(CI, Str, 10); +} + +Value *LibCallSimplifier::optimizeStrtol(CallInst *CI, IRBuilder<> &B) { +  StringRef Str; +  if (!getConstantStringInfo(CI->getArgOperand(0), Str)) +    return nullptr; + +  if (!isa<ConstantPointerNull>(CI->getArgOperand(1))) +    return nullptr; + +  if (ConstantInt *CInt = dyn_cast<ConstantInt>(CI->getArgOperand(2))) { +    return convertStrToNumber(CI, Str, CInt->getSExtValue()); +  } + +  return nullptr; +} +  //===----------------------------------------------------------------------===//  // Formatting and IO Library Call Optimizations  //===----------------------------------------------------------------------===// @@ -1826,15 +1865,13 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {    if (CI->getNumArgOperands() == 2) {      // Make sure there's no % in the constant array.  We could try to handle      // %% -> % in the future if we cared. -    for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) -      if (FormatStr[i] == '%') -        return nullptr; // we found a format specifier, bail out. +    if (FormatStr.find('%') != StringRef::npos) +      return nullptr; // we found a format specifier, bail out. 
-    // sprintf(str, fmt) -> llvm.memcpy(str, fmt, strlen(fmt)+1, 1) -    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), +    // sprintf(str, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, strlen(fmt)+1) +    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1,                     ConstantInt::get(DL.getIntPtrType(CI->getContext()), -                                    FormatStr.size() + 1), -                   1); // Copy the null byte. +                                    FormatStr.size() + 1)); // Copy the null byte.      return ConstantInt::get(CI->getType(), FormatStr.size());    } @@ -1868,7 +1905,7 @@ Value *LibCallSimplifier::optimizeSPrintFString(CallInst *CI, IRBuilder<> &B) {        return nullptr;      Value *IncLen =          B.CreateAdd(Len, ConstantInt::get(Len->getType(), 1), "leninc"); -    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(2), IncLen, 1); +    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, IncLen);      // The sprintf result is the unincremented number of bytes in the string.      return B.CreateIntCast(Len, CI->getType(), false); @@ -1897,6 +1934,93 @@ Value *LibCallSimplifier::optimizeSPrintF(CallInst *CI, IRBuilder<> &B) {    return nullptr;  } +Value *LibCallSimplifier::optimizeSnPrintFString(CallInst *CI, IRBuilder<> &B) { +  // Check for a fixed format string. +  StringRef FormatStr; +  if (!getConstantStringInfo(CI->getArgOperand(2), FormatStr)) +    return nullptr; + +  // Check for size +  ConstantInt *Size = dyn_cast<ConstantInt>(CI->getArgOperand(1)); +  if (!Size) +    return nullptr; + +  uint64_t N = Size->getZExtValue(); + +  // If we just have a format string (nothing else crazy) transform it. +  if (CI->getNumArgOperands() == 3) { +    // Make sure there's no % in the constant array.  We could try to handle +    // %% -> % in the future if we cared. +    if (FormatStr.find('%') != StringRef::npos) +      return nullptr; // we found a format specifier, bail out. 
+ +    if (N == 0) +      return ConstantInt::get(CI->getType(), FormatStr.size()); +    else if (N < FormatStr.size() + 1) +      return nullptr; + +    // snprintf(str, size, fmt) -> llvm.memcpy(align 1 str, align 1 fmt, +    // strlen(fmt)+1) +    B.CreateMemCpy( +        CI->getArgOperand(0), 1, CI->getArgOperand(2), 1, +        ConstantInt::get(DL.getIntPtrType(CI->getContext()), +                         FormatStr.size() + 1)); // Copy the null byte. +    return ConstantInt::get(CI->getType(), FormatStr.size()); +  } + +  // The remaining optimizations require the format string to be "%s" or "%c" +  // and have an extra operand. +  if (FormatStr.size() == 2 && FormatStr[0] == '%' && +      CI->getNumArgOperands() == 4) { + +    // Decode the second character of the format string. +    if (FormatStr[1] == 'c') { +      if (N == 0) +        return ConstantInt::get(CI->getType(), 1); +      else if (N == 1) +        return nullptr; + +      // snprintf(dst, size, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0 +      if (!CI->getArgOperand(3)->getType()->isIntegerTy()) +        return nullptr; +      Value *V = B.CreateTrunc(CI->getArgOperand(3), B.getInt8Ty(), "char"); +      Value *Ptr = castToCStr(CI->getArgOperand(0), B); +      B.CreateStore(V, Ptr); +      Ptr = B.CreateGEP(B.getInt8Ty(), Ptr, B.getInt32(1), "nul"); +      B.CreateStore(B.getInt8(0), Ptr); + +      return ConstantInt::get(CI->getType(), 1); +    } + +    if (FormatStr[1] == 's') { +      // snprintf(dest, size, "%s", str) to llvm.memcpy(dest, str, len+1, 1) +      StringRef Str; +      if (!getConstantStringInfo(CI->getArgOperand(3), Str)) +        return nullptr; + +      if (N == 0) +        return ConstantInt::get(CI->getType(), Str.size()); +      else if (N < Str.size() + 1) +        return nullptr; + +      B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(3), 1, +                     ConstantInt::get(CI->getType(), Str.size() + 1)); + +      // The snprintf result is the 
unincremented number of bytes in the string. +      return ConstantInt::get(CI->getType(), Str.size()); +    } +  } +  return nullptr; +} + +Value *LibCallSimplifier::optimizeSnPrintF(CallInst *CI, IRBuilder<> &B) { +  if (Value *V = optimizeSnPrintFString(CI, B)) { +    return V; +  } + +  return nullptr; +} +  Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {    optimizeErrorReporting(CI, B, 0); @@ -1913,9 +2037,9 @@ Value *LibCallSimplifier::optimizeFPrintFString(CallInst *CI, IRBuilder<> &B) {    // fprintf(F, "foo") --> fwrite("foo", 3, 1, F)    if (CI->getNumArgOperands() == 2) { -    for (unsigned i = 0, e = FormatStr.size(); i != e; ++i) -      if (FormatStr[i] == '%') // Could handle %% -> % if we cared. -        return nullptr;        // We found a format specifier. +    // Could handle %% -> % if we cared. +    if (FormatStr.find('%') != StringRef::npos) +      return nullptr; // We found a format specifier.      return emitFWrite(          CI->getArgOperand(1), @@ -1973,22 +2097,27 @@ Value *LibCallSimplifier::optimizeFWrite(CallInst *CI, IRBuilder<> &B) {    // Get the element size and count.    ConstantInt *SizeC = dyn_cast<ConstantInt>(CI->getArgOperand(1));    ConstantInt *CountC = dyn_cast<ConstantInt>(CI->getArgOperand(2)); -  if (!SizeC || !CountC) -    return nullptr; -  uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue(); - -  // If this is writing zero records, remove the call (it's a noop). -  if (Bytes == 0) -    return ConstantInt::get(CI->getType(), 0); - -  // If this is writing one byte, turn it into fputc. -  // This optimisation is only valid, if the return value is unused. -  if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) -    Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char"); -    Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI); -    return NewCI ? 
ConstantInt::get(CI->getType(), 1) : nullptr; +  if (SizeC && CountC) { +    uint64_t Bytes = SizeC->getZExtValue() * CountC->getZExtValue(); + +    // If this is writing zero records, remove the call (it's a noop). +    if (Bytes == 0) +      return ConstantInt::get(CI->getType(), 0); + +    // If this is writing one byte, turn it into fputc. +    // This optimisation is only valid, if the return value is unused. +    if (Bytes == 1 && CI->use_empty()) { // fwrite(S,1,1,F) -> fputc(S[0],F) +      Value *Char = B.CreateLoad(castToCStr(CI->getArgOperand(0), B), "char"); +      Value *NewCI = emitFPutC(Char, CI->getArgOperand(3), B, TLI); +      return NewCI ? ConstantInt::get(CI->getType(), 1) : nullptr; +    }    } +  if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI)) +    return emitFWriteUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), +                              CI->getArgOperand(2), CI->getArgOperand(3), B, DL, +                              TLI); +    return nullptr;  } @@ -1997,12 +2126,18 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {    // Don't rewrite fputs to fwrite when optimising for size because fwrite    // requires more arguments and thus extra MOVs are required. -  if (CI->getParent()->getParent()->optForSize()) +  if (CI->getFunction()->optForSize())      return nullptr; -  // We can't optimize if return value is used. -  if (!CI->use_empty()) -    return nullptr; +  // Check if has any use +  if (!CI->use_empty()) { +    if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI)) +      return emitFPutSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B, +                               TLI); +    else +      // We can't optimize if return value is used. 
+      return nullptr; +  }    // fputs(s,F) --> fwrite(s,1,strlen(s),F)    uint64_t Len = GetStringLength(CI->getArgOperand(0)); @@ -2016,6 +2151,40 @@ Value *LibCallSimplifier::optimizeFPuts(CallInst *CI, IRBuilder<> &B) {        CI->getArgOperand(1), B, DL, TLI);  } +Value *LibCallSimplifier::optimizeFPutc(CallInst *CI, IRBuilder<> &B) { +  optimizeErrorReporting(CI, B, 1); + +  if (isLocallyOpenedFile(CI->getArgOperand(1), CI, B, TLI)) +    return emitFPutCUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), B, +                             TLI); + +  return nullptr; +} + +Value *LibCallSimplifier::optimizeFGetc(CallInst *CI, IRBuilder<> &B) { +  if (isLocallyOpenedFile(CI->getArgOperand(0), CI, B, TLI)) +    return emitFGetCUnlocked(CI->getArgOperand(0), B, TLI); + +  return nullptr; +} + +Value *LibCallSimplifier::optimizeFGets(CallInst *CI, IRBuilder<> &B) { +  if (isLocallyOpenedFile(CI->getArgOperand(2), CI, B, TLI)) +    return emitFGetSUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), +                             CI->getArgOperand(2), B, TLI); + +  return nullptr; +} + +Value *LibCallSimplifier::optimizeFRead(CallInst *CI, IRBuilder<> &B) { +  if (isLocallyOpenedFile(CI->getArgOperand(3), CI, B, TLI)) +    return emitFReadUnlocked(CI->getArgOperand(0), CI->getArgOperand(1), +                             CI->getArgOperand(2), CI->getArgOperand(3), B, DL, +                             TLI); + +  return nullptr; +} +  Value *LibCallSimplifier::optimizePuts(CallInst *CI, IRBuilder<> &B) {    // Check for a constant string.    
StringRef Str; @@ -2099,6 +2268,8 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,        return optimizeMemMove(CI, Builder);      case LibFunc_memset:        return optimizeMemSet(CI, Builder); +    case LibFunc_realloc: +      return optimizeRealloc(CI, Builder);      case LibFunc_wcslen:        return optimizeWcslen(CI, Builder);      default: @@ -2290,16 +2461,33 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {        return optimizeIsAscii(CI, Builder);      case LibFunc_toascii:        return optimizeToAscii(CI, Builder); +    case LibFunc_atoi: +    case LibFunc_atol: +    case LibFunc_atoll: +      return optimizeAtoi(CI, Builder); +    case LibFunc_strtol: +    case LibFunc_strtoll: +      return optimizeStrtol(CI, Builder);      case LibFunc_printf:        return optimizePrintF(CI, Builder);      case LibFunc_sprintf:        return optimizeSPrintF(CI, Builder); +    case LibFunc_snprintf: +      return optimizeSnPrintF(CI, Builder);      case LibFunc_fprintf:        return optimizeFPrintF(CI, Builder);      case LibFunc_fwrite:        return optimizeFWrite(CI, Builder); +    case LibFunc_fread: +      return optimizeFRead(CI, Builder);      case LibFunc_fputs:        return optimizeFPuts(CI, Builder); +    case LibFunc_fgets: +      return optimizeFGets(CI, Builder); +    case LibFunc_fputc: +      return optimizeFPutc(CI, Builder); +    case LibFunc_fgetc: +      return optimizeFGetc(CI, Builder);      case LibFunc_puts:        return optimizePuts(CI, Builder);      case LibFunc_perror: @@ -2307,8 +2495,6 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {      case LibFunc_vfprintf:      case LibFunc_fiprintf:        return optimizeErrorReporting(CI, Builder, 0); -    case LibFunc_fputc: -      return optimizeErrorReporting(CI, Builder, 1);      default:        return nullptr;      } @@ -2393,8 +2579,8 @@ bool FortifiedLibCallSimplifier::isFortifiedCallFoldable(CallInst *CI,  Value 
*FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,                                                       IRBuilder<> &B) {    if (isFortifiedCallFoldable(CI, 3, 2, false)) { -    B.CreateMemCpy(CI->getArgOperand(0), CI->getArgOperand(1), -                   CI->getArgOperand(2), 1); +    B.CreateMemCpy(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, +                   CI->getArgOperand(2));      return CI->getArgOperand(0);    }    return nullptr; @@ -2403,8 +2589,8 @@ Value *FortifiedLibCallSimplifier::optimizeMemCpyChk(CallInst *CI,  Value *FortifiedLibCallSimplifier::optimizeMemMoveChk(CallInst *CI,                                                        IRBuilder<> &B) {    if (isFortifiedCallFoldable(CI, 3, 2, false)) { -    B.CreateMemMove(CI->getArgOperand(0), CI->getArgOperand(1), -                    CI->getArgOperand(2), 1); +    B.CreateMemMove(CI->getArgOperand(0), 1, CI->getArgOperand(1), 1, +                    CI->getArgOperand(2));      return CI->getArgOperand(0);    }    return nullptr; diff --git a/lib/Transforms/Utils/SplitModule.cpp b/lib/Transforms/Utils/SplitModule.cpp index 968eb0208f43..f8d758c54983 100644 --- a/lib/Transforms/Utils/SplitModule.cpp +++ b/lib/Transforms/Utils/SplitModule.cpp @@ -101,7 +101,8 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,    // At this point module should have the proper mix of globals and locals.    // As we attempt to partition this module, we must not change any    // locals to globals. 
-  DEBUG(dbgs() << "Partition module with (" << M->size() << ")functions\n"); +  LLVM_DEBUG(dbgs() << "Partition module with (" << M->size() +                    << ")functions\n");    ClusterMapType GVtoClusterMap;    ComdatMembersType ComdatMembers; @@ -180,28 +181,31 @@ static void findPartitions(Module *M, ClusterIDMapType &ClusterIDMap,            std::make_pair(std::distance(GVtoClusterMap.member_begin(I),                                         GVtoClusterMap.member_end()), I)); -  std::sort(Sets.begin(), Sets.end(), [](const SortType &a, const SortType &b) { -    if (a.first == b.first) -      return a.second->getData()->getName() > b.second->getData()->getName(); -    else -      return a.first > b.first; -  }); +  llvm::sort(Sets.begin(), Sets.end(), +             [](const SortType &a, const SortType &b) { +               if (a.first == b.first) +                 return a.second->getData()->getName() > +                        b.second->getData()->getName(); +               else +                 return a.first > b.first; +             });    for (auto &I : Sets) {      unsigned CurrentClusterID = BalancinQueue.top().first;      unsigned CurrentClusterSize = BalancinQueue.top().second;      BalancinQueue.pop(); -    DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" << I.first -                 << ") ----> " << I.second->getData()->getName() << "\n"); +    LLVM_DEBUG(dbgs() << "Root[" << CurrentClusterID << "] cluster_size(" +                      << I.first << ") ----> " << I.second->getData()->getName() +                      << "\n");      for (ClusterMapType::member_iterator MI =               GVtoClusterMap.findLeader(I.second);           MI != GVtoClusterMap.member_end(); ++MI) {        if (!Visited.insert(*MI).second)          continue; -      DEBUG(dbgs() << "----> " << (*MI)->getName() -                   << ((*MI)->hasLocalLinkage() ? 
" l " : " e ") << "\n"); +      LLVM_DEBUG(dbgs() << "----> " << (*MI)->getName() +                        << ((*MI)->hasLocalLinkage() ? " l " : " e ") << "\n");        Visited.insert(*MI);        ClusterIDMap[*MI] = CurrentClusterID;        CurrentClusterSize++; @@ -270,7 +274,7 @@ void llvm::SplitModule(    for (unsigned I = 0; I < N; ++I) {      ValueToValueMapTy VMap;      std::unique_ptr<Module> MPart( -        CloneModule(M.get(), VMap, [&](const GlobalValue *GV) { +        CloneModule(*M, VMap, [&](const GlobalValue *GV) {            if (ClusterIDMap.count(GV))              return (ClusterIDMap[GV] == I);            else diff --git a/lib/Transforms/Utils/StripGCRelocates.cpp b/lib/Transforms/Utils/StripGCRelocates.cpp index 49dc15cf5e7c..ac0b519f4a77 100644 --- a/lib/Transforms/Utils/StripGCRelocates.cpp +++ b/lib/Transforms/Utils/StripGCRelocates.cpp @@ -21,7 +21,6 @@  #include "llvm/IR/Type.h"  #include "llvm/Pass.h"  #include "llvm/Support/raw_ostream.h" -#include "llvm/Transforms/Scalar.h"  using namespace llvm; @@ -75,6 +74,3 @@ bool StripGCRelocates::runOnFunction(Function &F) {  INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates",                  "Strip gc.relocates inserted through RewriteStatepointsForGC",                  true, false) -FunctionPass *llvm::createStripGCRelocatesPass() { -  return new StripGCRelocates(); -} diff --git a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp index cd0378e0140c..8956a089a99c 100644 --- a/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp +++ b/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp @@ -9,7 +9,7 @@  #include "llvm/IR/DebugInfo.h"  #include "llvm/Pass.h" -#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/Utils.h"  using namespace llvm;  namespace { diff --git a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index ed444e4cf43c..e633ac0c874d 100644 --- 
a/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -19,7 +19,7 @@  #include "llvm/IR/Function.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/Type.h" -#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils.h"  using namespace llvm;  char UnifyFunctionExitNodes::ID = 0; diff --git a/lib/Transforms/Utils/Utils.cpp b/lib/Transforms/Utils/Utils.cpp index f6c7d1c4989e..afd842f59911 100644 --- a/lib/Transforms/Utils/Utils.cpp +++ b/lib/Transforms/Utils/Utils.cpp @@ -12,7 +12,10 @@  //  //===----------------------------------------------------------------------===// +#include "llvm/Transforms/Utils.h"  #include "llvm-c/Initialization.h" +#include "llvm-c/Transforms/Utils.h" +#include "llvm/IR/LegacyPassManager.h"  #include "llvm/InitializePasses.h"  #include "llvm/PassRegistry.h" @@ -33,7 +36,6 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {    initializePromoteLegacyPassPass(Registry);    initializeStripNonLineTableDebugInfoPass(Registry);    initializeUnifyFunctionExitNodesPass(Registry); -  initializeInstSimplifierPass(Registry);    initializeMetaRenamerPass(Registry);    initializeStripGCRelocatesPass(Registry);    initializePredicateInfoPrinterLegacyPassPass(Registry); @@ -43,3 +45,12 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {  void LLVMInitializeTransformUtils(LLVMPassRegistryRef R) {    initializeTransformUtils(*unwrap(R));  } + +void LLVMAddLowerSwitchPass(LLVMPassManagerRef PM) { +  unwrap(PM)->add(createLowerSwitchPass()); +} + +void LLVMAddPromoteMemoryToRegisterPass(LLVMPassManagerRef PM) { +  unwrap(PM)->add(createPromoteMemoryToRegisterPass()); +} + diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp index c3feea6a0a41..948d9bd5baad 100644 --- a/lib/Transforms/Utils/VNCoercion.cpp +++ b/lib/Transforms/Utils/VNCoercion.cpp @@ -20,8 +20,14 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, 
       StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())      return false; +  uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType()); + +  // The store size must be byte-aligned to support future type casts. +  if (llvm::alignTo(StoreSize, 8) != StoreSize) +    return false; +    // The store has to be at least as big as the load. -  if (DL.getTypeSizeInBits(StoredVal->getType()) < DL.getTypeSizeInBits(LoadTy)) +  if (StoreSize < DL.getTypeSizeInBits(LoadTy))      return false;    // Don't coerce non-integral pointers to integers or vice versa. @@ -389,8 +395,8 @@ Value *getLoadValueForLoad(LoadInst *SrcVal, unsigned Offset, Type *LoadTy,      NewLoad->takeName(SrcVal);      NewLoad->setAlignment(SrcVal->getAlignment()); -    DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); -    DEBUG(dbgs() << "TO: " << *NewLoad << "\n"); +    LLVM_DEBUG(dbgs() << "GVN WIDENED LOAD: " << *SrcVal << "\n"); +    LLVM_DEBUG(dbgs() << "TO: " << *NewLoad << "\n");      // Replace uses of the original load with the wider load.  On a big endian      // system, we need to shift down to get the relevant bits. diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp index 8c9ecbc3503e..55fff3f3872a 100644 --- a/lib/Transforms/Utils/ValueMapper.cpp +++ b/lib/Transforms/Utils/ValueMapper.cpp @@ -25,6 +25,7 @@  #include "llvm/IR/CallSite.h"  #include "llvm/IR/Constant.h"  #include "llvm/IR/Constants.h" +#include "llvm/IR/DebugInfoMetadata.h"  #include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/GlobalAlias.h" @@ -536,13 +537,23 @@ Optional<Metadata *> MDNodeMapper::tryToMapOperand(const Metadata *Op) {    return None;  } +static Metadata *cloneOrBuildODR(const MDNode &N) { +  auto *CT = dyn_cast<DICompositeType>(&N); +  // If ODR type uniquing is enabled, we would have uniqued composite types +  // with identifiers during bitcode reading, so we can just use CT. 
+  if (CT && CT->getContext().isODRUniquingDebugTypes() && +      CT->getIdentifier() != "") +    return const_cast<DICompositeType *>(CT); +  return MDNode::replaceWithDistinct(N.clone()); +} +  MDNode *MDNodeMapper::mapDistinctNode(const MDNode &N) {    assert(N.isDistinct() && "Expected a distinct node");    assert(!M.getVM().getMappedMD(&N) && "Expected an unmapped node"); -  DistinctWorklist.push_back(cast<MDNode>( -      (M.Flags & RF_MoveDistinctMDs) -          ? M.mapToSelf(&N) -          : M.mapToMetadata(&N, MDNode::replaceWithDistinct(N.clone())))); +  DistinctWorklist.push_back( +      cast<MDNode>((M.Flags & RF_MoveDistinctMDs) +                       ? M.mapToSelf(&N) +                       : M.mapToMetadata(&N, cloneOrBuildODR(N))));    return DistinctWorklist.back();  } | 
