Diffstat (limited to 'contrib/llvm/lib/Analysis')
61 files changed, 9542 insertions, 5770 deletions
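The diff below is LLVM's rewrite of the alias-analysis interface: the old chaining AliasAnalysis analysis group (together with helper passes such as AliasAnalysisCounter and AliasDebugger, both deleted here) is replaced by an AAResults aggregation that folds together the answers of every registered AA implementation and is exposed to the legacy pass manager through AAResultsWrapperPass. As a rough orientation aid, here is a minimal, hypothetical client sketch (not part of this commit) of the usage pattern the in-tree passes below, such as AAEval and AliasSetTracker, are migrated to. The pass name and counter are invented for illustration; the API calls it makes (AAResultsWrapperPass::getAAResults, AAResults::getModRefInfo, MemoryLocation::get, MRI_NoModRef) all appear in the hunks that follow.

    // Illustrative sketch only, not part of this commit. A legacy FunctionPass
    // that consumes the new AAResults aggregation instead of the removed
    // AliasAnalysis analysis group.
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/CallSite.h"
    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct ExampleAAClient : FunctionPass {   // hypothetical pass name
      static char ID;
      unsigned NumIndependent = 0;
      ExampleAAClient() : FunctionPass(ID) {}

      void getAnalysisUsage(AnalysisUsage &AU) const override {
        // Was: AU.addRequired<AliasAnalysis>(); the wrapper pass now builds
        // the aggregation over whichever AA implementations are available.
        AU.addRequired<AAResultsWrapperPass>();
        AU.setPreservesAll();
      }

      bool runOnFunction(Function &F) override {
        AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        for (BasicBlock &BB : F)
          for (Instruction &I : BB)
            if (auto CS = ImmutableCallSite(&I))
              for (Instruction &J : BB)
                if (auto *LI = dyn_cast<LoadInst>(&J))
                  // Typical query: can this call modify or read the load's
                  // location? MRI_NoModRef is the bottom of the lattice.
                  if (AA.getModRefInfo(CS, MemoryLocation::get(LI)) ==
                      MRI_NoModRef)
                    ++NumIndependent;
        return false;   // analysis-only client, the IR is not changed
      }
    };
    }
    char ExampleAAClient::ID = 0;
    static RegisterPass<ExampleAAClient>
        X("example-aa-client", "Example AAResults client (illustrative)");

Inside AliasAnalysis.cpp the aggregation itself is just the per-query loops visible in the first hunks: each registered implementation's answer is intersected into a running ModRefInfo or FunctionModRefBehavior value, with an early exit once the bottom of the lattice (MRI_NoModRef or FMRB_DoesNotAccessMemory) is reached.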
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp index 44d137dffd22..35f2e97622fa 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp @@ -25,9 +25,16 @@  //===----------------------------------------------------------------------===//  #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BasicAliasAnalysis.h"  #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/CFLAliasAnalysis.h"  #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScopedNoAliasAA.h"  #include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h"  #include "llvm/Analysis/ValueTracking.h"  #include "llvm/IR/BasicBlock.h"  #include "llvm/IR/DataLayout.h" @@ -40,44 +47,72 @@  #include "llvm/Pass.h"  using namespace llvm; -// Register the AliasAnalysis interface, providing a nice name to refer to. -INITIALIZE_ANALYSIS_GROUP(AliasAnalysis, "Alias Analysis", NoAA) -char AliasAnalysis::ID = 0; +/// Allow disabling BasicAA from the AA results. This is particularly useful +/// when testing to isolate a single AA implementation. +static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden, +                                    cl::init(false)); + +AAResults::AAResults(AAResults &&Arg) : AAs(std::move(Arg.AAs)) { +  for (auto &AA : AAs) +    AA->setAAResults(this); +} + +AAResults &AAResults::operator=(AAResults &&Arg) { +  AAs = std::move(Arg.AAs); +  for (auto &AA : AAs) +    AA->setAAResults(this); +  return *this; +} + +AAResults::~AAResults() { +// FIXME; It would be nice to at least clear out the pointers back to this +// aggregation here, but we end up with non-nesting lifetimes in the legacy +// pass manager that prevent this from working. In the legacy pass manager +// we'll end up with dangling references here in some cases. 
+#if 0 +  for (auto &AA : AAs) +    AA->setAAResults(nullptr); +#endif +}  //===----------------------------------------------------------------------===//  // Default chaining methods  //===----------------------------------------------------------------------===// -AliasResult AliasAnalysis::alias(const MemoryLocation &LocA, -                                 const MemoryLocation &LocB) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); -  return AA->alias(LocA, LocB); +AliasResult AAResults::alias(const MemoryLocation &LocA, +                             const MemoryLocation &LocB) { +  for (const auto &AA : AAs) { +    auto Result = AA->alias(LocA, LocB); +    if (Result != MayAlias) +      return Result; +  } +  return MayAlias;  } -bool AliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, -                                           bool OrLocal) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); -  return AA->pointsToConstantMemory(Loc, OrLocal); -} +bool AAResults::pointsToConstantMemory(const MemoryLocation &Loc, +                                       bool OrLocal) { +  for (const auto &AA : AAs) +    if (AA->pointsToConstantMemory(Loc, OrLocal)) +      return true; -AliasAnalysis::ModRefResult -AliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); -  return AA->getArgModRefInfo(CS, ArgIdx); +  return false;  } -void AliasAnalysis::deleteValue(Value *V) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); -  AA->deleteValue(V); -} +ModRefInfo AAResults::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { +  ModRefInfo Result = MRI_ModRef; + +  for (const auto &AA : AAs) { +    Result = ModRefInfo(Result & AA->getArgModRefInfo(CS, ArgIdx)); -void AliasAnalysis::addEscapingUse(Use &U) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); -  AA->addEscapingUse(U); +    // Early-exit the moment we reach the bottom of the lattice. +    if (Result == MRI_NoModRef) +      return Result; +  } + +  return Result;  } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) { +ModRefInfo AAResults::getModRefInfo(Instruction *I, ImmutableCallSite Call) {    // We may have two calls    if (auto CS = ImmutableCallSite(I)) {      // Check if the two calls modify the same memory @@ -88,289 +123,215 @@ AliasAnalysis::getModRefInfo(Instruction *I, ImmutableCallSite Call) {      // is that if the call references what this instruction      // defines, it must be clobbered by this location.      
const MemoryLocation DefLoc = MemoryLocation::get(I); -    if (getModRefInfo(Call, DefLoc) != AliasAnalysis::NoModRef) -      return AliasAnalysis::ModRef; -  } -  return AliasAnalysis::NoModRef; -} - -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - -  ModRefBehavior MRB = getModRefBehavior(CS); -  if (MRB == DoesNotAccessMemory) -    return NoModRef; - -  ModRefResult Mask = ModRef; -  if (onlyReadsMemory(MRB)) -    Mask = Ref; - -  if (onlyAccessesArgPointees(MRB)) { -    bool doesAlias = false; -    ModRefResult AllArgsMask = NoModRef; -    if (doesAccessArgPointees(MRB)) { -      for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); -           AI != AE; ++AI) { -        const Value *Arg = *AI; -        if (!Arg->getType()->isPointerTy()) -          continue; -        unsigned ArgIdx = std::distance(CS.arg_begin(), AI); -        MemoryLocation ArgLoc = -            MemoryLocation::getForArgument(CS, ArgIdx, *TLI); -        if (!isNoAlias(ArgLoc, Loc)) { -          ModRefResult ArgMask = getArgModRefInfo(CS, ArgIdx); -          doesAlias = true; -          AllArgsMask = ModRefResult(AllArgsMask | ArgMask); -        } -      } -    } -    if (!doesAlias) -      return NoModRef; -    Mask = ModRefResult(Mask & AllArgsMask); +    if (getModRefInfo(Call, DefLoc) != MRI_NoModRef) +      return MRI_ModRef;    } +  return MRI_NoModRef; +} -  // If Loc is a constant memory location, the call definitely could not -  // modify the memory location. -  if ((Mask & Mod) && pointsToConstantMemory(Loc)) -    Mask = ModRefResult(Mask & ~Mod); - -  // If this is the end of the chain, don't forward. -  if (!AA) return Mask; - -  // Otherwise, fall back to the next AA in the chain. But we can merge -  // in any mask we've managed to compute. -  return ModRefResult(AA->getModRefInfo(CS, Loc) & Mask); -} - -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); - -  // If CS1 or CS2 are readnone, they don't interact. -  ModRefBehavior CS1B = getModRefBehavior(CS1); -  if (CS1B == DoesNotAccessMemory) return NoModRef; - -  ModRefBehavior CS2B = getModRefBehavior(CS2); -  if (CS2B == DoesNotAccessMemory) return NoModRef; - -  // If they both only read from memory, there is no dependence. -  if (onlyReadsMemory(CS1B) && onlyReadsMemory(CS2B)) -    return NoModRef; - -  AliasAnalysis::ModRefResult Mask = ModRef; - -  // If CS1 only reads memory, the only dependence on CS2 can be -  // from CS1 reading memory written by CS2. -  if (onlyReadsMemory(CS1B)) -    Mask = ModRefResult(Mask & Ref); - -  // If CS2 only access memory through arguments, accumulate the mod/ref -  // information from CS1's references to the memory referenced by -  // CS2's arguments. 
-  if (onlyAccessesArgPointees(CS2B)) { -    AliasAnalysis::ModRefResult R = NoModRef; -    if (doesAccessArgPointees(CS2B)) { -      for (ImmutableCallSite::arg_iterator -           I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) { -        const Value *Arg = *I; -        if (!Arg->getType()->isPointerTy()) -          continue; -        unsigned CS2ArgIdx = std::distance(CS2.arg_begin(), I); -        auto CS2ArgLoc = MemoryLocation::getForArgument(CS2, CS2ArgIdx, *TLI); - -        // ArgMask indicates what CS2 might do to CS2ArgLoc, and the dependence of -        // CS1 on that location is the inverse. -        ModRefResult ArgMask = getArgModRefInfo(CS2, CS2ArgIdx); -        if (ArgMask == Mod) -          ArgMask = ModRef; -        else if (ArgMask == Ref) -          ArgMask = Mod; - -        R = ModRefResult((R | (getModRefInfo(CS1, CS2ArgLoc) & ArgMask)) & Mask); -        if (R == Mask) -          break; -      } -    } -    return R; -  } +ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS, +                                    const MemoryLocation &Loc) { +  ModRefInfo Result = MRI_ModRef; -  // If CS1 only accesses memory through arguments, check if CS2 references -  // any of the memory referenced by CS1's arguments. If not, return NoModRef. -  if (onlyAccessesArgPointees(CS1B)) { -    AliasAnalysis::ModRefResult R = NoModRef; -    if (doesAccessArgPointees(CS1B)) { -      for (ImmutableCallSite::arg_iterator -           I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) { -        const Value *Arg = *I; -        if (!Arg->getType()->isPointerTy()) -          continue; -        unsigned CS1ArgIdx = std::distance(CS1.arg_begin(), I); -        auto CS1ArgLoc = MemoryLocation::getForArgument(CS1, CS1ArgIdx, *TLI); - -        // ArgMask indicates what CS1 might do to CS1ArgLoc; if CS1 might Mod -        // CS1ArgLoc, then we care about either a Mod or a Ref by CS2. If CS1 -        // might Ref, then we care only about a Mod by CS2. -        ModRefResult ArgMask = getArgModRefInfo(CS1, CS1ArgIdx); -        ModRefResult ArgR = getModRefInfo(CS2, CS1ArgLoc); -        if (((ArgMask & Mod) != NoModRef && (ArgR & ModRef) != NoModRef) || -            ((ArgMask & Ref) != NoModRef && (ArgR & Mod)    != NoModRef)) -          R = ModRefResult((R | ArgMask) & Mask); - -        if (R == Mask) -          break; -      } -    } -    return R; -  } +  for (const auto &AA : AAs) { +    Result = ModRefInfo(Result & AA->getModRefInfo(CS, Loc)); -  // If this is the end of the chain, don't forward. -  if (!AA) return Mask; +    // Early-exit the moment we reach the bottom of the lattice. +    if (Result == MRI_NoModRef) +      return Result; +  } -  // Otherwise, fall back to the next AA in the chain. But we can merge -  // in any mask we've managed to compute. -  return ModRefResult(AA->getModRefInfo(CS1, CS2) & Mask); +  return Result;  } -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); +ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS1, +                                    ImmutableCallSite CS2) { +  ModRefInfo Result = MRI_ModRef; + +  for (const auto &AA : AAs) { +    Result = ModRefInfo(Result & AA->getModRefInfo(CS1, CS2)); + +    // Early-exit the moment we reach the bottom of the lattice. 
+    if (Result == MRI_NoModRef) +      return Result; +  } + +  return Result; +} -  ModRefBehavior Min = UnknownModRefBehavior; +FunctionModRefBehavior AAResults::getModRefBehavior(ImmutableCallSite CS) { +  FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior; -  // Call back into the alias analysis with the other form of getModRefBehavior -  // to see if it can give a better response. -  if (const Function *F = CS.getCalledFunction()) -    Min = getModRefBehavior(F); +  for (const auto &AA : AAs) { +    Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(CS)); -  // If this is the end of the chain, don't forward. -  if (!AA) return Min; +    // Early-exit the moment we reach the bottom of the lattice. +    if (Result == FMRB_DoesNotAccessMemory) +      return Result; +  } -  // Otherwise, fall back to the next AA in the chain. But we can merge -  // in any result we've managed to compute. -  return ModRefBehavior(AA->getModRefBehavior(CS) & Min); +  return Result;  } -AliasAnalysis::ModRefBehavior -AliasAnalysis::getModRefBehavior(const Function *F) { -  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!"); -  return AA->getModRefBehavior(F); +FunctionModRefBehavior AAResults::getModRefBehavior(const Function *F) { +  FunctionModRefBehavior Result = FMRB_UnknownModRefBehavior; + +  for (const auto &AA : AAs) { +    Result = FunctionModRefBehavior(Result & AA->getModRefBehavior(F)); + +    // Early-exit the moment we reach the bottom of the lattice. +    if (Result == FMRB_DoesNotAccessMemory) +      return Result; +  } + +  return Result;  }  //===----------------------------------------------------------------------===// -// AliasAnalysis non-virtual helper method implementation +// Helper method implementation  //===----------------------------------------------------------------------===// -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const LoadInst *L, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const LoadInst *L, +                                    const MemoryLocation &Loc) {    // Be conservative in the face of volatile/atomic.    if (!L->isUnordered()) -    return ModRef; +    return MRI_ModRef;    // If the load address doesn't alias the given address, it doesn't read    // or write the specified memory.    if (Loc.Ptr && !alias(MemoryLocation::get(L), Loc)) -    return NoModRef; +    return MRI_NoModRef;    // Otherwise, a load just reads. -  return Ref; +  return MRI_Ref;  } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const StoreInst *S, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const StoreInst *S, +                                    const MemoryLocation &Loc) {    // Be conservative in the face of volatile/atomic.    if (!S->isUnordered()) -    return ModRef; +    return MRI_ModRef;    if (Loc.Ptr) {      // If the store address cannot alias the pointer in question, then the      // specified memory cannot be modified by the store.      if (!alias(MemoryLocation::get(S), Loc)) -      return NoModRef; +      return MRI_NoModRef;      // If the pointer is a pointer to constant memory, then it could not have      // been modified by this store.      if (pointsToConstantMemory(Loc)) -      return NoModRef; - +      return MRI_NoModRef;    }    // Otherwise, a store just writes. 
-  return Mod; +  return MRI_Mod;  } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const VAArgInst *V, +                                    const MemoryLocation &Loc) {    if (Loc.Ptr) {      // If the va_arg address cannot alias the pointer in question, then the      // specified memory cannot be accessed by the va_arg.      if (!alias(MemoryLocation::get(V), Loc)) -      return NoModRef; +      return MRI_NoModRef;      // If the pointer is a pointer to constant memory, then it could not have      // been modified by this va_arg.      if (pointsToConstantMemory(Loc)) -      return NoModRef; +      return MRI_NoModRef;    }    // Otherwise, a va_arg reads and writes. -  return ModRef; +  return MRI_ModRef; +} + +ModRefInfo AAResults::getModRefInfo(const CatchPadInst *CatchPad, +                                    const MemoryLocation &Loc) { +  if (Loc.Ptr) { +    // If the pointer is a pointer to constant memory, +    // then it could not have been modified by this catchpad. +    if (pointsToConstantMemory(Loc)) +      return MRI_NoModRef; +  } + +  // Otherwise, a catchpad reads and writes. +  return MRI_ModRef; +} + +ModRefInfo AAResults::getModRefInfo(const CatchReturnInst *CatchRet, +                                    const MemoryLocation &Loc) { +  if (Loc.Ptr) { +    // If the pointer is a pointer to constant memory, +    // then it could not have been modified by this catchpad. +    if (pointsToConstantMemory(Loc)) +      return MRI_NoModRef; +  } + +  // Otherwise, a catchret reads and writes. +  return MRI_ModRef;  } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const AtomicCmpXchgInst *CX, -                             const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const AtomicCmpXchgInst *CX, +                                    const MemoryLocation &Loc) {    // Acquire/Release cmpxchg has properties that matter for arbitrary addresses.    if (CX->getSuccessOrdering() > Monotonic) -    return ModRef; +    return MRI_ModRef;    // If the cmpxchg address does not alias the location, it does not access it.    if (Loc.Ptr && !alias(MemoryLocation::get(CX), Loc)) -    return NoModRef; +    return MRI_NoModRef; -  return ModRef; +  return MRI_ModRef;  } -AliasAnalysis::ModRefResult -AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, -                             const MemoryLocation &Loc) { +ModRefInfo AAResults::getModRefInfo(const AtomicRMWInst *RMW, +                                    const MemoryLocation &Loc) {    // Acquire/Release atomicrmw has properties that matter for arbitrary addresses.    if (RMW->getOrdering() > Monotonic) -    return ModRef; +    return MRI_ModRef;    // If the atomicrmw address does not alias the location, it does not access it.    if (Loc.Ptr && !alias(MemoryLocation::get(RMW), Loc)) -    return NoModRef; +    return MRI_NoModRef; -  return ModRef; +  return MRI_ModRef;  } -// FIXME: this is really just shoring-up a deficiency in alias analysis. -// BasicAA isn't willing to spend linear time determining whether an alloca -// was captured before or after this particular call, while we are. However, -// with a smarter AA in place, this test is just wasting compile time. 
-AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore( -    const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT) { +/// \brief Return information about whether a particular call site modifies +/// or reads the specified memory location \p MemLoc before instruction \p I +/// in a BasicBlock. A ordered basic block \p OBB can be used to speed up +/// instruction-ordering queries inside the BasicBlock containing \p I. +/// FIXME: this is really just shoring-up a deficiency in alias analysis. +/// BasicAA isn't willing to spend linear time determining whether an alloca +/// was captured before or after this particular call, while we are. However, +/// with a smarter AA in place, this test is just wasting compile time. +ModRefInfo AAResults::callCapturesBefore(const Instruction *I, +                                         const MemoryLocation &MemLoc, +                                         DominatorTree *DT, +                                         OrderedBasicBlock *OBB) {    if (!DT) -    return AliasAnalysis::ModRef; +    return MRI_ModRef; -  const Value *Object = GetUnderlyingObject(MemLoc.Ptr, *DL); +  const Value *Object = +      GetUnderlyingObject(MemLoc.Ptr, I->getModule()->getDataLayout());    if (!isIdentifiedObject(Object) || isa<GlobalValue>(Object) ||        isa<Constant>(Object)) -    return AliasAnalysis::ModRef; +    return MRI_ModRef;    ImmutableCallSite CS(I);    if (!CS.getInstruction() || CS.getInstruction() == Object) -    return AliasAnalysis::ModRef; +    return MRI_ModRef;    if (llvm::PointerMayBeCapturedBefore(Object, /* ReturnCaptures */ true,                                         /* StoreCaptures */ true, I, DT, -                                       /* include Object */ true)) -    return AliasAnalysis::ModRef; +                                       /* include Object */ true, +                                       /* OrderedBasicBlock */ OBB)) +    return MRI_ModRef;    unsigned ArgNo = 0; -  AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef; +  ModRefInfo R = MRI_NoModRef;    for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();         CI != CE; ++CI, ++ArgNo) {      // Only look at the no-capture or byval pointer arguments.  If this @@ -389,50 +350,20 @@ AliasAnalysis::ModRefResult AliasAnalysis::callCapturesBefore(      if (CS.doesNotAccessMemory(ArgNo))        continue;      if (CS.onlyReadsMemory(ArgNo)) { -      R = AliasAnalysis::Ref; +      R = MRI_Ref;        continue;      } -    return AliasAnalysis::ModRef; +    return MRI_ModRef;    }    return R;  } -// AliasAnalysis destructor: DO NOT move this to the header file for -// AliasAnalysis or else clients of the AliasAnalysis class may not depend on -// the AliasAnalysis.o file in the current .a file, causing alias analysis -// support to not be included in the tool correctly! -// -AliasAnalysis::~AliasAnalysis() {} - -/// InitializeAliasAnalysis - Subclasses must call this method to initialize the -/// AliasAnalysis interface before any other methods are called. -/// -void AliasAnalysis::InitializeAliasAnalysis(Pass *P, const DataLayout *NewDL) { -  DL = NewDL; -  auto *TLIP = P->getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>(); -  TLI = TLIP ? &TLIP->getTLI() : nullptr; -  AA = &P->getAnalysis<AliasAnalysis>(); -} - -// getAnalysisUsage - All alias analysis implementations should invoke this -// directly (using AliasAnalysis::getAnalysisUsage(AU)). 
-void AliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.addRequired<AliasAnalysis>();         // All AA's chain -} - -/// getTypeStoreSize - Return the DataLayout store size for the given type, -/// if known, or a conservative value otherwise. -/// -uint64_t AliasAnalysis::getTypeStoreSize(Type *Ty) { -  return DL ? DL->getTypeStoreSize(Ty) : MemoryLocation::UnknownSize; -} -  /// canBasicBlockModify - Return true if it is possible for execution of the  /// specified basic block to modify the location Loc.  /// -bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB, -                                        const MemoryLocation &Loc) { -  return canInstructionRangeModRef(BB.front(), BB.back(), Loc, Mod); +bool AAResults::canBasicBlockModify(const BasicBlock &BB, +                                    const MemoryLocation &Loc) { +  return canInstructionRangeModRef(BB.front(), BB.back(), Loc, MRI_Mod);  }  /// canInstructionRangeModRef - Return true if it is possible for the @@ -440,28 +371,178 @@ bool AliasAnalysis::canBasicBlockModify(const BasicBlock &BB,  /// mode) the location Loc. The instructions to consider are all  /// of the instructions in the range of [I1,I2] INCLUSIVE.  /// I1 and I2 must be in the same basic block. -bool AliasAnalysis::canInstructionRangeModRef(const Instruction &I1, -                                              const Instruction &I2, -                                              const MemoryLocation &Loc, -                                              const ModRefResult Mode) { +bool AAResults::canInstructionRangeModRef(const Instruction &I1, +                                          const Instruction &I2, +                                          const MemoryLocation &Loc, +                                          const ModRefInfo Mode) {    assert(I1.getParent() == I2.getParent() &&           "Instructions not in same basic block!"); -  BasicBlock::const_iterator I = &I1; -  BasicBlock::const_iterator E = &I2; +  BasicBlock::const_iterator I = I1.getIterator(); +  BasicBlock::const_iterator E = I2.getIterator();    ++E;  // Convert from inclusive to exclusive range.    for (; I != E; ++I) // Check every instruction in range -    if (getModRefInfo(I, Loc) & Mode) +    if (getModRefInfo(&*I, Loc) & Mode)        return true;    return false;  } +// Provide a definition for the root virtual destructor. +AAResults::Concept::~Concept() {} + +namespace { +/// A wrapper pass for external alias analyses. This just squirrels away the +/// callback used to run any analyses and register their results. 
+struct ExternalAAWrapperPass : ImmutablePass { +  typedef std::function<void(Pass &, Function &, AAResults &)> CallbackT; + +  CallbackT CB; + +  static char ID; + +  ExternalAAWrapperPass() : ImmutablePass(ID) { +    initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); +  } +  explicit ExternalAAWrapperPass(CallbackT CB) +      : ImmutablePass(ID), CB(std::move(CB)) { +    initializeExternalAAWrapperPassPass(*PassRegistry::getPassRegistry()); +  } + +  void getAnalysisUsage(AnalysisUsage &AU) const override { +    AU.setPreservesAll(); +  } +}; +} + +char ExternalAAWrapperPass::ID = 0; +INITIALIZE_PASS(ExternalAAWrapperPass, "external-aa", "External Alias Analysis", +                false, true) + +ImmutablePass * +llvm::createExternalAAWrapperPass(ExternalAAWrapperPass::CallbackT Callback) { +  return new ExternalAAWrapperPass(std::move(Callback)); +} + +AAResultsWrapperPass::AAResultsWrapperPass() : FunctionPass(ID) { +  initializeAAResultsWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +char AAResultsWrapperPass::ID = 0; + +INITIALIZE_PASS_BEGIN(AAResultsWrapperPass, "aa", +                      "Function Alias Analysis Results", false, true) +INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(CFLAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ExternalAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ObjCARCAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScopedNoAliasAAWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TypeBasedAAWrapperPass) +INITIALIZE_PASS_END(AAResultsWrapperPass, "aa", +                    "Function Alias Analysis Results", false, true) + +FunctionPass *llvm::createAAResultsWrapperPass() { +  return new AAResultsWrapperPass(); +} + +/// Run the wrapper pass to rebuild an aggregation over known AA passes. +/// +/// This is the legacy pass manager's interface to the new-style AA results +/// aggregation object. Because this is somewhat shoe-horned into the legacy +/// pass manager, we hard code all the specific alias analyses available into +/// it. While the particular set enabled is configured via commandline flags, +/// adding a new alias analysis to LLVM will require adding support for it to +/// this list. +bool AAResultsWrapperPass::runOnFunction(Function &F) { +  // NB! This *must* be reset before adding new AA results to the new +  // AAResults object because in the legacy pass manager, each instance +  // of these will refer to the *same* immutable analyses, registering and +  // unregistering themselves with them. We need to carefully tear down the +  // previous object first, in this case replacing it with an empty one, before +  // registering new results. +  AAR.reset(new AAResults()); + +  // BasicAA is always available for function analyses. Also, we add it first +  // so that it can trump TBAA results when it proves MustAlias. +  // FIXME: TBAA should have an explicit mode to support this and then we +  // should reconsider the ordering here. +  if (!DisableBasicAA) +    AAR->addAAResult(getAnalysis<BasicAAWrapperPass>().getResult()); + +  // Populate the results with the currently available AAs. 
+  if (auto *WrapperPass = getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>()) +    AAR->addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = getAnalysisIfAvailable<TypeBasedAAWrapperPass>()) +    AAR->addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = +          getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>()) +    AAR->addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = getAnalysisIfAvailable<GlobalsAAWrapperPass>()) +    AAR->addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = getAnalysisIfAvailable<SCEVAAWrapperPass>()) +    AAR->addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = getAnalysisIfAvailable<CFLAAWrapperPass>()) +    AAR->addAAResult(WrapperPass->getResult()); + +  // If available, run an external AA providing callback over the results as +  // well. +  if (auto *WrapperPass = getAnalysisIfAvailable<ExternalAAWrapperPass>()) +    if (WrapperPass->CB) +      WrapperPass->CB(*this, F, *AAR); + +  // Analyses don't mutate the IR, so return false. +  return false; +} + +void AAResultsWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<BasicAAWrapperPass>(); + +  // We also need to mark all the alias analysis passes we will potentially +  // probe in runOnFunction as used here to ensure the legacy pass manager +  // preserves them. This hard coding of lists of alias analyses is specific to +  // the legacy pass manager. +  AU.addUsedIfAvailable<ScopedNoAliasAAWrapperPass>(); +  AU.addUsedIfAvailable<TypeBasedAAWrapperPass>(); +  AU.addUsedIfAvailable<objcarc::ObjCARCAAWrapperPass>(); +  AU.addUsedIfAvailable<GlobalsAAWrapperPass>(); +  AU.addUsedIfAvailable<SCEVAAWrapperPass>(); +  AU.addUsedIfAvailable<CFLAAWrapperPass>(); +} + +AAResults llvm::createLegacyPMAAResults(Pass &P, Function &F, +                                        BasicAAResult &BAR) { +  AAResults AAR; + +  // Add in our explicitly constructed BasicAA results. +  if (!DisableBasicAA) +    AAR.addAAResult(BAR); + +  // Populate the results with the other currently available AAs. +  if (auto *WrapperPass = +          P.getAnalysisIfAvailable<ScopedNoAliasAAWrapperPass>()) +    AAR.addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = P.getAnalysisIfAvailable<TypeBasedAAWrapperPass>()) +    AAR.addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = +          P.getAnalysisIfAvailable<objcarc::ObjCARCAAWrapperPass>()) +    AAR.addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = P.getAnalysisIfAvailable<GlobalsAAWrapperPass>()) +    AAR.addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = P.getAnalysisIfAvailable<SCEVAAWrapperPass>()) +    AAR.addAAResult(WrapperPass->getResult()); +  if (auto *WrapperPass = P.getAnalysisIfAvailable<CFLAAWrapperPass>()) +    AAR.addAAResult(WrapperPass->getResult()); + +  return AAR; +} +  /// isNoAliasCall - Return true if this pointer is returned by a noalias  /// function.  
bool llvm::isNoAliasCall(const Value *V) { -  if (isa<CallInst>(V) || isa<InvokeInst>(V)) -    return ImmutableCallSite(cast<Instruction>(V)) -      .paramHasAttr(0, Attribute::NoAlias); +  if (auto CS = ImmutableCallSite(V)) +    return CS.paramHasAttr(0, Attribute::NoAlias);    return false;  } diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp deleted file mode 100644 index 9b6a5a44d80c..000000000000 --- a/contrib/llvm/lib/Analysis/AliasAnalysisCounter.cpp +++ /dev/null @@ -1,173 +0,0 @@ -//===- AliasAnalysisCounter.cpp - Alias Analysis Query Counter ------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass which can be used to count how many alias queries -// are being made and how the alias analysis implementation being used responds. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt<bool> -PrintAll("count-aa-print-all-queries", cl::ReallyHidden, cl::init(true)); -static cl::opt<bool> -PrintAllFailures("count-aa-print-all-failed-queries", cl::ReallyHidden); - -namespace { -  class AliasAnalysisCounter : public ModulePass, public AliasAnalysis { -    unsigned No, May, Partial, Must; -    unsigned NoMR, JustRef, JustMod, MR; -    Module *M; -  public: -    static char ID; // Class identification, replacement for typeinfo -    AliasAnalysisCounter() : ModulePass(ID) { -      initializeAliasAnalysisCounterPass(*PassRegistry::getPassRegistry()); -      No = May = Partial = Must = 0; -      NoMR = JustRef = JustMod = MR = 0; -    } - -    void printLine(const char *Desc, unsigned Val, unsigned Sum) { -      errs() <<  "  " << Val << " " << Desc << " responses (" -             << Val*100/Sum << "%)\n"; -    } -    ~AliasAnalysisCounter() override { -      unsigned AASum = No+May+Partial+Must; -      unsigned MRSum = NoMR+JustRef+JustMod+MR; -      if (AASum + MRSum) { // Print a report if any counted queries occurred... 
-        errs() << "\n===== Alias Analysis Counter Report =====\n" -               << "  Analysis counted:\n" -               << "  " << AASum << " Total Alias Queries Performed\n"; -        if (AASum) { -          printLine("no alias",     No, AASum); -          printLine("may alias",   May, AASum); -          printLine("partial alias", Partial, AASum); -          printLine("must alias", Must, AASum); -          errs() << "  Alias Analysis Counter Summary: " << No*100/AASum << "%/" -                 << May*100/AASum << "%/" -                 << Partial*100/AASum << "%/" -                 << Must*100/AASum<<"%\n\n"; -        } - -        errs() << "  " << MRSum    << " Total Mod/Ref Queries Performed\n"; -        if (MRSum) { -          printLine("no mod/ref",    NoMR, MRSum); -          printLine("ref",        JustRef, MRSum); -          printLine("mod",        JustMod, MRSum); -          printLine("mod/ref",         MR, MRSum); -          errs() << "  Mod/Ref Analysis Counter Summary: " <<NoMR*100/MRSum -                 << "%/" << JustRef*100/MRSum << "%/" << JustMod*100/MRSum -                 << "%/" << MR*100/MRSum <<"%\n\n"; -        } -      } -    } - -    bool runOnModule(Module &M) override { -      this->M = &M; -      InitializeAliasAnalysis(this, &M.getDataLayout()); -      return false; -    } - -    void getAnalysisUsage(AnalysisUsage &AU) const override { -      AliasAnalysis::getAnalysisUsage(AU); -      AU.addRequired<AliasAnalysis>(); -      AU.setPreservesAll(); -    } - -    /// getAdjustedAnalysisPointer - This method is used when a pass implements -    /// an analysis interface through multiple inheritance.  If needed, it -    /// should override this to adjust the this pointer as needed for the -    /// specified pass info. -    void *getAdjustedAnalysisPointer(AnalysisID PI) override { -      if (PI == &AliasAnalysis::ID) -        return (AliasAnalysis*)this; -      return this; -    } -     -    // FIXME: We could count these too... -    bool pointsToConstantMemory(const MemoryLocation &Loc, -                                bool OrLocal) override { -      return getAnalysis<AliasAnalysis>().pointsToConstantMemory(Loc, OrLocal); -    } - -    // Forwarding functions: just delegate to a real AA implementation, counting -    // the number of responses... 
-    AliasResult alias(const MemoryLocation &LocA, -                      const MemoryLocation &LocB) override; - -    ModRefResult getModRefInfo(ImmutableCallSite CS, -                               const MemoryLocation &Loc) override; -    ModRefResult getModRefInfo(ImmutableCallSite CS1, -                               ImmutableCallSite CS2) override { -      return AliasAnalysis::getModRefInfo(CS1,CS2); -    } -  }; -} - -char AliasAnalysisCounter::ID = 0; -INITIALIZE_AG_PASS(AliasAnalysisCounter, AliasAnalysis, "count-aa", -                   "Count Alias Analysis Query Responses", false, true, false) - -ModulePass *llvm::createAliasAnalysisCounterPass() { -  return new AliasAnalysisCounter(); -} - -AliasResult AliasAnalysisCounter::alias(const MemoryLocation &LocA, -                                        const MemoryLocation &LocB) { -  AliasResult R = getAnalysis<AliasAnalysis>().alias(LocA, LocB); - -  const char *AliasString = nullptr; -  switch (R) { -  case NoAlias:   No++;   AliasString = "No alias"; break; -  case MayAlias:  May++;  AliasString = "May alias"; break; -  case PartialAlias: Partial++; AliasString = "Partial alias"; break; -  case MustAlias: Must++; AliasString = "Must alias"; break; -  } - -  if (PrintAll || (PrintAllFailures && R == MayAlias)) { -    errs() << AliasString << ":\t"; -    errs() << "[" << LocA.Size << "B] "; -    LocA.Ptr->printAsOperand(errs(), true, M); -    errs() << ", "; -    errs() << "[" << LocB.Size << "B] "; -    LocB.Ptr->printAsOperand(errs(), true, M); -    errs() << "\n"; -  } - -  return R; -} - -AliasAnalysis::ModRefResult -AliasAnalysisCounter::getModRefInfo(ImmutableCallSite CS, -                                    const MemoryLocation &Loc) { -  ModRefResult R = getAnalysis<AliasAnalysis>().getModRefInfo(CS, Loc); - -  const char *MRString = nullptr; -  switch (R) { -  case NoModRef: NoMR++;     MRString = "NoModRef"; break; -  case Ref:      JustRef++;  MRString = "JustRef"; break; -  case Mod:      JustMod++;  MRString = "JustMod"; break; -  case ModRef:   MR++;       MRString = "ModRef"; break; -  } - -  if (PrintAll || (PrintAllFailures && R == ModRef)) { -    errs() << MRString << ":  Ptr: "; -    errs() << "[" << Loc.Size << "B] "; -    Loc.Ptr->printAsOperand(errs(), true, M); -    errs() << "\t<->" << *CS.getInstruction() << '\n'; -  } -  return R; -} diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp index 5d1b001fe161..12917b650e5e 100644 --- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -21,8 +21,10 @@  #include "llvm/ADT/SetVector.h"  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h"  #include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h"  #include "llvm/IR/InstIterator.h"  #include "llvm/IR/Instructions.h"  #include "llvm/Pass.h" @@ -57,7 +59,7 @@ namespace {      }      void getAnalysisUsage(AnalysisUsage &AU) const override { -      AU.addRequired<AliasAnalysis>(); +      AU.addRequired<AAResultsWrapperPass>();        AU.setPreservesAll();      } @@ -81,7 +83,7 @@ namespace {  char AAEval::ID = 0;  INITIALIZE_PASS_BEGIN(AAEval, "aa-eval",                  "Exhaustive Alias Analysis Precision Evaluator", false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)  INITIALIZE_PASS_END(AAEval, "aa-eval",                  "Exhaustive Alias 
Analysis Precision Evaluator", false, true) @@ -139,16 +141,17 @@ static inline bool isInterestingPointer(Value *V) {  }  bool AAEval::runOnFunction(Function &F) { -  AliasAnalysis &AA = getAnalysis<AliasAnalysis>(); +  const DataLayout &DL = F.getParent()->getDataLayout(); +  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();    SetVector<Value *> Pointers; -  SetVector<CallSite> CallSites; +  SmallSetVector<CallSite, 16> CallSites;    SetVector<Value *> Loads;    SetVector<Value *> Stores; -  for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) -    if (I->getType()->isPointerTy())    // Add all pointer arguments. -      Pointers.insert(I); +  for (auto &I : F.args()) +    if (I.getType()->isPointerTy())    // Add all pointer arguments. +      Pointers.insert(&I);    for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {      if (I->getType()->isPointerTy()) // Add all pointer instructions. @@ -164,10 +167,9 @@ bool AAEval::runOnFunction(Function &F) {        if (!isa<Function>(Callee) && isInterestingPointer(Callee))          Pointers.insert(Callee);        // Consider formals. -      for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end(); -           AI != AE; ++AI) -        if (isInterestingPointer(*AI)) -          Pointers.insert(*AI); +      for (Use &DataOp : CS.data_ops()) +        if (isInterestingPointer(DataOp)) +          Pointers.insert(DataOp);        CallSites.insert(CS);      } else {        // Consider all operands. @@ -188,12 +190,12 @@ bool AAEval::runOnFunction(Function &F) {         I1 != E; ++I1) {      uint64_t I1Size = MemoryLocation::UnknownSize;      Type *I1ElTy = cast<PointerType>((*I1)->getType())->getElementType(); -    if (I1ElTy->isSized()) I1Size = AA.getTypeStoreSize(I1ElTy); +    if (I1ElTy->isSized()) I1Size = DL.getTypeStoreSize(I1ElTy);      for (SetVector<Value *>::iterator I2 = Pointers.begin(); I2 != I1; ++I2) {        uint64_t I2Size = MemoryLocation::UnknownSize;        Type *I2ElTy =cast<PointerType>((*I2)->getType())->getElementType(); -      if (I2ElTy->isSized()) I2Size = AA.getTypeStoreSize(I2ElTy); +      if (I2ElTy->isSized()) I2Size = DL.getTypeStoreSize(I2ElTy);        switch (AA.alias(*I1, I1Size, *I2, I2Size)) {        case NoAlias: @@ -281,30 +283,29 @@ bool AAEval::runOnFunction(Function &F) {    }    // Mod/ref alias analysis: compare all pairs of calls and values -  for (SetVector<CallSite>::iterator C = CallSites.begin(), -         Ce = CallSites.end(); C != Ce; ++C) { +  for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) {      Instruction *I = C->getInstruction();      for (SetVector<Value *>::iterator V = Pointers.begin(), Ve = Pointers.end();           V != Ve; ++V) {        uint64_t Size = MemoryLocation::UnknownSize;        Type *ElTy = cast<PointerType>((*V)->getType())->getElementType(); -      if (ElTy->isSized()) Size = AA.getTypeStoreSize(ElTy); +      if (ElTy->isSized()) Size = DL.getTypeStoreSize(ElTy);        switch (AA.getModRefInfo(*C, *V, Size)) { -      case AliasAnalysis::NoModRef: +      case MRI_NoModRef:          PrintModRefResults("NoModRef", PrintNoModRef, I, *V, F.getParent());          ++NoModRefCount;          break; -      case AliasAnalysis::Mod: +      case MRI_Mod:          PrintModRefResults("Just Mod", PrintMod, I, *V, F.getParent());          ++ModCount;          break; -      case AliasAnalysis::Ref: +      case MRI_Ref:          PrintModRefResults("Just Ref", PrintRef, I, *V, F.getParent());          
++RefCount;          break; -      case AliasAnalysis::ModRef: +      case MRI_ModRef:          PrintModRefResults("Both ModRef", PrintModRef, I, *V, F.getParent());          ++ModRefCount;          break; @@ -313,25 +314,24 @@ bool AAEval::runOnFunction(Function &F) {    }    // Mod/ref alias analysis: compare all pairs of calls -  for (SetVector<CallSite>::iterator C = CallSites.begin(), -         Ce = CallSites.end(); C != Ce; ++C) { -    for (SetVector<CallSite>::iterator D = CallSites.begin(); D != Ce; ++D) { +  for (auto C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { +    for (auto D = CallSites.begin(); D != Ce; ++D) {        if (D == C)          continue;        switch (AA.getModRefInfo(*C, *D)) { -      case AliasAnalysis::NoModRef: +      case MRI_NoModRef:          PrintModRefResults("NoModRef", PrintNoModRef, *C, *D, F.getParent());          ++NoModRefCount;          break; -      case AliasAnalysis::Mod: +      case MRI_Mod:          PrintModRefResults("Just Mod", PrintMod, *C, *D, F.getParent());          ++ModCount;          break; -      case AliasAnalysis::Ref: +      case MRI_Ref:          PrintModRefResults("Just Ref", PrintRef, *C, *D, F.getParent());          ++RefCount;          break; -      case AliasAnalysis::ModRef: +      case MRI_ModRef:          PrintModRefResults("Both ModRef", PrintModRef, *C, *D, F.getParent());          ++ModRefCount;          break; diff --git a/contrib/llvm/lib/Analysis/AliasDebugger.cpp b/contrib/llvm/lib/Analysis/AliasDebugger.cpp deleted file mode 100644 index e5107b3bc827..000000000000 --- a/contrib/llvm/lib/Analysis/AliasDebugger.cpp +++ /dev/null @@ -1,136 +0,0 @@ -//===- AliasDebugger.cpp - Simple Alias Analysis Use Checker --------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This simple pass checks alias analysis users to ensure that if they -// create a new value, they do not query AA without informing it of the value. -// It acts as a shim over any other AA pass you want. -// -// Yes keeping track of every value in the program is expensive, but this is  -// a debugging pass. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include <set> -using namespace llvm; - -namespace { -   -  class AliasDebugger : public ModulePass, public AliasAnalysis { - -    //What we do is simple.  Keep track of every value the AA could -    //know about, and verify that queries are one of those. 
-    //A query to a value that didn't exist when the AA was created -    //means someone forgot to update the AA when creating new values - -    std::set<const Value*> Vals; -     -  public: -    static char ID; // Class identification, replacement for typeinfo -    AliasDebugger() : ModulePass(ID) { -      initializeAliasDebuggerPass(*PassRegistry::getPassRegistry()); -    } - -    bool runOnModule(Module &M) override { -      InitializeAliasAnalysis(this, &M.getDataLayout()); // set up super class - -      for(Module::global_iterator I = M.global_begin(), -            E = M.global_end(); I != E; ++I) { -        Vals.insert(&*I); -        for (User::const_op_iterator OI = I->op_begin(), -             OE = I->op_end(); OI != OE; ++OI) -          Vals.insert(*OI); -      } - -      for(Module::iterator I = M.begin(), -            E = M.end(); I != E; ++I){ -        Vals.insert(&*I); -        if(!I->isDeclaration()) { -          for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); -               AI != AE; ++AI)  -            Vals.insert(&*AI);      -          for (Function::const_iterator FI = I->begin(), FE = I->end(); -               FI != FE; ++FI)  -            for (BasicBlock::const_iterator BI = FI->begin(), BE = FI->end(); -                 BI != BE; ++BI) { -              Vals.insert(&*BI); -              for (User::const_op_iterator OI = BI->op_begin(), -                   OE = BI->op_end(); OI != OE; ++OI) -                Vals.insert(*OI); -            } -        } -         -      } -      return false; -    } - -    void getAnalysisUsage(AnalysisUsage &AU) const override { -      AliasAnalysis::getAnalysisUsage(AU); -      AU.setPreservesAll();                         // Does not transform code -    } - -    /// getAdjustedAnalysisPointer - This method is used when a pass implements -    /// an analysis interface through multiple inheritance.  If needed, it -    /// should override this to adjust the this pointer as needed for the -    /// specified pass info. 
-    void *getAdjustedAnalysisPointer(AnalysisID PI) override { -      if (PI == &AliasAnalysis::ID) -        return (AliasAnalysis*)this; -      return this; -    } -     -    //------------------------------------------------ -    // Implement the AliasAnalysis API -    // -    AliasResult alias(const MemoryLocation &LocA, -                      const MemoryLocation &LocB) override { -      assert(Vals.find(LocA.Ptr) != Vals.end() && -             "Never seen value in AA before"); -      assert(Vals.find(LocB.Ptr) != Vals.end() && -             "Never seen value in AA before"); -      return AliasAnalysis::alias(LocA, LocB); -    } - -    ModRefResult getModRefInfo(ImmutableCallSite CS, -                               const MemoryLocation &Loc) override { -      assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); -      return AliasAnalysis::getModRefInfo(CS, Loc); -    } - -    ModRefResult getModRefInfo(ImmutableCallSite CS1, -                               ImmutableCallSite CS2) override { -      return AliasAnalysis::getModRefInfo(CS1,CS2); -    } - -    bool pointsToConstantMemory(const MemoryLocation &Loc, -                                bool OrLocal) override { -      assert(Vals.find(Loc.Ptr) != Vals.end() && "Never seen value in AA before"); -      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); -    } - -    void deleteValue(Value *V) override { -      assert(Vals.find(V) != Vals.end() && "Never seen value in AA before"); -      AliasAnalysis::deleteValue(V); -    } - -  }; -} - -char AliasDebugger::ID = 0; -INITIALIZE_AG_PASS(AliasDebugger, AliasAnalysis, "debug-aa", -                   "AA use debugger", false, true, false) - -Pass *llvm::createAliasDebugger() { return new AliasDebugger(); } - diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp index 54d0f4304e1f..3094049b3cc3 100644 --- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp +++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp @@ -17,6 +17,7 @@  #include "llvm/IR/InstIterator.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Type.h"  #include "llvm/Pass.h" @@ -167,8 +168,7 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,    if (!UnknownInsts.empty()) {      for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i)        if (AA.getModRefInfo(UnknownInsts[i], -                           MemoryLocation(Ptr, Size, AAInfo)) != -          AliasAnalysis::NoModRef) +                           MemoryLocation(Ptr, Size, AAInfo)) != MRI_NoModRef)          return true;    } @@ -182,16 +182,14 @@ bool AliasSet::aliasesUnknownInst(const Instruction *Inst,    for (unsigned i = 0, e = UnknownInsts.size(); i != e; ++i) {      ImmutableCallSite C1(getUnknownInst(i)), C2(Inst); -    if (!C1 || !C2 || -        AA.getModRefInfo(C1, C2) != AliasAnalysis::NoModRef || -        AA.getModRefInfo(C2, C1) != AliasAnalysis::NoModRef) +    if (!C1 || !C2 || AA.getModRefInfo(C1, C2) != MRI_NoModRef || +        AA.getModRefInfo(C2, C1) != MRI_NoModRef)        return true;    }    for (iterator I = begin(), E = end(); I != E; ++I) -    if (AA.getModRefInfo( -            Inst, MemoryLocation(I.getPointer(), I.getSize(), I.getAAInfo())) != -        AliasAnalysis::NoModRef) +    if (AA.getModRefInfo(Inst, MemoryLocation(I.getPointer(), I.getSize(), +                                              I.getAAInfo())) != MRI_NoModRef)        
return true;    return false; @@ -223,7 +221,7 @@ AliasSet *AliasSetTracker::findAliasSetForPointer(const Value *Ptr,      if (Cur->Forward || !Cur->aliasesPointer(Ptr, Size, AAInfo, AA)) continue;      if (!FoundSet) {      // If this is the first alias set ptr can go into. -      FoundSet = Cur;     // Remember it. +      FoundSet = &*Cur;   // Remember it.      } else {              // Otherwise, we must merge the sets.        FoundSet->mergeSetIn(*Cur, *this);     // Merge in contents.      } @@ -257,7 +255,7 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {      if (Cur->Forward || !Cur->aliasesUnknownInst(Inst, AA))        continue;      if (!FoundSet)            // If this is the first alias set ptr can go into. -      FoundSet = Cur;         // Remember it. +      FoundSet = &*Cur;       // Remember it.      else if (!Cur->Forward)   // Otherwise, we must merge the sets.        FoundSet->mergeSetIn(*Cur, *this);     // Merge in contents.    } @@ -309,8 +307,9 @@ bool AliasSetTracker::add(LoadInst *LI) {    AliasSet::AccessLattice Access = AliasSet::RefAccess;    bool NewPtr; +  const DataLayout &DL = LI->getModule()->getDataLayout();    AliasSet &AS = addPointer(LI->getOperand(0), -                            AA.getTypeStoreSize(LI->getType()), +                            DL.getTypeStoreSize(LI->getType()),                              AAInfo, Access, NewPtr);    if (LI->isVolatile()) AS.setVolatile();    return NewPtr; @@ -324,9 +323,10 @@ bool AliasSetTracker::add(StoreInst *SI) {    AliasSet::AccessLattice Access = AliasSet::ModAccess;    bool NewPtr; +  const DataLayout &DL = SI->getModule()->getDataLayout();    Value *Val = SI->getOperand(0);    AliasSet &AS = addPointer(SI->getOperand(1), -                            AA.getTypeStoreSize(Val->getType()), +                            DL.getTypeStoreSize(Val->getType()),                              AAInfo, Access, NewPtr);    if (SI->isVolatile()) AS.setVolatile();    return NewPtr; @@ -372,8 +372,8 @@ bool AliasSetTracker::add(Instruction *I) {  }  void AliasSetTracker::add(BasicBlock &BB) { -  for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I) -    add(I); +  for (auto &I : BB) +    add(&I);  }  void AliasSetTracker::add(const AliasSetTracker &AST) { @@ -443,7 +443,8 @@ AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {  }  bool AliasSetTracker::remove(LoadInst *LI) { -  uint64_t Size = AA.getTypeStoreSize(LI->getType()); +  const DataLayout &DL = LI->getModule()->getDataLayout(); +  uint64_t Size = DL.getTypeStoreSize(LI->getType());    AAMDNodes AAInfo;    LI->getAAMetadata(AAInfo); @@ -455,7 +456,8 @@ bool AliasSetTracker::remove(LoadInst *LI) {  }  bool AliasSetTracker::remove(StoreInst *SI) { -  uint64_t Size = AA.getTypeStoreSize(SI->getOperand(0)->getType()); +  const DataLayout &DL = SI->getModule()->getDataLayout(); +  uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType());    AAMDNodes AAInfo;    SI->getAAMetadata(AAInfo); @@ -505,9 +507,6 @@ bool AliasSetTracker::remove(Instruction *I) {  // dangling pointers to deleted instructions.  //  void AliasSetTracker::deleteValue(Value *PtrVal) { -  // Notify the alias analysis implementation that this value is gone. -  AA.deleteValue(PtrVal); -    // If this is a call instruction, remove the callsite from the appropriate    // AliasSet (if present).    
if (Instruction *Inst = dyn_cast<Instruction>(PtrVal)) { @@ -650,11 +649,12 @@ namespace {      void getAnalysisUsage(AnalysisUsage &AU) const override {        AU.setPreservesAll(); -      AU.addRequired<AliasAnalysis>(); +      AU.addRequired<AAResultsWrapperPass>();      }      bool runOnFunction(Function &F) override { -      Tracker = new AliasSetTracker(getAnalysis<AliasAnalysis>()); +      auto &AAWP = getAnalysis<AAResultsWrapperPass>(); +      Tracker = new AliasSetTracker(AAWP.getAAResults());        for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)          Tracker->add(&*I); @@ -668,6 +668,6 @@ namespace {  char AliasSetPrinter::ID = 0;  INITIALIZE_PASS_BEGIN(AliasSetPrinter, "print-alias-sets",                  "Alias Set Printer", false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)  INITIALIZE_PASS_END(AliasSetPrinter, "print-alias-sets",                  "Alias Set Printer", false, true) diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp index 842ff0a14e2f..9c1ac000be2c 100644 --- a/contrib/llvm/lib/Analysis/Analysis.cpp +++ b/contrib/llvm/lib/Analysis/Analysis.cpp @@ -20,23 +20,23 @@ using namespace llvm;  /// initializeAnalysis - Initialize all passes linked into the Analysis library.  void llvm::initializeAnalysis(PassRegistry &Registry) { -  initializeAliasAnalysisAnalysisGroup(Registry); -  initializeAliasAnalysisCounterPass(Registry);    initializeAAEvalPass(Registry); -  initializeAliasDebuggerPass(Registry);    initializeAliasSetPrinterPass(Registry); -  initializeNoAAPass(Registry); -  initializeBasicAliasAnalysisPass(Registry); -  initializeBlockFrequencyInfoPass(Registry); -  initializeBranchProbabilityInfoPass(Registry); +  initializeBasicAAWrapperPassPass(Registry); +  initializeBlockFrequencyInfoWrapperPassPass(Registry); +  initializeBranchProbabilityInfoWrapperPassPass(Registry); +  initializeCallGraphWrapperPassPass(Registry); +  initializeCallGraphPrinterPass(Registry); +  initializeCallGraphViewerPass(Registry);    initializeCostModelAnalysisPass(Registry);    initializeCFGViewerPass(Registry);    initializeCFGPrinterPass(Registry);    initializeCFGOnlyViewerPass(Registry);    initializeCFGOnlyPrinterPass(Registry); -  initializeCFLAliasAnalysisPass(Registry); +  initializeCFLAAWrapperPassPass(Registry);    initializeDependenceAnalysisPass(Registry);    initializeDelinearizationPass(Registry); +  initializeDemandedBitsPass(Registry);    initializeDivergenceAnalysisPass(Registry);    initializeDominanceFrontierPass(Registry);    initializeDomViewerPass(Registry); @@ -47,34 +47,40 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {    initializePostDomPrinterPass(Registry);    initializePostDomOnlyViewerPass(Registry);    initializePostDomOnlyPrinterPass(Registry); +  initializeAAResultsWrapperPassPass(Registry); +  initializeGlobalsAAWrapperPassPass(Registry);    initializeIVUsersPass(Registry);    initializeInstCountPass(Registry);    initializeIntervalPartitionPass(Registry);    initializeLazyValueInfoPass(Registry); -  initializeLibCallAliasAnalysisPass(Registry);    initializeLintPass(Registry);    initializeLoopInfoWrapperPassPass(Registry);    initializeMemDepPrinterPass(Registry);    initializeMemDerefPrinterPass(Registry);    initializeMemoryDependenceAnalysisPass(Registry);    initializeModuleDebugInfoPrinterPass(Registry); +  initializeObjCARCAAWrapperPassPass(Registry);    initializePostDominatorTreePass(Registry);    
initializeRegionInfoPassPass(Registry);    initializeRegionViewerPass(Registry);    initializeRegionPrinterPass(Registry);    initializeRegionOnlyViewerPass(Registry);    initializeRegionOnlyPrinterPass(Registry); -  initializeScalarEvolutionPass(Registry); -  initializeScalarEvolutionAliasAnalysisPass(Registry); +  initializeSCEVAAWrapperPassPass(Registry); +  initializeScalarEvolutionWrapperPassPass(Registry);    initializeTargetTransformInfoWrapperPassPass(Registry); -  initializeTypeBasedAliasAnalysisPass(Registry); -  initializeScopedNoAliasAAPass(Registry); +  initializeTypeBasedAAWrapperPassPass(Registry); +  initializeScopedNoAliasAAWrapperPassPass(Registry);  }  void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {    initializeAnalysis(*unwrap(R));  } +void LLVMInitializeIPA(LLVMPassRegistryRef R) { +  initializeAnalysis(*unwrap(R)); +} +  LLVMBool LLVMVerifyModule(LLVMModuleRef M, LLVMVerifierFailureAction Action,                            char **OutMessages) {    raw_ostream *DebugOS = Action != LLVMReturnStatusAction ? &errs() : nullptr; diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp index 35863542f437..00f346ea115d 100644 --- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp @@ -13,24 +13,21 @@  //  //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/BasicAliasAnalysis.h"  #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h"  #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/CaptureTracking.h"  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/AssumptionCache.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/DerivedTypes.h"  #include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GetElementPtrTypeIterator.h"  #include "llvm/IR/GlobalAlias.h"  #include "llvm/IR/GlobalVariable.h"  #include "llvm/IR/Instructions.h" @@ -42,6 +39,18 @@  #include <algorithm>  using namespace llvm; +/// Enable analysis of recursive PHI nodes. +static cl::opt<bool> EnableRecPhiAnalysis("basicaa-recphi", cl::Hidden, +                                          cl::init(false)); + +/// SearchLimitReached / SearchTimes shows how often the limit of +/// to decompose GEPs is reached. It will affect the precision +/// of basic alias analysis. +#define DEBUG_TYPE "basicaa" +STATISTIC(SearchLimitReached, "Number of times the limit to " +                              "decompose GEPs is reached"); +STATISTIC(SearchTimes, "Number of times a GEP is decomposed"); +  /// Cutoff after which to stop analysing a set of phi nodes potentially involved  /// in a cycle. Because we are analysing 'through' phi nodes we need to be  /// careful with value equivalence. We use reachability to make sure a value @@ -57,8 +66,8 @@ static const unsigned MaxLookupSearchDepth = 6;  // Useful predicates  //===----------------------------------------------------------------------===// -/// isNonEscapingLocalObject - Return true if the pointer is to a function-local -/// object that never escapes from the function. 
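
The AliasSetPrinter conversion earlier in this file is the template for any legacy pass that still needs alias information after this change: depend on AAResultsWrapperPass and pull the aggregated AAResults out of it. A hedged, minimal sketch of that usage; the pass name and body are illustrative only:

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
using namespace llvm;

namespace {
struct AAClientExample : public FunctionPass {
  static char ID;
  AAClientExample() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesAll();
    // The old addRequired<AliasAnalysis>() group dependency becomes a plain
    // pass dependency on the aggregation wrapper.
    AU.addRequired<AAResultsWrapperPass>();
  }

  bool runOnFunction(Function &F) override {
    AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    (void)AA; // queries such as AA.alias(LocA, LocB) would go here
    return false;
  }
};
}

char AAClientExample::ID = 0;
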
+/// Returns true if the pointer is to a function-local object that never +/// escapes from the function.  static bool isNonEscapingLocalObject(const Value *V) {    // If this is a local allocation, check to see if it escapes.    if (isa<AllocaInst>(V) || isNoAliasCall(V)) @@ -82,8 +91,8 @@ static bool isNonEscapingLocalObject(const Value *V) {    return false;  } -/// isEscapeSource - Return true if the pointer is one which would have -/// been considered an escape by isNonEscapingLocalObject. +/// Returns true if the pointer is one which would have been considered an +/// escape by isNonEscapingLocalObject.  static bool isEscapeSource(const Value *V) {    if (isa<CallInst>(V) || isa<InvokeInst>(V) || isa<Argument>(V))      return true; @@ -97,8 +106,7 @@ static bool isEscapeSource(const Value *V) {    return false;  } -/// getObjectSize - Return the size of the object specified by V, or -/// UnknownSize if unknown. +/// Returns the size of the object specified by V, or UnknownSize if unknown.  static uint64_t getObjectSize(const Value *V, const DataLayout &DL,                                const TargetLibraryInfo &TLI,                                bool RoundToAlign = false) { @@ -108,8 +116,8 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &DL,    return MemoryLocation::UnknownSize;  } -/// isObjectSmallerThan - Return true if we can prove that the object specified -/// by V is smaller than Size. +/// Returns true if we can prove that the object specified by V is smaller than +/// Size.  static bool isObjectSmallerThan(const Value *V, uint64_t Size,                                  const DataLayout &DL,                                  const TargetLibraryInfo &TLI) { @@ -144,15 +152,14 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,    // This function needs to use the aligned object size because we allow    // reads a bit past the end given sufficient alignment. -  uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/true); +  uint64_t ObjectSize = getObjectSize(V, DL, TLI, /*RoundToAlign*/ true);    return ObjectSize != MemoryLocation::UnknownSize && ObjectSize < Size;  } -/// isObjectSize - Return true if we can prove that the object specified -/// by V has size Size. -static bool isObjectSize(const Value *V, uint64_t Size, -                         const DataLayout &DL, const TargetLibraryInfo &TLI) { +/// Returns true if we can prove that the object specified by V has size Size. 
+static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL, +                         const TargetLibraryInfo &TLI) {    uint64_t ObjectSize = getObjectSize(V, DL, TLI);    return ObjectSize != MemoryLocation::UnknownSize && ObjectSize == Size;  } @@ -161,42 +168,20 @@ static bool isObjectSize(const Value *V, uint64_t Size,  // GetElementPtr Instruction Decomposition and Analysis  //===----------------------------------------------------------------------===// -namespace { -  enum ExtensionKind { -    EK_NotExtended, -    EK_SignExt, -    EK_ZeroExt -  }; - -  struct VariableGEPIndex { -    const Value *V; -    ExtensionKind Extension; -    int64_t Scale; - -    bool operator==(const VariableGEPIndex &Other) const { -      return V == Other.V && Extension == Other.Extension && -        Scale == Other.Scale; -    } - -    bool operator!=(const VariableGEPIndex &Other) const { -      return !operator==(Other); -    } -  }; -} - - -/// GetLinearExpression - Analyze the specified value as a linear expression: -/// "A*V + B", where A and B are constant integers.  Return the scale and offset -/// values as APInts and return V as a Value*, and return whether we looked -/// through any sign or zero extends.  The incoming Value is known to have -/// IntegerType and it may already be sign or zero extended. +/// Analyzes the specified value as a linear expression: "A*V + B", where A and +/// B are constant integers. +/// +/// Returns the scale and offset values as APInts and return V as a Value*, and +/// return whether we looked through any sign or zero extends.  The incoming +/// Value is known to have IntegerType and it may already be sign or zero +/// extended.  ///  /// Note that this looks through extends, so the high bits may not be  /// represented in the result. -static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset, -                                  ExtensionKind &Extension, -                                  const DataLayout &DL, unsigned Depth, -                                  AssumptionCache *AC, DominatorTree *DT) { +/*static*/ const Value *BasicAAResult::GetLinearExpression( +    const Value *V, APInt &Scale, APInt &Offset, unsigned &ZExtBits, +    unsigned &SExtBits, const DataLayout &DL, unsigned Depth, +    AssumptionCache *AC, DominatorTree *DT, bool &NSW, bool &NUW) {    assert(V->getType()->isIntegerTy() && "Not an integer value");    // Limit our recursion depth. @@ -206,54 +191,125 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,      return V;    } -  if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) { +  if (const ConstantInt *Const = dyn_cast<ConstantInt>(V)) { +    // if it's a constant, just convert it to an offset and remove the variable. +    // If we've been called recursively the Offset bit width will be greater +    // than the constant's (the Offset's always as wide as the outermost call), +    // so we'll zext here and process any extension in the isa<SExtInst> & +    // isa<ZExtInst> cases below. +    Offset += Const->getValue().zextOrSelf(Offset.getBitWidth()); +    assert(Scale == 0 && "Constant values don't have a scale"); +    return V; +  } + +  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {      if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) { + +      // If we've been called recursively then Offset and Scale will be wider +      // that the BOp operands. 
We'll always zext it here as we'll process sign +      // extensions below (see the isa<SExtInst> / isa<ZExtInst> cases). +      APInt RHS = RHSC->getValue().zextOrSelf(Offset.getBitWidth()); +        switch (BOp->getOpcode()) { -      default: break; +      default: +        // We don't understand this instruction, so we can't decompose it any +        // further. +        Scale = 1; +        Offset = 0; +        return V;        case Instruction::Or:          // X|C == X+C if all the bits in C are unset in X.  Otherwise we can't          // analyze it.          if (!MaskedValueIsZero(BOp->getOperand(0), RHSC->getValue(), DL, 0, AC, -                               BOp, DT)) -          break; -        // FALL THROUGH. +                               BOp, DT)) { +          Scale = 1; +          Offset = 0; +          return V; +        } +      // FALL THROUGH.        case Instruction::Add: -        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, -                                DL, Depth + 1, AC, DT); -        Offset += RHSC->getValue(); -        return V; +        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, +                                SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); +        Offset += RHS; +        break; +      case Instruction::Sub: +        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, +                                SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); +        Offset -= RHS; +        break;        case Instruction::Mul: -        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, -                                DL, Depth + 1, AC, DT); -        Offset *= RHSC->getValue(); -        Scale *= RHSC->getValue(); -        return V; +        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, +                                SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); +        Offset *= RHS; +        Scale *= RHS; +        break;        case Instruction::Shl: -        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, Extension, -                                DL, Depth + 1, AC, DT); -        Offset <<= RHSC->getValue().getLimitedValue(); -        Scale <<= RHSC->getValue().getLimitedValue(); +        V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits, +                                SExtBits, DL, Depth + 1, AC, DT, NSW, NUW); +        Offset <<= RHS.getLimitedValue(); +        Scale <<= RHS.getLimitedValue(); +        // the semantics of nsw and nuw for left shifts don't match those of +        // multiplications, so we won't propagate them. +        NSW = NUW = false;          return V;        } + +      if (isa<OverflowingBinaryOperator>(BOp)) { +        NUW &= BOp->hasNoUnsignedWrap(); +        NSW &= BOp->hasNoSignedWrap(); +      } +      return V;      }    }    // Since GEP indices are sign extended anyway, we don't care about the high    // bits of a sign or zero extended value - just scales and offsets.  The    // extensions have to be consistent though. 
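
As a way to picture what GetLinearExpression computes, here is a deliberately simplified, self-contained model of the same recursion: walk through adds, subs, muls and shifts whose right operand is a constant and accumulate Scale and Offset so that the whole expression equals Scale * V + Offset. Everything in it is illustrative; it ignores bit widths, wrapping flags and the sign/zero-extension bookkeeping handled above.

#include <cstdint>
#include <cstdio>
#include <memory>

struct Expr {
  enum Kind { Leaf, Add, Sub, Mul, Shl } K;
  std::unique_ptr<Expr> LHS; // operand being decomposed further
  int64_t RHS = 0;           // constant right-hand side
  explicit Expr(Kind K) : K(K) {}
};

// Returns the leaf V and updates Scale/Offset so the expression equals
// Scale * V + Offset. The caller starts with Scale = 1, Offset = 0.
static const Expr *decompose(const Expr *E, int64_t &Scale, int64_t &Offset) {
  switch (E->K) {
  case Expr::Leaf:
    return E;
  case Expr::Add: {
    const Expr *V = decompose(E->LHS.get(), Scale, Offset);
    Offset += E->RHS;
    return V;
  }
  case Expr::Sub: {
    const Expr *V = decompose(E->LHS.get(), Scale, Offset);
    Offset -= E->RHS;
    return V;
  }
  case Expr::Mul: {
    const Expr *V = decompose(E->LHS.get(), Scale, Offset);
    Offset *= E->RHS;
    Scale *= E->RHS;
    return V;
  }
  case Expr::Shl: {
    const Expr *V = decompose(E->LHS.get(), Scale, Offset);
    Offset <<= E->RHS;
    Scale <<= E->RHS;
    return V;
  }
  }
  return E;
}

int main() {
  // Models "(i + 3) << 2", i.e. 4*i + 12.
  auto I = std::make_unique<Expr>(Expr::Leaf);
  auto AddNode = std::make_unique<Expr>(Expr::Add);
  AddNode->LHS = std::move(I);
  AddNode->RHS = 3;
  auto ShlNode = std::make_unique<Expr>(Expr::Shl);
  ShlNode->LHS = std::move(AddNode);
  ShlNode->RHS = 2;

  int64_t Scale = 1, Offset = 0;
  decompose(ShlNode.get(), Scale, Offset);
  std::printf("Scale = %lld, Offset = %lld\n", (long long)Scale,
              (long long)Offset); // prints Scale = 4, Offset = 12
  return 0;
}
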
-  if ((isa<SExtInst>(V) && Extension != EK_ZeroExt) || -      (isa<ZExtInst>(V) && Extension != EK_SignExt)) { +  if (isa<SExtInst>(V) || isa<ZExtInst>(V)) {      Value *CastOp = cast<CastInst>(V)->getOperand(0); -    unsigned OldWidth = Scale.getBitWidth(); +    unsigned NewWidth = V->getType()->getPrimitiveSizeInBits();      unsigned SmallWidth = CastOp->getType()->getPrimitiveSizeInBits(); -    Scale = Scale.trunc(SmallWidth); -    Offset = Offset.trunc(SmallWidth); -    Extension = isa<SExtInst>(V) ? EK_SignExt : EK_ZeroExt; - -    Value *Result = GetLinearExpression(CastOp, Scale, Offset, Extension, DL, -                                        Depth + 1, AC, DT); -    Scale = Scale.zext(OldWidth); -    Offset = Offset.zext(OldWidth); +    unsigned OldZExtBits = ZExtBits, OldSExtBits = SExtBits; +    const Value *Result = +        GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL, +                            Depth + 1, AC, DT, NSW, NUW); + +    // zext(zext(%x)) == zext(%x), and similiarly for sext; we'll handle this +    // by just incrementing the number of bits we've extended by. +    unsigned ExtendedBy = NewWidth - SmallWidth; + +    if (isa<SExtInst>(V) && ZExtBits == 0) { +      // sext(sext(%x, a), b) == sext(%x, a + b) + +      if (NSW) { +        // We haven't sign-wrapped, so it's valid to decompose sext(%x + c) +        // into sext(%x) + sext(c). We'll sext the Offset ourselves: +        unsigned OldWidth = Offset.getBitWidth(); +        Offset = Offset.trunc(SmallWidth).sext(NewWidth).zextOrSelf(OldWidth); +      } else { +        // We may have signed-wrapped, so don't decompose sext(%x + c) into +        // sext(%x) + sext(c) +        Scale = 1; +        Offset = 0; +        Result = CastOp; +        ZExtBits = OldZExtBits; +        SExtBits = OldSExtBits; +      } +      SExtBits += ExtendedBy; +    } else { +      // sext(zext(%x, a), b) = zext(zext(%x, a), b) = zext(%x, a + b) + +      if (!NUW) { +        // We may have unsigned-wrapped, so don't decompose zext(%x + c) into +        // zext(%x) + zext(c) +        Scale = 1; +        Offset = 0; +        Result = CastOp; +        ZExtBits = OldZExtBits; +        SExtBits = OldSExtBits; +      } +      ZExtBits += ExtendedBy; +    }      return Result;    } @@ -263,29 +319,27 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,    return V;  } -/// DecomposeGEPExpression - If V is a symbolic pointer expression, decompose it -/// into a base pointer with a constant offset and a number of scaled symbolic -/// offsets. +/// If V is a symbolic pointer expression, decompose it into a base pointer +/// with a constant offset and a number of scaled symbolic offsets.  /// -/// The scaled symbolic offsets (represented by pairs of a Value* and a scale in -/// the VarIndices vector) are Value*'s that are known to be scaled by the -/// specified amount, but which may have other unrepresented high bits. As such, -/// the gep cannot necessarily be reconstructed from its decomposed form. +/// The scaled symbolic offsets (represented by pairs of a Value* and a scale +/// in the VarIndices vector) are Value*'s that are known to be scaled by the +/// specified amount, but which may have other unrepresented high bits. As +/// such, the gep cannot necessarily be reconstructed from its decomposed form.  ///  /// When DataLayout is around, this function is capable of analyzing everything  /// that GetUnderlyingObject can look through. 
To be able to do that  /// GetUnderlyingObject and DecomposeGEPExpression must use the same search -/// depth (MaxLookupSearchDepth). -/// When DataLayout not is around, it just looks through pointer casts. -/// -static const Value * -DecomposeGEPExpression(const Value *V, int64_t &BaseOffs, -                       SmallVectorImpl<VariableGEPIndex> &VarIndices, -                       bool &MaxLookupReached, const DataLayout &DL, -                       AssumptionCache *AC, DominatorTree *DT) { +/// depth (MaxLookupSearchDepth). When DataLayout not is around, it just looks +/// through pointer casts. +/*static*/ const Value *BasicAAResult::DecomposeGEPExpression( +    const Value *V, int64_t &BaseOffs, +    SmallVectorImpl<VariableGEPIndex> &VarIndices, bool &MaxLookupReached, +    const DataLayout &DL, AssumptionCache *AC, DominatorTree *DT) {    // Limit recursion depth to limit compile time in crazy cases.    unsigned MaxLookup = MaxLookupSearchDepth;    MaxLookupReached = false; +  SearchTimes++;    BaseOffs = 0;    do { @@ -318,7 +372,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,          // updated when GetUnderlyingObject is updated). TLI should be          // provided also.          if (const Value *Simplified = -              SimplifyInstruction(const_cast<Instruction *>(I), DL)) { +                SimplifyInstruction(const_cast<Instruction *>(I), DL)) {            V = Simplified;            continue;          } @@ -333,43 +387,47 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,      unsigned AS = GEPOp->getPointerAddressSpace();      // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.      gep_type_iterator GTI = gep_type_begin(GEPOp); -    for (User::const_op_iterator I = GEPOp->op_begin()+1, -         E = GEPOp->op_end(); I != E; ++I) { -      Value *Index = *I; +    for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end(); +         I != E; ++I) { +      const Value *Index = *I;        // Compute the (potentially symbolic) offset in bytes for this index.        if (StructType *STy = dyn_cast<StructType>(*GTI++)) {          // For a struct, add the member offset.          unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue(); -        if (FieldNo == 0) continue; +        if (FieldNo == 0) +          continue;          BaseOffs += DL.getStructLayout(STy)->getElementOffset(FieldNo);          continue;        }        // For an array/pointer, add the element offset, explicitly scaled. -      if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { -        if (CIdx->isZero()) continue; +      if (const ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) { +        if (CIdx->isZero()) +          continue;          BaseOffs += DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();          continue;        }        uint64_t Scale = DL.getTypeAllocSize(*GTI); -      ExtensionKind Extension = EK_NotExtended; +      unsigned ZExtBits = 0, SExtBits = 0;        // If the integer type is smaller than the pointer size, it is implicitly        // sign extended to pointer size.        unsigned Width = Index->getType()->getIntegerBitWidth(); -      if (DL.getPointerSizeInBits(AS) > Width) -        Extension = EK_SignExt; +      unsigned PointerSize = DL.getPointerSizeInBits(AS); +      if (PointerSize > Width) +        SExtBits += PointerSize - Width;        // Use GetLinearExpression to decompose the index into a C1*V+C2 form.        
APInt IndexScale(Width, 0), IndexOffset(Width, 0); -      Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension, DL, -                                  0, AC, DT); +      bool NSW = true, NUW = true; +      Index = GetLinearExpression(Index, IndexScale, IndexOffset, ZExtBits, +                                  SExtBits, DL, 0, AC, DT, NSW, NUW);        // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.        // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale. -      BaseOffs += IndexOffset.getSExtValue()*Scale; +      BaseOffs += IndexOffset.getSExtValue() * Scale;        Scale *= IndexScale.getSExtValue();        // If we already had an occurrence of this index variable, merge this @@ -377,23 +435,23 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,        //   A[x][x] -> x*16 + x*4 -> x*20        // This also ensures that 'x' only appears in the index list once.        for (unsigned i = 0, e = VarIndices.size(); i != e; ++i) { -        if (VarIndices[i].V == Index && -            VarIndices[i].Extension == Extension) { +        if (VarIndices[i].V == Index && VarIndices[i].ZExtBits == ZExtBits && +            VarIndices[i].SExtBits == SExtBits) {            Scale += VarIndices[i].Scale; -          VarIndices.erase(VarIndices.begin()+i); +          VarIndices.erase(VarIndices.begin() + i);            break;          }        }        // Make sure that we have a scale that makes sense for this target's        // pointer size. -      if (unsigned ShiftBits = 64 - DL.getPointerSizeInBits(AS)) { +      if (unsigned ShiftBits = 64 - PointerSize) {          Scale <<= ShiftBits;          Scale = (int64_t)Scale >> ShiftBits;        }        if (Scale) { -        VariableGEPIndex Entry = {Index, Extension, +        VariableGEPIndex Entry = {Index, ZExtBits, SExtBits,                                    static_cast<int64_t>(Scale)};          VarIndices.push_back(Entry);        } @@ -405,196 +463,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,    // If the chain of expressions is too deep, just return early.    MaxLookupReached = true; +  SearchLimitReached++;    return V;  } -//===----------------------------------------------------------------------===// -// BasicAliasAnalysis Pass -//===----------------------------------------------------------------------===// - -#ifndef NDEBUG -static const Function *getParent(const Value *V) { -  if (const Instruction *inst = dyn_cast<Instruction>(V)) -    return inst->getParent()->getParent(); - -  if (const Argument *arg = dyn_cast<Argument>(V)) -    return arg->getParent(); - -  return nullptr; -} - -static bool notDifferentParent(const Value *O1, const Value *O2) { - -  const Function *F1 = getParent(O1); -  const Function *F2 = getParent(O2); - -  return !F1 || !F2 || F1 == F2; -} -#endif - -namespace { -  /// BasicAliasAnalysis - This is the primary alias analysis implementation. 
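
The Scale adjustment just above ("Scale <<= ShiftBits; Scale = (int64_t)Scale >> ShiftBits") is a compact way of sign-extending a pointer-width quantity to 64 bits. A small, hedged illustration of the idiom in isolation; the 32-bit pointer width and the helper name are only examples:

#include <cassert>
#include <cstdint>

static int64_t signExtendFromPointerWidth(uint64_t Scale, unsigned PointerSize) {
  unsigned ShiftBits = 64 - PointerSize; // 0 when pointers are already 64-bit
  if (ShiftBits == 0)
    return (int64_t)Scale;
  Scale <<= ShiftBits;                   // move the pointer-width sign bit up
  return (int64_t)Scale >> ShiftBits;    // arithmetic shift replicates it down
}

int main() {
  // 0xFFFFFFFF is -1 as a 32-bit index; the helper recovers that as int64_t.
  assert(signExtendFromPointerWidth(0xFFFFFFFFull, 32) == -1);
  assert(signExtendFromPointerWidth(5, 32) == 5);
  return 0;
}
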
-  struct BasicAliasAnalysis : public ImmutablePass, public AliasAnalysis { -    static char ID; // Class identification, replacement for typeinfo -    BasicAliasAnalysis() : ImmutablePass(ID) { -      initializeBasicAliasAnalysisPass(*PassRegistry::getPassRegistry()); -    } - -    bool doInitialization(Module &M) override; - -    void getAnalysisUsage(AnalysisUsage &AU) const override { -      AU.addRequired<AliasAnalysis>(); -      AU.addRequired<AssumptionCacheTracker>(); -      AU.addRequired<TargetLibraryInfoWrapperPass>(); -    } - -    AliasResult alias(const MemoryLocation &LocA, -                      const MemoryLocation &LocB) override { -      assert(AliasCache.empty() && "AliasCache must be cleared after use!"); -      assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && -             "BasicAliasAnalysis doesn't support interprocedural queries."); -      AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, -                                     LocB.Ptr, LocB.Size, LocB.AATags); -      // AliasCache rarely has more than 1 or 2 elements, always use -      // shrink_and_clear so it quickly returns to the inline capacity of the -      // SmallDenseMap if it ever grows larger. -      // FIXME: This should really be shrink_to_inline_capacity_and_clear(). -      AliasCache.shrink_and_clear(); -      VisitedPhiBBs.clear(); -      return Alias; -    } - -    ModRefResult getModRefInfo(ImmutableCallSite CS, -                               const MemoryLocation &Loc) override; - -    ModRefResult getModRefInfo(ImmutableCallSite CS1, -                               ImmutableCallSite CS2) override; - -    /// pointsToConstantMemory - Chase pointers until we find a (constant -    /// global) or not. -    bool pointsToConstantMemory(const MemoryLocation &Loc, -                                bool OrLocal) override; - -    /// Get the location associated with a pointer argument of a callsite. -    ModRefResult getArgModRefInfo(ImmutableCallSite CS, -                                  unsigned ArgIdx) override; - -    /// getModRefBehavior - Return the behavior when calling the given -    /// call site. -    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; - -    /// getModRefBehavior - Return the behavior when calling the given function. -    /// For use when the call site is not known. -    ModRefBehavior getModRefBehavior(const Function *F) override; - -    /// getAdjustedAnalysisPointer - This method is used when a pass implements -    /// an analysis interface through multiple inheritance.  If needed, it -    /// should override this to adjust the this pointer as needed for the -    /// specified pass info. -    void *getAdjustedAnalysisPointer(const void *ID) override { -      if (ID == &AliasAnalysis::ID) -        return (AliasAnalysis*)this; -      return this; -    } - -  private: -    // AliasCache - Track alias queries to guard against recursion. -    typedef std::pair<MemoryLocation, MemoryLocation> LocPair; -    typedef SmallDenseMap<LocPair, AliasResult, 8> AliasCacheTy; -    AliasCacheTy AliasCache; - -    /// \brief Track phi nodes we have visited. When interpret "Value" pointer -    /// equality as value equality we need to make sure that the "Value" is not -    /// part of a cycle. Otherwise, two uses could come from different -    /// "iterations" of a cycle and see different values for the same "Value" -    /// pointer. 
-    /// The following example shows the problem: -    ///   %p = phi(%alloca1, %addr2) -    ///   %l = load %ptr -    ///   %addr1 = gep, %alloca2, 0, %l -    ///   %addr2 = gep  %alloca2, 0, (%l + 1) -    ///      alias(%p, %addr1) -> MayAlias ! -    ///   store %l, ... -    SmallPtrSet<const BasicBlock*, 8> VisitedPhiBBs; - -    // Visited - Track instructions visited by pointsToConstantMemory. -    SmallPtrSet<const Value*, 16> Visited; - -    /// \brief Check whether two Values can be considered equivalent. -    /// -    /// In addition to pointer equivalence of \p V1 and \p V2 this checks -    /// whether they can not be part of a cycle in the value graph by looking at -    /// all visited phi nodes an making sure that the phis cannot reach the -    /// value. We have to do this because we are looking through phi nodes (That -    /// is we say noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB). -    bool isValueEqualInPotentialCycles(const Value *V1, const Value *V2); - -    /// \brief Dest and Src are the variable indices from two decomposed -    /// GetElementPtr instructions GEP1 and GEP2 which have common base -    /// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic -    /// difference between the two pointers. -    void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest, -                            const SmallVectorImpl<VariableGEPIndex> &Src); - -    // aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP -    // instruction against another. -    AliasResult aliasGEP(const GEPOperator *V1, uint64_t V1Size, -                         const AAMDNodes &V1AAInfo, -                         const Value *V2, uint64_t V2Size, -                         const AAMDNodes &V2AAInfo, -                         const Value *UnderlyingV1, const Value *UnderlyingV2); - -    // aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI -    // instruction against another. -    AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize, -                         const AAMDNodes &PNAAInfo, -                         const Value *V2, uint64_t V2Size, -                         const AAMDNodes &V2AAInfo); - -    /// aliasSelect - Disambiguate a Select instruction against another value. -    AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize, -                            const AAMDNodes &SIAAInfo, -                            const Value *V2, uint64_t V2Size, -                            const AAMDNodes &V2AAInfo); - -    AliasResult aliasCheck(const Value *V1, uint64_t V1Size, -                           AAMDNodes V1AATag, -                           const Value *V2, uint64_t V2Size, -                           AAMDNodes V2AATag); -  }; -}  // End of anonymous namespace - -// Register this pass... 
-char BasicAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS_BEGIN(BasicAliasAnalysis, AliasAnalysis, "basicaa", -                   "Basic Alias Analysis (stateless AA impl)", -                   false, true, false) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_AG_PASS_END(BasicAliasAnalysis, AliasAnalysis, "basicaa", -                   "Basic Alias Analysis (stateless AA impl)", -                   false, true, false) - - -ImmutablePass *llvm::createBasicAliasAnalysisPass() { -  return new BasicAliasAnalysis(); -} - -/// pointsToConstantMemory - Returns whether the given pointer value -/// points to memory that is local to the function, with global constants being -/// considered local to all functions. -bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, -                                                bool OrLocal) { +/// Returns whether the given pointer value points to memory that is local to +/// the function, with global constants being considered local to all +/// functions. +bool BasicAAResult::pointsToConstantMemory(const MemoryLocation &Loc, +                                           bool OrLocal) {    assert(Visited.empty() && "Visited must be cleared after use!");    unsigned MaxLookup = 8;    SmallVector<const Value *, 16> Worklist;    Worklist.push_back(Loc.Ptr);    do { -    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), *DL); +    const Value *V = GetUnderlyingObject(Worklist.pop_back_val(), DL);      if (!Visited.insert(V).second) {        Visited.clear(); -      return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +      return AAResultBase::pointsToConstantMemory(Loc, OrLocal);      }      // An alloca instruction defines local memory. @@ -608,7 +495,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,        // others.  GV may even be a declaration, not a definition.        if (!GV->isConstant()) {          Visited.clear(); -        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +        return AAResultBase::pointsToConstantMemory(Loc, OrLocal);        }        continue;      } @@ -626,7 +513,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,        // Don't bother inspecting phi nodes with many operands.        if (PN->getNumIncomingValues() > MaxLookup) {          Visited.clear(); -        return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +        return AAResultBase::pointsToConstantMemory(Loc, OrLocal);        }        for (Value *IncValue : PN->incoming_values())          Worklist.push_back(IncValue); @@ -635,7 +522,7 @@ bool BasicAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,      // Otherwise be conservative.      Visited.clear(); -    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);    } while (!Worklist.empty() && --MaxLookup); @@ -660,62 +547,51 @@ static bool isMemsetPattern16(const Function *MS,    return false;  } -/// getModRefBehavior - Return the behavior when calling the given call site. -AliasAnalysis::ModRefBehavior -BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { +/// Returns the behavior when calling the given call site. +FunctionModRefBehavior BasicAAResult::getModRefBehavior(ImmutableCallSite CS) {    if (CS.doesNotAccessMemory())      // Can't do better than this. 
-    return DoesNotAccessMemory; +    return FMRB_DoesNotAccessMemory; -  ModRefBehavior Min = UnknownModRefBehavior; +  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;    // If the callsite knows it only reads memory, don't return worse    // than that.    if (CS.onlyReadsMemory()) -    Min = OnlyReadsMemory; +    Min = FMRB_OnlyReadsMemory;    if (CS.onlyAccessesArgMemory()) -    Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); +    Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); -  // The AliasAnalysis base class has some smarts, lets use them. -  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); +  // The AAResultBase base class has some smarts, lets use them. +  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);  } -/// getModRefBehavior - Return the behavior when calling the given function. -/// For use when the call site is not known. -AliasAnalysis::ModRefBehavior -BasicAliasAnalysis::getModRefBehavior(const Function *F) { +/// Returns the behavior when calling the given function. For use when the call +/// site is not known. +FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {    // If the function declares it doesn't access memory, we can't do better.    if (F->doesNotAccessMemory()) -    return DoesNotAccessMemory; - -  // For intrinsics, we can check the table. -  if (Intrinsic::ID iid = F->getIntrinsicID()) { -#define GET_INTRINSIC_MODREF_BEHAVIOR -#include "llvm/IR/Intrinsics.gen" -#undef GET_INTRINSIC_MODREF_BEHAVIOR -  } +    return FMRB_DoesNotAccessMemory; -  ModRefBehavior Min = UnknownModRefBehavior; +  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;    // If the function declares it only reads memory, go with that.    if (F->onlyReadsMemory()) -    Min = OnlyReadsMemory; +    Min = FMRB_OnlyReadsMemory;    if (F->onlyAccessesArgMemory()) -    Min = ModRefBehavior(Min & OnlyAccessesArgumentPointees); +    Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees); -  const TargetLibraryInfo &TLI = -      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();    if (isMemsetPattern16(F, TLI)) -    Min = OnlyAccessesArgumentPointees; +    Min = FMRB_OnlyAccessesArgumentPointees;    // Otherwise be conservative. -  return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); +  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);  } -AliasAnalysis::ModRefResult -BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) { +ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS, +                                           unsigned ArgIdx) {    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))      switch (II->getIntrinsicID()) {      default: @@ -725,7 +601,7 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {      case Intrinsic::memmove:        assert((ArgIdx == 0 || ArgIdx == 1) &&               "Invalid argument index for memory intrinsic"); -      return ArgIdx ? Ref : Mod; +      return ArgIdx ? MRI_Ref : MRI_Mod;      }    // We can bound the aliasing properties of memset_pattern16 just as we can @@ -733,40 +609,82 @@ BasicAliasAnalysis::getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx) {    // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16    // whenever possible.    
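
The two getModRefBehavior overloads above refine their answer by AND-ing lattice values encoded as bit masks, so each additional fact about the callee can only clear bits. A toy re-creation of that encoding; the mask values here are illustrative, not LLVM's exact ones:

#include <cstdio>

enum ToyModRefBehavior : unsigned {
  Toy_DoesNotAccessMemory = 0,
  Toy_Ref = 1,
  Toy_Mod = 2,
  Toy_ArgumentPointees = 4,
  Toy_Anywhere = 8 | Toy_ArgumentPointees,
  Toy_OnlyReadsMemory = Toy_Anywhere | Toy_Ref,
  Toy_OnlyAccessesArgumentPointees = Toy_ArgumentPointees | Toy_Ref | Toy_Mod,
  Toy_UnknownModRefBehavior = Toy_Anywhere | Toy_Ref | Toy_Mod,
};

int main() {
  // A call site that both "only reads memory" and "only accesses its
  // arguments" ends up at the intersection of the two facts.
  unsigned Min = Toy_UnknownModRefBehavior;
  Min &= Toy_OnlyReadsMemory;              // clears the Mod bit
  Min &= Toy_OnlyAccessesArgumentPointees; // clears the "anywhere else" bit
  std::printf("combined behavior mask: %u\n", Min);
  // Min is now Toy_ArgumentPointees | Toy_Ref: reads only argument memory.
  return 0;
}
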
if (CS.getCalledFunction() && -      isMemsetPattern16(CS.getCalledFunction(), *TLI)) { +      isMemsetPattern16(CS.getCalledFunction(), TLI)) {      assert((ArgIdx == 0 || ArgIdx == 1) &&             "Invalid argument index for memset_pattern16"); -    return ArgIdx ? Ref : Mod; +    return ArgIdx ? MRI_Ref : MRI_Mod;    }    // FIXME: Handle memset_pattern4 and memset_pattern8 also. -  return AliasAnalysis::getArgModRefInfo(CS, ArgIdx); +  if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly)) +    return MRI_Ref; + +  if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadNone)) +    return MRI_NoModRef; + +  return AAResultBase::getArgModRefInfo(CS, ArgIdx);  }  static bool isAssumeIntrinsic(ImmutableCallSite CS) {    const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()); -  if (II && II->getIntrinsicID() == Intrinsic::assume) -    return true; +  return II && II->getIntrinsicID() == Intrinsic::assume; +} -  return false; +#ifndef NDEBUG +static const Function *getParent(const Value *V) { +  if (const Instruction *inst = dyn_cast<Instruction>(V)) +    return inst->getParent()->getParent(); + +  if (const Argument *arg = dyn_cast<Argument>(V)) +    return arg->getParent(); + +  return nullptr;  } -bool BasicAliasAnalysis::doInitialization(Module &M) { -  InitializeAliasAnalysis(this, &M.getDataLayout()); -  return true; +static bool notDifferentParent(const Value *O1, const Value *O2) { + +  const Function *F1 = getParent(O1); +  const Function *F2 = getParent(O2); + +  return !F1 || !F2 || F1 == F2; +} +#endif + +AliasResult BasicAAResult::alias(const MemoryLocation &LocA, +                                 const MemoryLocation &LocB) { +  assert(notDifferentParent(LocA.Ptr, LocB.Ptr) && +         "BasicAliasAnalysis doesn't support interprocedural queries."); + +  // If we have a directly cached entry for these locations, we have recursed +  // through this once, so just return the cached results. Notably, when this +  // happens, we don't clear the cache. +  auto CacheIt = AliasCache.find(LocPair(LocA, LocB)); +  if (CacheIt != AliasCache.end()) +    return CacheIt->second; + +  AliasResult Alias = aliasCheck(LocA.Ptr, LocA.Size, LocA.AATags, LocB.Ptr, +                                 LocB.Size, LocB.AATags); +  // AliasCache rarely has more than 1 or 2 elements, always use +  // shrink_and_clear so it quickly returns to the inline capacity of the +  // SmallDenseMap if it ever grows larger. +  // FIXME: This should really be shrink_to_inline_capacity_and_clear(). +  AliasCache.shrink_and_clear(); +  VisitedPhiBBs.clear(); +  return Alias;  } -/// getModRefInfo - Check to see if the specified callsite can clobber the -/// specified memory object.  Since we only look at local properties of this -/// function, we really can't say much about this query.  We do, however, use -/// simple "address taken" analysis on local objects. -AliasAnalysis::ModRefResult -BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS, -                                  const MemoryLocation &Loc) { +/// Checks to see if the specified callsite can clobber the specified memory +/// object. +/// +/// Since we only look at local properties of this function, we really can't +/// say much about this query.  We do, however, use simple "address taken" +/// analysis on local objects. 
+ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS, +                                        const MemoryLocation &Loc) {    assert(notDifferentParent(CS.getInstruction(), Loc.Ptr) &&           "AliasAnalysis query involving multiple functions!"); -  const Value *Object = GetUnderlyingObject(Loc.Ptr, *DL); +  const Value *Object = GetUnderlyingObject(Loc.Ptr, DL);    // If this is a tail call and Loc.Ptr points to a stack location, we know that    // the tail call cannot access or modify the local stack. @@ -776,7 +694,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,    if (isa<AllocaInst>(Object))      if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))        if (CI->isTailCall()) -        return NoModRef; +        return MRI_NoModRef;    // If the pointer is to a locally allocated object that does not escape,    // then the call can not mod/ref the pointer unless the call takes the pointer @@ -798,41 +716,42 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,        // is impossible to alias the pointer we're checking.  If not, we have to        // assume that the call could touch the pointer, even though it doesn't        // escape. -      if (!isNoAlias(MemoryLocation(*CI), MemoryLocation(Object))) { +      AliasResult AR = +          getBestAAResults().alias(MemoryLocation(*CI), MemoryLocation(Object)); +      if (AR) {          PassedAsArg = true;          break;        }      }      if (!PassedAsArg) -      return NoModRef; +      return MRI_NoModRef;    }    // While the assume intrinsic is marked as arbitrarily writing so that    // proper control dependencies will be maintained, it never aliases any    // particular memory location.    if (isAssumeIntrinsic(CS)) -    return NoModRef; +    return MRI_NoModRef; -  // The AliasAnalysis base class has some smarts, lets use them. -  return AliasAnalysis::getModRefInfo(CS, Loc); +  // The AAResultBase base class has some smarts, lets use them. +  return AAResultBase::getModRefInfo(CS, Loc);  } -AliasAnalysis::ModRefResult -BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, -                                  ImmutableCallSite CS2) { +ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS1, +                                        ImmutableCallSite CS2) {    // While the assume intrinsic is marked as arbitrarily writing so that    // proper control dependencies will be maintained, it never aliases any    // particular memory location.    if (isAssumeIntrinsic(CS1) || isAssumeIntrinsic(CS2)) -    return NoModRef; +    return MRI_NoModRef; -  // The AliasAnalysis base class has some smarts, lets use them. -  return AliasAnalysis::getModRefInfo(CS1, CS2); +  // The AAResultBase base class has some smarts, lets use them. +  return AAResultBase::getModRefInfo(CS1, CS2);  } -/// \brief Provide ad-hoc rules to disambiguate accesses through two GEP -/// operators, both having the exact same pointer operand. +/// Provide ad-hoc rules to disambiguate accesses through two GEP operators, +/// both having the exact same pointer operand.  static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,                                              uint64_t V1Size,                                              const GEPOperator *GEP2, @@ -860,10 +779,9 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,    ConstantInt *C2 =        dyn_cast<ConstantInt>(GEP2->getOperand(GEP2->getNumOperands() - 1)); -  // If the last (struct) indices aren't constants, we can't say anything. 
-  // If they're identical, the other indices might be also be dynamically -  // equal, so the GEPs can alias. -  if (!C1 || !C2 || C1 == C2) +  // If the last (struct) indices are constants and are equal, the other indices +  // might be also be dynamically equal, so the GEPs can alias. +  if (C1 && C2 && C1 == C2)      return MayAlias;    // Find the last-indexed type of the GEP, i.e., the type you'd get if @@ -886,12 +804,49 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,      IntermediateIndices.push_back(GEP1->getOperand(i + 1));    } -  StructType *LastIndexedStruct = -      dyn_cast<StructType>(GetElementPtrInst::getIndexedType( -          GEP1->getSourceElementType(), IntermediateIndices)); +  auto *Ty = GetElementPtrInst::getIndexedType( +    GEP1->getSourceElementType(), IntermediateIndices); +  StructType *LastIndexedStruct = dyn_cast<StructType>(Ty); + +  if (isa<SequentialType>(Ty)) { +    // We know that: +    // - both GEPs begin indexing from the exact same pointer; +    // - the last indices in both GEPs are constants, indexing into a sequential +    //   type (array or pointer); +    // - both GEPs only index through arrays prior to that. +    // +    // Because array indices greater than the number of elements are valid in +    // GEPs, unless we know the intermediate indices are identical between +    // GEP1 and GEP2 we cannot guarantee that the last indexed arrays don't +    // partially overlap. We also need to check that the loaded size matches +    // the element size, otherwise we could still have overlap. +    const uint64_t ElementSize = +        DL.getTypeStoreSize(cast<SequentialType>(Ty)->getElementType()); +    if (V1Size != ElementSize || V2Size != ElementSize) +      return MayAlias; + +    for (unsigned i = 0, e = GEP1->getNumIndices() - 1; i != e; ++i) +      if (GEP1->getOperand(i + 1) != GEP2->getOperand(i + 1)) +        return MayAlias; -  if (!LastIndexedStruct) +    // Now we know that the array/pointer that GEP1 indexes into and that +    // that GEP2 indexes into must either precisely overlap or be disjoint. +    // Because they cannot partially overlap and because fields in an array +    // cannot overlap, if we can prove the final indices are different between +    // GEP1 and GEP2, we can conclude GEP1 and GEP2 don't alias. +     +    // If the last indices are constants, we've already checked they don't +    // equal each other so we can exit early. +    if (C1 && C2) +      return NoAlias; +    if (isKnownNonEqual(GEP1->getOperand(GEP1->getNumOperands() - 1), +                        GEP2->getOperand(GEP2->getNumOperands() - 1), +                        DL)) +      return NoAlias; +    return MayAlias; +  } else if (!LastIndexedStruct || !C1 || !C2) {      return MayAlias; +  }    // We know that:    // - both GEPs begin indexing from the exact same pointer; @@ -925,39 +880,21 @@ static AliasResult aliasSameBasePointerGEPs(const GEPOperator *GEP1,    return MayAlias;  } -/// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction -/// against another pointer.  We know that V1 is a GEP, but we don't know -/// anything about V2.  UnderlyingV1 is GetUnderlyingObject(GEP1, DL), -/// UnderlyingV2 is the same for V2. +/// Provides a bunch of ad-hoc rules to disambiguate a GEP instruction against +/// another pointer.  
/// -AliasResult BasicAliasAnalysis::aliasGEP( -    const GEPOperator *GEP1, uint64_t V1Size, const AAMDNodes &V1AAInfo, -    const Value *V2, uint64_t V2Size, const AAMDNodes &V2AAInfo, -    const Value *UnderlyingV1, const Value *UnderlyingV2) { +/// We know that V1 is a GEP, but we don't know anything about V2. +/// UnderlyingV1 is GetUnderlyingObject(GEP1, DL), UnderlyingV2 is the same for +/// V2. +AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, +                                    const AAMDNodes &V1AAInfo, const Value *V2, +                                    uint64_t V2Size, const AAMDNodes &V2AAInfo, +                                    const Value *UnderlyingV1, +                                    const Value *UnderlyingV2) {    int64_t GEP1BaseOffset;    bool GEP1MaxLookupReached;    SmallVector<VariableGEPIndex, 4> GEP1VariableIndices; -  // We have to get two AssumptionCaches here because GEP1 and V2 may be from -  // different functions. -  // FIXME: This really doesn't make any sense. We get a dominator tree below -  // that can only refer to a single function. But this function (aliasGEP) is -  // a method on an immutable pass that can be called when there *isn't* -  // a single function. The old pass management layer makes this "work", but -  // this isn't really a clean solution. -  AssumptionCacheTracker &ACT = getAnalysis<AssumptionCacheTracker>(); -  AssumptionCache *AC1 = nullptr, *AC2 = nullptr; -  if (auto *GEP1I = dyn_cast<Instruction>(GEP1)) -    AC1 = &ACT.getAssumptionCache( -        const_cast<Function &>(*GEP1I->getParent()->getParent())); -  if (auto *I2 = dyn_cast<Instruction>(V2)) -    AC2 = &ACT.getAssumptionCache( -        const_cast<Function &>(*I2->getParent()->getParent())); - -  DominatorTreeWrapperPass *DTWP = -      getAnalysisIfAvailable<DominatorTreeWrapperPass>(); -  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; -    // If we have two gep instructions with must-alias or not-alias'ing base    // pointers, figure out if the indexes to the GEP tell us anything about the    // derived pointer. @@ -971,9 +908,8 @@ AliasResult BasicAliasAnalysis::aliasGEP(      // identical.      if ((BaseAlias == MayAlias) && V1Size == V2Size) {        // Do the base pointers alias assuming type and size. -      AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, -                                                V1AAInfo, UnderlyingV2, -                                                V2Size, V2AAInfo); +      AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1AAInfo, +                                                UnderlyingV2, V2Size, V2AAInfo);        if (PreciseBaseAlias == NoAlias) {          // See if the computed offset from the common pointer tells us about the          // relation of the resulting pointer. 
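
Once both pointers decompose to the same base, as in the code above, the remaining constant parts of the offsets feed a plain interval-disjointness argument later in aliasGEP. A hedged sketch of that final arithmetic; the function name and layout are illustrative, not the patch's code:

#include <cassert>
#include <cstdint>

// Returns true when two accesses of the given sizes, at the given byte
// offsets from a common base, are provably disjoint.
static bool provablyDisjoint(int64_t Off1, uint64_t Size1, int64_t Off2,
                             uint64_t Size2) {
  if (Off1 > Off2) // canonicalize so the first access starts first
    return provablyDisjoint(Off2, Size2, Off1, Size1);
  return (uint64_t)(Off2 - Off1) >= Size1; // gap at least as large as access 1
}

int main() {
  assert(provablyDisjoint(0, 4, 4, 4));  // adjacent i32 fields: no alias
  assert(!provablyDisjoint(0, 8, 4, 4)); // an 8-byte access reaches offset 4
  return 0;
}
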
@@ -982,15 +918,15 @@ AliasResult BasicAliasAnalysis::aliasGEP(          SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;          const Value *GEP2BasePtr =              DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, -                                   GEP2MaxLookupReached, *DL, AC2, DT); +                                   GEP2MaxLookupReached, DL, &AC, DT);          const Value *GEP1BasePtr =              DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, -                                   GEP1MaxLookupReached, *DL, AC1, DT); +                                   GEP1MaxLookupReached, DL, &AC, DT);          // DecomposeGEPExpression and GetUnderlyingObject should return the          // same result except when DecomposeGEPExpression has no DataLayout. +        // FIXME: They always have a DataLayout so this should become an +        // assert.          if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { -          assert(!DL && -                 "DecomposeGEPExpression and GetUnderlyingObject disagree!");            return MayAlias;          }          // If the max search depth is reached the result is undefined @@ -1007,35 +943,35 @@ AliasResult BasicAliasAnalysis::aliasGEP(      // If we get a No or May, then return it immediately, no amount of analysis      // will improve this situation. -    if (BaseAlias != MustAlias) return BaseAlias; +    if (BaseAlias != MustAlias) +      return BaseAlias;      // Otherwise, we have a MustAlias.  Since the base pointers alias each other      // exactly, see if the computed offset from the common pointer tells us      // about the relation of the resulting pointer.      const Value *GEP1BasePtr =          DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, -                               GEP1MaxLookupReached, *DL, AC1, DT); +                               GEP1MaxLookupReached, DL, &AC, DT);      int64_t GEP2BaseOffset;      bool GEP2MaxLookupReached;      SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;      const Value *GEP2BasePtr =          DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, -                               GEP2MaxLookupReached, *DL, AC2, DT); +                               GEP2MaxLookupReached, DL, &AC, DT);      // DecomposeGEPExpression and GetUnderlyingObject should return the      // same result except when DecomposeGEPExpression has no DataLayout. +    // FIXME: They always have a DataLayout so this should become an assert.      if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) { -      assert(!DL && -             "DecomposeGEPExpression and GetUnderlyingObject disagree!");        return MayAlias;      }      // If we know the two GEPs are based off of the exact same pointer (and not      // just the same underlying object), see if that tells us anything about      // the resulting pointers. -    if (DL && GEP1->getPointerOperand() == GEP2->getPointerOperand()) { -      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, *DL); +    if (GEP1->getPointerOperand() == GEP2->getPointerOperand()) { +      AliasResult R = aliasSameBasePointerGEPs(GEP1, V1Size, GEP2, V2Size, DL);        // If we couldn't find anything interesting, don't abandon just yet.        
if (R != MayAlias)          return R; @@ -1072,13 +1008,12 @@ AliasResult BasicAliasAnalysis::aliasGEP(      const Value *GEP1BasePtr =          DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, -                               GEP1MaxLookupReached, *DL, AC1, DT); +                               GEP1MaxLookupReached, DL, &AC, DT);      // DecomposeGEPExpression and GetUnderlyingObject should return the      // same result except when DecomposeGEPExpression has no DataLayout. +    // FIXME: They always have a DataLayout so this should become an assert.      if (GEP1BasePtr != UnderlyingV1) { -      assert(!DL && -             "DecomposeGEPExpression and GetUnderlyingObject disagree!");        return MayAlias;      }      // If the max search depth is reached the result is undefined @@ -1124,12 +1059,42 @@ AliasResult BasicAliasAnalysis::aliasGEP(      }    } -  // Try to distinguish something like &A[i][1] against &A[42][0]. -  // Grab the least significant bit set in any of the scales.    if (!GEP1VariableIndices.empty()) {      uint64_t Modulo = 0; -    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) -      Modulo |= (uint64_t) GEP1VariableIndices[i].Scale; +    bool AllPositive = true; +    for (unsigned i = 0, e = GEP1VariableIndices.size(); i != e; ++i) { + +      // Try to distinguish something like &A[i][1] against &A[42][0]. +      // Grab the least significant bit set in any of the scales. We +      // don't need std::abs here (even if the scale's negative) as we'll +      // be ^'ing Modulo with itself later. +      Modulo |= (uint64_t)GEP1VariableIndices[i].Scale; + +      if (AllPositive) { +        // If the Value could change between cycles, then any reasoning about +        // the Value this cycle may not hold in the next cycle. We'll just +        // give up if we can't determine conditions that hold for every cycle: +        const Value *V = GEP1VariableIndices[i].V; + +        bool SignKnownZero, SignKnownOne; +        ComputeSignBit(const_cast<Value *>(V), SignKnownZero, SignKnownOne, DL, +                       0, &AC, nullptr, DT); + +        // Zero-extension widens the variable, and so forces the sign +        // bit to zero. +        bool IsZExt = GEP1VariableIndices[i].ZExtBits > 0 || isa<ZExtInst>(V); +        SignKnownZero |= IsZExt; +        SignKnownOne &= !IsZExt; + +        // If the variable begins with a zero then we know it's +        // positive, regardless of whether the value is signed or +        // unsigned. +        int64_t Scale = GEP1VariableIndices[i].Scale; +        AllPositive = +            (SignKnownZero && Scale >= 0) || (SignKnownOne && Scale < 0); +      } +    } +      Modulo = Modulo ^ (Modulo & (Modulo - 1));      // We can compute the difference between the two addresses @@ -1140,6 +1105,16 @@ AliasResult BasicAliasAnalysis::aliasGEP(          V2Size != MemoryLocation::UnknownSize && ModOffset >= V2Size &&          V1Size <= Modulo - ModOffset)        return NoAlias; + +    // If we know all the variables are positive, then GEP1 >= GEP1BasePtr. +    // If GEP1BasePtr > V2 (GEP1BaseOffset > 0) then we know the pointers +    // don't alias if V2Size can fit in the gap between V2 and GEP1BasePtr. 
+    if (AllPositive && GEP1BaseOffset > 0 && V2Size <= (uint64_t)GEP1BaseOffset) +      return NoAlias; + +    if (constantOffsetHeuristic(GEP1VariableIndices, V1Size, V2Size, +                                GEP1BaseOffset, &AC, DT)) +      return NoAlias;    }    // Statically, we can see that the base objects are the same, but the @@ -1164,46 +1139,44 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {    return MayAlias;  } -/// aliasSelect - Provide a bunch of ad-hoc rules to disambiguate a Select -/// instruction against another. -AliasResult BasicAliasAnalysis::aliasSelect(const SelectInst *SI, -                                            uint64_t SISize, -                                            const AAMDNodes &SIAAInfo, -                                            const Value *V2, uint64_t V2Size, -                                            const AAMDNodes &V2AAInfo) { +/// Provides a bunch of ad-hoc rules to disambiguate a Select instruction +/// against another. +AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize, +                                       const AAMDNodes &SIAAInfo, +                                       const Value *V2, uint64_t V2Size, +                                       const AAMDNodes &V2AAInfo) {    // If the values are Selects with the same condition, we can do a more precise    // check: just check for aliases between the values on corresponding arms.    if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))      if (SI->getCondition() == SI2->getCondition()) { -      AliasResult Alias = -        aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, -                   SI2->getTrueValue(), V2Size, V2AAInfo); +      AliasResult Alias = aliasCheck(SI->getTrueValue(), SISize, SIAAInfo, +                                     SI2->getTrueValue(), V2Size, V2AAInfo);        if (Alias == MayAlias)          return MayAlias;        AliasResult ThisAlias = -        aliasCheck(SI->getFalseValue(), SISize, SIAAInfo, -                   SI2->getFalseValue(), V2Size, V2AAInfo); +          aliasCheck(SI->getFalseValue(), SISize, SIAAInfo, +                     SI2->getFalseValue(), V2Size, V2AAInfo);        return MergeAliasResults(ThisAlias, Alias);      }    // If both arms of the Select node NoAlias or MustAlias V2, then returns    // NoAlias / MustAlias. Otherwise, returns MayAlias.    AliasResult Alias = -    aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo); +      aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);    if (Alias == MayAlias)      return MayAlias;    AliasResult ThisAlias = -    aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo); +      aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);    return MergeAliasResults(ThisAlias, Alias);  } -// aliasPHI - Provide a bunch of ad-hoc rules to disambiguate a PHI instruction -// against another. -AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, -                                         const AAMDNodes &PNAAInfo, -                                         const Value *V2, uint64_t V2Size, -                                         const AAMDNodes &V2AAInfo) { +/// Provide a bunch of ad-hoc rules to disambiguate a PHI instruction against +/// another. 
+AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize, +                                    const AAMDNodes &PNAAInfo, const Value *V2, +                                    uint64_t V2Size, +                                    const AAMDNodes &V2AAInfo) {    // Track phi nodes we have visited. We use this information when we determine    // value equivalence.    VisitedPhiBBs.insert(PN->getParent()); @@ -1232,9 +1205,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,        for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {          AliasResult ThisAlias = -          aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo, -                     PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), -                     V2Size, V2AAInfo); +            aliasCheck(PN->getIncomingValue(i), PNSize, PNAAInfo, +                       PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), +                       V2Size, V2AAInfo);          Alias = MergeAliasResults(ThisAlias, Alias);          if (Alias == MayAlias)            break; @@ -1247,8 +1220,9 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,        return Alias;      } -  SmallPtrSet<Value*, 4> UniqueSrc; -  SmallVector<Value*, 4> V1Srcs; +  SmallPtrSet<Value *, 4> UniqueSrc; +  SmallVector<Value *, 4> V1Srcs; +  bool isRecursive = false;    for (Value *PV1 : PN->incoming_values()) {      if (isa<PHINode>(PV1))        // If any of the source itself is a PHI, return MayAlias conservatively @@ -1256,12 +1230,33 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,        // sides are PHI nodes. In which case, this is O(m x n) time where 'm'        // and 'n' are the number of PHI sources.        return MayAlias; + +    if (EnableRecPhiAnalysis) +      if (GEPOperator *PV1GEP = dyn_cast<GEPOperator>(PV1)) { +        // Check whether the incoming value is a GEP that advances the pointer +        // result of this PHI node (e.g. in a loop). If this is the case, we +        // would recurse and always get a MayAlias. Handle this case specially +        // below. +        if (PV1GEP->getPointerOperand() == PN && PV1GEP->getNumIndices() == 1 && +            isa<ConstantInt>(PV1GEP->idx_begin())) { +          isRecursive = true; +          continue; +        } +      } +      if (UniqueSrc.insert(PV1).second)        V1Srcs.push_back(PV1);    } -  AliasResult Alias = aliasCheck(V2, V2Size, V2AAInfo, -                                 V1Srcs[0], PNSize, PNAAInfo); +  // If this PHI node is recursive, set the size of the accessed memory to +  // unknown to represent all the possible values the GEP could advance the +  // pointer to. +  if (isRecursive) +    PNSize = MemoryLocation::UnknownSize; + +  AliasResult Alias = +      aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo); +    // Early exit if the check of the first PHI source against V2 is MayAlias.    // Other results are not possible.    
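A made-up source-level example of the pattern behind the EnableRecPhiAnalysis case in the hunk above: a pointer advanced by a constant-index GEP inside a loop becomes a PHI whose incoming value is a GEP off the PHI itself, which is why the patch widens PNSize to UnknownSize rather than recursing forever.

    // Hypothetical C++ whose IR contains the recursive PHI:
    //   %p.addr = phi float* [ %p, %entry ], [ %next, %loop ]
    //   %next   = getelementptr inbounds float, float* %p.addr, i64 1
    void scaleAll(float *p, float *end, float k) {
      for (; p != end; ++p) // 'p' is the PHI advanced by its own GEP
        *p *= k;
    }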
if (Alias == MayAlias) @@ -1272,8 +1267,8 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,    for (unsigned i = 1, e = V1Srcs.size(); i != e; ++i) {      Value *V = V1Srcs[i]; -    AliasResult ThisAlias = aliasCheck(V2, V2Size, V2AAInfo, -                                       V, PNSize, PNAAInfo); +    AliasResult ThisAlias = +        aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);      Alias = MergeAliasResults(ThisAlias, Alias);      if (Alias == MayAlias)        break; @@ -1282,13 +1277,11 @@ AliasResult BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize,    return Alias;  } -// aliasCheck - Provide a bunch of ad-hoc rules to disambiguate in common cases, -// such as array references. -// -AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size, -                                           AAMDNodes V1AAInfo, const Value *V2, -                                           uint64_t V2Size, -                                           AAMDNodes V2AAInfo) { +/// Provides a bunch of ad-hoc rules to disambiguate in common cases, such as +/// array references. +AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size, +                                      AAMDNodes V1AAInfo, const Value *V2, +                                      uint64_t V2Size, AAMDNodes V2AAInfo) {    // If either of the memory references is empty, it doesn't matter what the    // pointer values are.    if (V1Size == 0 || V2Size == 0) @@ -1313,11 +1306,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,      return MustAlias;    if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy()) -    return NoAlias;  // Scalars cannot alias each other +    return NoAlias; // Scalars cannot alias each other    // Figure out what objects these things are pointing to if we can. -  const Value *O1 = GetUnderlyingObject(V1, *DL, MaxLookupSearchDepth); -  const Value *O2 = GetUnderlyingObject(V2, *DL, MaxLookupSearchDepth); +  const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth); +  const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);    // Null values in the default address space don't point to any object, so they    // don't alias any other pointer. @@ -1366,12 +1359,11 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,    // If the size of one access is larger than the entire object on the other    // side, then we know such behavior is undefined and can assume no alias. -  if (DL) -    if ((V1Size != MemoryLocation::UnknownSize && -         isObjectSmallerThan(O2, V1Size, *DL, *TLI)) || -        (V2Size != MemoryLocation::UnknownSize && -         isObjectSmallerThan(O1, V2Size, *DL, *TLI))) -      return NoAlias; +  if ((V1Size != MemoryLocation::UnknownSize && +       isObjectSmallerThan(O2, V1Size, DL, TLI)) || +      (V2Size != MemoryLocation::UnknownSize && +       isObjectSmallerThan(O1, V2Size, DL, TLI))) +    return NoAlias;    // Check the cache before climbing up use-def chains. This also terminates    // otherwise infinitely recursive queries. 
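A minimal sketch of the "least significant scale bit" reasoning from the aliasGEP hunk above (the &A[i][1] versus &A[42][0] case); the helper and the worked numbers are illustrative, not part of the patch.

    #include <cstdint>

    static bool disjointByModulus(uint64_t ScalesOr, int64_t BaseOffset,
                                  uint64_t V1Size, uint64_t V2Size) {
      // Keep only the least significant bit set in any scale; every variable
      // term of GEP1 is a multiple of it. Assumes at least one non-zero scale.
      uint64_t Modulo = ScalesOr ^ (ScalesOr & (ScalesOr - 1));
      uint64_t ModOffset = (uint64_t)BaseOffset & (Modulo - 1);
      // Modulo Modulo, GEP1's access covers [ModOffset, ModOffset + V1Size)
      // and V2's covers [0, V2Size); disjoint if both fit without touching.
      return ModOffset >= V2Size && V1Size <= Modulo - ModOffset;
    }
    // Worked case &A[i][1] vs. &A[42][0] with int A[100][2]: the only scale is
    // 8, BaseOffset == 4 - 336 == -332, hence Modulo == 8 and ModOffset == 4;
    // two 4-byte accesses satisfy 4 >= 4 && 4 <= 8 - 4, giving NoAlias.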
@@ -1380,7 +1372,7 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,    if (V1 > V2)      std::swap(Locs.first, Locs.second);    std::pair<AliasCacheTy::iterator, bool> Pair = -    AliasCache.insert(std::make_pair(Locs, MayAlias)); +      AliasCache.insert(std::make_pair(Locs, MayAlias));    if (!Pair.second)      return Pair.first->second; @@ -1393,8 +1385,10 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,      std::swap(V1AAInfo, V2AAInfo);    }    if (const GEPOperator *GV1 = dyn_cast<GEPOperator>(V1)) { -    AliasResult Result = aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2); -    if (Result != MayAlias) return AliasCache[Locs] = Result; +    AliasResult Result = +        aliasGEP(GV1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O1, O2); +    if (Result != MayAlias) +      return AliasCache[Locs] = Result;    }    if (isa<PHINode>(V2) && !isa<PHINode>(V1)) { @@ -1403,9 +1397,9 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,      std::swap(V1AAInfo, V2AAInfo);    }    if (const PHINode *PN = dyn_cast<PHINode>(V1)) { -    AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, -                                  V2, V2Size, V2AAInfo); -    if (Result != MayAlias) return AliasCache[Locs] = Result; +    AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo); +    if (Result != MayAlias) +      return AliasCache[Locs] = Result;    }    if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) { @@ -1414,29 +1408,38 @@ AliasResult BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,      std::swap(V1AAInfo, V2AAInfo);    }    if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) { -    AliasResult Result = aliasSelect(S1, V1Size, V1AAInfo, -                                     V2, V2Size, V2AAInfo); -    if (Result != MayAlias) return AliasCache[Locs] = Result; +    AliasResult Result = +        aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo); +    if (Result != MayAlias) +      return AliasCache[Locs] = Result;    }    // If both pointers are pointing into the same object and one of them    // accesses is accessing the entire object, then the accesses must    // overlap in some way. -  if (DL && O1 == O2) +  if (O1 == O2)      if ((V1Size != MemoryLocation::UnknownSize && -         isObjectSize(O1, V1Size, *DL, *TLI)) || +         isObjectSize(O1, V1Size, DL, TLI)) ||          (V2Size != MemoryLocation::UnknownSize && -         isObjectSize(O2, V2Size, *DL, *TLI))) +         isObjectSize(O2, V2Size, DL, TLI)))        return AliasCache[Locs] = PartialAlias; -  AliasResult Result = -      AliasAnalysis::alias(MemoryLocation(V1, V1Size, V1AAInfo), -                           MemoryLocation(V2, V2Size, V2AAInfo)); +  // Recurse back into the best AA results we have, potentially with refined +  // memory locations. We have already ensured that BasicAA has a MayAlias +  // cache result for these, so any recursion back into BasicAA won't loop. +  AliasResult Result = getBestAAResults().alias(Locs.first, Locs.second);    return AliasCache[Locs] = Result;  } -bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V, -                                                       const Value *V2) { +/// Check whether two Values can be considered equivalent. 
+/// +/// In addition to pointer equivalence of \p V1 and \p V2 this checks whether +/// they can not be part of a cycle in the value graph by looking at all +/// visited phi nodes an making sure that the phis cannot reach the value. We +/// have to do this because we are looking through phi nodes (That is we say +/// noalias(V, phi(VA, VB)) if noalias(V, VA) and noalias(V, VB). +bool BasicAAResult::isValueEqualInPotentialCycles(const Value *V, +                                                  const Value *V2) {    if (V != V2)      return false; @@ -1450,28 +1453,21 @@ bool BasicAliasAnalysis::isValueEqualInPotentialCycles(const Value *V,    if (VisitedPhiBBs.size() > MaxNumPhiBBsValueReachabilityCheck)      return false; -  // Use dominance or loop info if available. -  DominatorTreeWrapperPass *DTWP = -      getAnalysisIfAvailable<DominatorTreeWrapperPass>(); -  DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr; -  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); -  LoopInfo *LI = LIWP ? &LIWP->getLoopInfo() : nullptr; -    // Make sure that the visited phis cannot reach the Value. This ensures that    // the Values cannot come from different iterations of a potential cycle the    // phi nodes could be involved in.    for (auto *P : VisitedPhiBBs) -    if (isPotentiallyReachable(P->begin(), Inst, DT, LI)) +    if (isPotentiallyReachable(&P->front(), Inst, DT, LI))        return false;    return true;  } -/// GetIndexDifference - Dest and Src are the variable indices from two -/// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base -/// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic -/// difference between the two pointers. -void BasicAliasAnalysis::GetIndexDifference( +/// Computes the symbolic difference between two de-composed GEPs. +/// +/// Dest and Src are the variable indices from two decomposed GetElementPtr +/// instructions GEP1 and GEP2 which have common base pointers. +void BasicAAResult::GetIndexDifference(      SmallVectorImpl<VariableGEPIndex> &Dest,      const SmallVectorImpl<VariableGEPIndex> &Src) {    if (Src.empty()) @@ -1479,14 +1475,14 @@ void BasicAliasAnalysis::GetIndexDifference(    for (unsigned i = 0, e = Src.size(); i != e; ++i) {      const Value *V = Src[i].V; -    ExtensionKind Extension = Src[i].Extension; +    unsigned ZExtBits = Src[i].ZExtBits, SExtBits = Src[i].SExtBits;      int64_t Scale = Src[i].Scale;      // Find V in Dest.  This is N^2, but pointer indices almost never have more      // than a few variable indexes.      for (unsigned j = 0, e = Dest.size(); j != e; ++j) {        if (!isValueEqualInPotentialCycles(Dest[j].V, V) || -          Dest[j].Extension != Extension) +          Dest[j].ZExtBits != ZExtBits || Dest[j].SExtBits != SExtBits)          continue;        // If we found it, subtract off Scale V's from the entry in Dest.  If it @@ -1501,8 +1497,120 @@ void BasicAliasAnalysis::GetIndexDifference(      // If we didn't consume this entry, add it to the end of the Dest list.      
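A toy model of the subtraction GetIndexDifference performs (the loop continues just below); the struct and container here are stand-ins, not LLVM's types, and the zext/sext bookkeeping is deliberately ignored.

    #include <cstdint>
    #include <string>
    #include <vector>

    // Simplified stand-in for VariableGEPIndex.
    struct ToyIndex { std::string V; int64_t Scale; };

    // Dest -= Src: cancel matching variables, append unmatched ones negated.
    static void subtractIndices(std::vector<ToyIndex> &Dest,
                                const std::vector<ToyIndex> &Src) {
      for (const ToyIndex &S : Src) {
        bool Consumed = false;
        for (auto I = Dest.begin(); I != Dest.end(); ++I) {
          if (I->V != S.V)
            continue;
          I->Scale -= S.Scale;     // same variable: scales cancel
          if (I->Scale == 0)
            Dest.erase(I);
          Consumed = true;
          break;
        }
        if (!Consumed)
          Dest.push_back({S.V, -S.Scale}); // unmatched: keep with flipped sign
      }
    }
    // { {"%i",4}, {"%j",8} } minus { {"%i",4} } leaves { {"%j",8} };
    // minus { {"%k",2} } instead leaves { {"%i",4}, {"%j",8}, {"%k",-2} }.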
if (Scale) { -      VariableGEPIndex Entry = { V, Extension, -Scale }; +      VariableGEPIndex Entry = {V, ZExtBits, SExtBits, -Scale};        Dest.push_back(Entry);      }    }  } + +bool BasicAAResult::constantOffsetHeuristic( +    const SmallVectorImpl<VariableGEPIndex> &VarIndices, uint64_t V1Size, +    uint64_t V2Size, int64_t BaseOffset, AssumptionCache *AC, +    DominatorTree *DT) { +  if (VarIndices.size() != 2 || V1Size == MemoryLocation::UnknownSize || +      V2Size == MemoryLocation::UnknownSize) +    return false; + +  const VariableGEPIndex &Var0 = VarIndices[0], &Var1 = VarIndices[1]; + +  if (Var0.ZExtBits != Var1.ZExtBits || Var0.SExtBits != Var1.SExtBits || +      Var0.Scale != -Var1.Scale) +    return false; + +  unsigned Width = Var1.V->getType()->getIntegerBitWidth(); + +  // We'll strip off the Extensions of Var0 and Var1 and do another round +  // of GetLinearExpression decomposition. In the example above, if Var0 +  // is zext(%x + 1) we should get V1 == %x and V1Offset == 1. + +  APInt V0Scale(Width, 0), V0Offset(Width, 0), V1Scale(Width, 0), +      V1Offset(Width, 0); +  bool NSW = true, NUW = true; +  unsigned V0ZExtBits = 0, V0SExtBits = 0, V1ZExtBits = 0, V1SExtBits = 0; +  const Value *V0 = GetLinearExpression(Var0.V, V0Scale, V0Offset, V0ZExtBits, +                                        V0SExtBits, DL, 0, AC, DT, NSW, NUW); +  NSW = true, NUW = true; +  const Value *V1 = GetLinearExpression(Var1.V, V1Scale, V1Offset, V1ZExtBits, +                                        V1SExtBits, DL, 0, AC, DT, NSW, NUW); + +  if (V0Scale != V1Scale || V0ZExtBits != V1ZExtBits || +      V0SExtBits != V1SExtBits || !isValueEqualInPotentialCycles(V0, V1)) +    return false; + +  // We have a hit - Var0 and Var1 only differ by a constant offset! + +  // If we've been sext'ed then zext'd the maximum difference between Var0 and +  // Var1 is possible to calculate, but we're just interested in the absolute +  // minimum difference between the two. The minimum distance may occur due to +  // wrapping; consider "add i3 %i, 5": if %i == 7 then 7 + 5 mod 8 == 4, and so +  // the minimum distance between %i and %i + 5 is 3. +  APInt MinDiff = V0Offset - V1Offset, Wrapped = -MinDiff; +  MinDiff = APIntOps::umin(MinDiff, Wrapped); +  uint64_t MinDiffBytes = MinDiff.getZExtValue() * std::abs(Var0.Scale); + +  // We can't definitely say whether GEP1 is before or after V2 due to wrapping +  // arithmetic (i.e. for some values of GEP1 and V2 GEP1 < V2, and for other +  // values GEP1 > V2). We'll therefore only declare NoAlias if both V1Size and +  // V2Size can fit in the MinDiffBytes gap. 
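The same minimum-wrapped-distance computation described in the comment above, sketched in plain integer arithmetic (the patch itself uses APInt; the helper below is made up).

    #include <algorithm>
    #include <cstdint>

    // Distance between A and B in Bits-bit modular arithmetic, taking the
    // shorter way around the wrap.
    static uint64_t minWrappedDistance(uint64_t A, uint64_t B, unsigned Bits) {
      uint64_t Mask = (Bits >= 64) ? ~0ULL : ((1ULL << Bits) - 1);
      uint64_t Diff = (A - B) & Mask;
      uint64_t Wrapped = (0 - Diff) & Mask;
      return std::min(Diff, Wrapped);
    }
    // For "add i3 %i, 5": minWrappedDistance(%i + 5, %i, 3) == min(5, 3) == 3,
    // matching the example given in the comment above.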
+  return V1Size + std::abs(BaseOffset) <= MinDiffBytes && +         V2Size + std::abs(BaseOffset) <= MinDiffBytes; +} + +//===----------------------------------------------------------------------===// +// BasicAliasAnalysis Pass +//===----------------------------------------------------------------------===// + +char BasicAA::PassID; + +BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> *AM) { +  return BasicAAResult(F.getParent()->getDataLayout(), +                       AM->getResult<TargetLibraryAnalysis>(F), +                       AM->getResult<AssumptionAnalysis>(F), +                       AM->getCachedResult<DominatorTreeAnalysis>(F), +                       AM->getCachedResult<LoopAnalysis>(F)); +} + +BasicAAWrapperPass::BasicAAWrapperPass() : FunctionPass(ID) { +    initializeBasicAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +char BasicAAWrapperPass::ID = 0; +void BasicAAWrapperPass::anchor() {} + +INITIALIZE_PASS_BEGIN(BasicAAWrapperPass, "basicaa", +                      "Basic Alias Analysis (stateless AA impl)", true, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(BasicAAWrapperPass, "basicaa", +                    "Basic Alias Analysis (stateless AA impl)", true, true) + +FunctionPass *llvm::createBasicAAWrapperPass() { +  return new BasicAAWrapperPass(); +} + +bool BasicAAWrapperPass::runOnFunction(Function &F) { +  auto &ACT = getAnalysis<AssumptionCacheTracker>(); +  auto &TLIWP = getAnalysis<TargetLibraryInfoWrapperPass>(); +  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>(); +  auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>(); + +  Result.reset(new BasicAAResult(F.getParent()->getDataLayout(), TLIWP.getTLI(), +                                 ACT.getAssumptionCache(F), +                                 DTWP ? &DTWP->getDomTree() : nullptr, +                                 LIWP ? 
&LIWP->getLoopInfo() : nullptr)); + +  return false; +} + +void BasicAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<AssumptionCacheTracker>(); +  AU.addRequired<TargetLibraryInfoWrapperPass>(); +} + +BasicAAResult llvm::createLegacyPMBasicAAResult(Pass &P, Function &F) { +  return BasicAAResult( +      F.getParent()->getDataLayout(), +      P.getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), +      P.getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F)); +} diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp index 3d819eb596d4..90b7a339a0fe 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp @@ -55,7 +55,7 @@ struct GraphTraits<BlockFrequencyInfo *> {    typedef Function::const_iterator nodes_iterator;    static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) { -    return G->getFunction()->begin(); +    return &G->getFunction()->front();    }    static ChildIteratorType child_begin(const NodeType *N) {      return succ_begin(N); @@ -105,51 +105,36 @@ struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {  } // end namespace llvm  #endif -INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", -                      "Block Frequency Analysis", true, true) -INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", -                    "Block Frequency Analysis", true, true) - -char BlockFrequencyInfo::ID = 0; - +BlockFrequencyInfo::BlockFrequencyInfo() {} -BlockFrequencyInfo::BlockFrequencyInfo() : FunctionPass(ID) { -  initializeBlockFrequencyInfoPass(*PassRegistry::getPassRegistry()); -} - -BlockFrequencyInfo::~BlockFrequencyInfo() {} - -void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.addRequired<BranchProbabilityInfo>(); -  AU.addRequired<LoopInfoWrapperPass>(); -  AU.setPreservesAll(); +BlockFrequencyInfo::BlockFrequencyInfo(const Function &F, +                                       const BranchProbabilityInfo &BPI, +                                       const LoopInfo &LI) { +  calculate(F, BPI, LI);  } -bool BlockFrequencyInfo::runOnFunction(Function &F) { -  BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>(); -  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); +void BlockFrequencyInfo::calculate(const Function &F, +                                   const BranchProbabilityInfo &BPI, +                                   const LoopInfo &LI) {    if (!BFI)      BFI.reset(new ImplType); -  BFI->doFunction(&F, &BPI, &LI); +  BFI->calculate(F, BPI, LI);  #ifndef NDEBUG    if (ViewBlockFreqPropagationDAG != GVDT_None)      view();  #endif -  return false; -} - -void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } - -void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const { -  if (BFI) BFI->print(O);  }  BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {    return BFI ? BFI->getBlockFreq(BB) : 0;  } +void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, +                                      uint64_t Freq) { +  assert(BFI && "Expected analysis to be available"); +  BFI->setBlockFreq(BB, Freq); +} +  /// Pop up a ghostview window with the current block frequency propagation  /// rendered using dot.  
void BlockFrequencyInfo::view() const { @@ -180,3 +165,49 @@ BlockFrequencyInfo::printBlockFreq(raw_ostream &OS,  uint64_t BlockFrequencyInfo::getEntryFreq() const {    return BFI ? BFI->getEntryFreq() : 0;  } + +void BlockFrequencyInfo::releaseMemory() { BFI.reset(); } + +void BlockFrequencyInfo::print(raw_ostream &OS) const { +  if (BFI) +    BFI->print(OS); +} + + +INITIALIZE_PASS_BEGIN(BlockFrequencyInfoWrapperPass, "block-freq", +                      "Block Frequency Analysis", true, true) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_END(BlockFrequencyInfoWrapperPass, "block-freq", +                    "Block Frequency Analysis", true, true) + +char BlockFrequencyInfoWrapperPass::ID = 0; + + +BlockFrequencyInfoWrapperPass::BlockFrequencyInfoWrapperPass() +    : FunctionPass(ID) { +  initializeBlockFrequencyInfoWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +BlockFrequencyInfoWrapperPass::~BlockFrequencyInfoWrapperPass() {} + +void BlockFrequencyInfoWrapperPass::print(raw_ostream &OS, +                                          const Module *) const { +  BFI.print(OS); +} + +void BlockFrequencyInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.addRequired<BranchProbabilityInfoWrapperPass>(); +  AU.addRequired<LoopInfoWrapperPass>(); +  AU.setPreservesAll(); +} + +void BlockFrequencyInfoWrapperPass::releaseMemory() { BFI.releaseMemory(); } + +bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) { +  BranchProbabilityInfo &BPI = +      getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI(); +  LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); +  BFI.calculate(F, BPI, LI); +  return false; +} diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp index 6ceda06aac14..48e23af2690a 100644 --- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp +++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp @@ -530,6 +530,13 @@ BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {    return Freqs[Node.Index].Scaled;  } +void BlockFrequencyInfoImplBase::setBlockFreq(const BlockNode &Node, +                                              uint64_t Freq) { +  assert(Node.isValid() && "Expected valid node"); +  assert(Node.Index < Freqs.size() && "Expected legal index"); +  Freqs[Node.Index].Integer = Freq; +} +  std::string  BlockFrequencyInfoImplBase::getBlockName(const BlockNode &Node) const {    return std::string(); @@ -743,7 +750,10 @@ void BlockFrequencyInfoImplBase::adjustLoopHeaderMass(LoopData &Loop) {      auto &BackedgeMass = Loop.BackedgeMass[Loop.getHeaderIndex(HeaderNode)];      DEBUG(dbgs() << " - Add back edge mass for node "                   << getBlockName(HeaderNode) << ": " << BackedgeMass << "\n"); -    Dist.addLocal(HeaderNode, BackedgeMass.getMass()); +    if (BackedgeMass.getMass() > 0) +      Dist.addLocal(HeaderNode, BackedgeMass.getMass()); +    else +      DEBUG(dbgs() << "   Nothing added. 
Back edge mass is zero\n");    }    DitheringDistributer D(Dist, LoopMass); diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp index 430b41241edf..cf0cc8da6ef8 100644 --- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp +++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp @@ -27,13 +27,13 @@ using namespace llvm;  #define DEBUG_TYPE "branch-prob" -INITIALIZE_PASS_BEGIN(BranchProbabilityInfo, "branch-prob", +INITIALIZE_PASS_BEGIN(BranchProbabilityInfoWrapperPass, "branch-prob",                        "Branch Probability Analysis", false, true)  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", +INITIALIZE_PASS_END(BranchProbabilityInfoWrapperPass, "branch-prob",                      "Branch Probability Analysis", false, true) -char BranchProbabilityInfo::ID = 0; +char BranchProbabilityInfoWrapperPass::ID = 0;  // Weights are for internal use only. They are used by heuristics to help to  // estimate edges' probability. Example: @@ -108,13 +108,6 @@ static const uint32_t IH_TAKEN_WEIGHT = 1024 * 1024 - 1;  /// instruction. This is essentially never taken.  static const uint32_t IH_NONTAKEN_WEIGHT = 1; -// Standard weight value. Used when none of the heuristics set weight for -// the edge. -static const uint32_t NORMAL_WEIGHT = 16; - -// Minimum weight of an edge. Please note, that weight is NEVER 0. -static const uint32_t MIN_WEIGHT = 1; -  /// \brief Calculate edge weights for successors lead to unreachable.  ///  /// Predict that a successor which leads necessarily to an @@ -147,22 +140,34 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {    if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty())      return false; -  uint32_t UnreachableWeight = -    std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT); -  for (SmallVectorImpl<unsigned>::iterator I = UnreachableEdges.begin(), -                                           E = UnreachableEdges.end(); -       I != E; ++I) -    setEdgeWeight(BB, *I, UnreachableWeight); +  // If the terminator is an InvokeInst, check only the normal destination block +  // as the unwind edge of InvokeInst is also very unlikely taken. +  if (auto *II = dyn_cast<InvokeInst>(TI)) +    if (PostDominatedByUnreachable.count(II->getNormalDest())) { +      PostDominatedByUnreachable.insert(BB); +      // Return false here so that edge weights for InvokeInst could be decided +      // in calcInvokeHeuristics(). 
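For the rewritten calcUnreachableHeuristics (its probability assignments continue in the next hunk line), a sketch of the numbers involved, assuming the UR_TAKEN_WEIGHT / UR_NONTAKEN_WEIGHT constants defined earlier in this file (1 and 1024*1024 - 1 at the time of this change) and a hypothetical block with one unreachable and two reachable successors.

    #include "llvm/Support/BranchProbability.h"
    using llvm::BranchProbability;

    void exampleUnreachableSplit() {
      BranchProbability ToUnreachable(1, (1024 * 1024) * 1);   // ~0.0001%
      BranchProbability ToReachable(1024 * 1024 - 1,
                                    (1024 * 1024) * 2);        // ~50% per edge
      // 1/1048576 + 2 * 1048575/2097152 == 1, so the three edges still form a
      // full distribution without the old MIN_WEIGHT/NORMAL_WEIGHT clamping.
      (void)ToUnreachable;
      (void)ToReachable;
    }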
+      return false; +    } -  if (ReachableEdges.empty()) +  if (ReachableEdges.empty()) { +    BranchProbability Prob(1, UnreachableEdges.size()); +    for (unsigned SuccIdx : UnreachableEdges) +      setEdgeProbability(BB, SuccIdx, Prob);      return true; -  uint32_t ReachableWeight = -    std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(), -             NORMAL_WEIGHT); -  for (SmallVectorImpl<unsigned>::iterator I = ReachableEdges.begin(), -                                           E = ReachableEdges.end(); -       I != E; ++I) -    setEdgeWeight(BB, *I, ReachableWeight); +  } + +  BranchProbability UnreachableProb(UR_TAKEN_WEIGHT, +                                    (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * +                                        UnreachableEdges.size()); +  BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT, +                                  (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * +                                      ReachableEdges.size()); + +  for (unsigned SuccIdx : UnreachableEdges) +    setEdgeProbability(BB, SuccIdx, UnreachableProb); +  for (unsigned SuccIdx : ReachableEdges) +    setEdgeProbability(BB, SuccIdx, ReachableProb);    return true;  } @@ -213,10 +218,18 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {    WeightSum = 0;    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) { -    uint32_t W = Weights[i] / ScalingFactor; -    WeightSum += W; -    setEdgeWeight(BB, i, W); +    Weights[i] /= ScalingFactor; +    WeightSum += Weights[i];    } + +  if (WeightSum == 0) { +    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) +      setEdgeProbability(BB, i, {1, e}); +  } else { +    for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) +      setEdgeProbability(BB, i, {Weights[i], static_cast<uint32_t>(WeightSum)}); +  } +    assert(WeightSum <= UINT32_MAX &&           "Expected weights to scale down to 32 bits"); @@ -265,21 +278,24 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {    if (TI->getNumSuccessors() == 1 || ColdEdges.empty())      return false; -  uint32_t ColdWeight = -      std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT); -  for (SmallVectorImpl<unsigned>::iterator I = ColdEdges.begin(), -                                           E = ColdEdges.end(); -       I != E; ++I) -    setEdgeWeight(BB, *I, ColdWeight); - -  if (NormalEdges.empty()) +  if (NormalEdges.empty()) { +    BranchProbability Prob(1, ColdEdges.size()); +    for (unsigned SuccIdx : ColdEdges) +      setEdgeProbability(BB, SuccIdx, Prob);      return true; -  uint32_t NormalWeight = std::max( -      CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT); -  for (SmallVectorImpl<unsigned>::iterator I = NormalEdges.begin(), -                                           E = NormalEdges.end(); -       I != E; ++I) -    setEdgeWeight(BB, *I, NormalWeight); +  } + +  BranchProbability ColdProb(CC_TAKEN_WEIGHT, +                             (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * +                                 ColdEdges.size()); +  BranchProbability NormalProb(CC_NONTAKEN_WEIGHT, +                               (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * +                                   NormalEdges.size()); + +  for (unsigned SuccIdx : ColdEdges) +    setEdgeProbability(BB, SuccIdx, ColdProb); +  for (unsigned SuccIdx : NormalEdges) +    setEdgeProbability(BB, SuccIdx, NormalProb);    return true;  } @@ -312,15 +328,18 @@ bool 
BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {    if (!isProb)      std::swap(TakenIdx, NonTakenIdx); -  setEdgeWeight(BB, TakenIdx, PH_TAKEN_WEIGHT); -  setEdgeWeight(BB, NonTakenIdx, PH_NONTAKEN_WEIGHT); +  BranchProbability TakenProb(PH_TAKEN_WEIGHT, +                              PH_TAKEN_WEIGHT + PH_NONTAKEN_WEIGHT); +  setEdgeProbability(BB, TakenIdx, TakenProb); +  setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());    return true;  }  // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges  // as taken, exiting edges as not-taken. -bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { -  Loop *L = LI->getLoopFor(BB); +bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB, +                                                     const LoopInfo &LI) { +  Loop *L = LI.getLoopFor(BB);    if (!L)      return false; @@ -340,37 +359,35 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {    if (BackEdges.empty() && ExitingEdges.empty())      return false; -  if (uint32_t numBackEdges = BackEdges.size()) { -    uint32_t backWeight = LBH_TAKEN_WEIGHT / numBackEdges; -    if (backWeight < NORMAL_WEIGHT) -      backWeight = NORMAL_WEIGHT; +  // Collect the sum of probabilities of back-edges/in-edges/exiting-edges, and +  // normalize them so that they sum up to one. +  SmallVector<BranchProbability, 4> Probs(3, BranchProbability::getZero()); +  unsigned Denom = (BackEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + +                   (InEdges.empty() ? 0 : LBH_TAKEN_WEIGHT) + +                   (ExitingEdges.empty() ? 0 : LBH_NONTAKEN_WEIGHT); +  if (!BackEdges.empty()) +    Probs[0] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); +  if (!InEdges.empty()) +    Probs[1] = BranchProbability(LBH_TAKEN_WEIGHT, Denom); +  if (!ExitingEdges.empty()) +    Probs[2] = BranchProbability(LBH_NONTAKEN_WEIGHT, Denom); -    for (SmallVectorImpl<unsigned>::iterator EI = BackEdges.begin(), -         EE = BackEdges.end(); EI != EE; ++EI) { -      setEdgeWeight(BB, *EI, backWeight); -    } +  if (uint32_t numBackEdges = BackEdges.size()) { +    auto Prob = Probs[0] / numBackEdges; +    for (unsigned SuccIdx : BackEdges) +      setEdgeProbability(BB, SuccIdx, Prob);    }    if (uint32_t numInEdges = InEdges.size()) { -    uint32_t inWeight = LBH_TAKEN_WEIGHT / numInEdges; -    if (inWeight < NORMAL_WEIGHT) -      inWeight = NORMAL_WEIGHT; - -    for (SmallVectorImpl<unsigned>::iterator EI = InEdges.begin(), -         EE = InEdges.end(); EI != EE; ++EI) { -      setEdgeWeight(BB, *EI, inWeight); -    } +    auto Prob = Probs[1] / numInEdges; +    for (unsigned SuccIdx : InEdges) +      setEdgeProbability(BB, SuccIdx, Prob);    }    if (uint32_t numExitingEdges = ExitingEdges.size()) { -    uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges; -    if (exitWeight < MIN_WEIGHT) -      exitWeight = MIN_WEIGHT; - -    for (SmallVectorImpl<unsigned>::iterator EI = ExitingEdges.begin(), -         EE = ExitingEdges.end(); EI != EE; ++EI) { -      setEdgeWeight(BB, *EI, exitWeight); -    } +    auto Prob = Probs[2] / numExitingEdges; +    for (unsigned SuccIdx : ExitingEdges) +      setEdgeProbability(BB, SuccIdx, Prob);    }    return true; @@ -452,9 +469,10 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {    if (!isProb)      std::swap(TakenIdx, NonTakenIdx); -  setEdgeWeight(BB, TakenIdx, ZH_TAKEN_WEIGHT); -  setEdgeWeight(BB, NonTakenIdx, ZH_NONTAKEN_WEIGHT); - +  BranchProbability TakenProb(ZH_TAKEN_WEIGHT, + 
                             ZH_TAKEN_WEIGHT + ZH_NONTAKEN_WEIGHT); +  setEdgeProbability(BB, TakenIdx, TakenProb); +  setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());    return true;  } @@ -488,9 +506,10 @@ bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) {    if (!isProb)      std::swap(TakenIdx, NonTakenIdx); -  setEdgeWeight(BB, TakenIdx, FPH_TAKEN_WEIGHT); -  setEdgeWeight(BB, NonTakenIdx, FPH_NONTAKEN_WEIGHT); - +  BranchProbability TakenProb(FPH_TAKEN_WEIGHT, +                              FPH_TAKEN_WEIGHT + FPH_NONTAKEN_WEIGHT); +  setEdgeProbability(BB, TakenIdx, TakenProb); +  setEdgeProbability(BB, NonTakenIdx, TakenProb.getCompl());    return true;  } @@ -499,82 +518,30 @@ bool BranchProbabilityInfo::calcInvokeHeuristics(BasicBlock *BB) {    if (!II)      return false; -  setEdgeWeight(BB, 0/*Index for Normal*/, IH_TAKEN_WEIGHT); -  setEdgeWeight(BB, 1/*Index for Unwind*/, IH_NONTAKEN_WEIGHT); +  BranchProbability TakenProb(IH_TAKEN_WEIGHT, +                              IH_TAKEN_WEIGHT + IH_NONTAKEN_WEIGHT); +  setEdgeProbability(BB, 0 /*Index for Normal*/, TakenProb); +  setEdgeProbability(BB, 1 /*Index for Unwind*/, TakenProb.getCompl());    return true;  } -void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.addRequired<LoopInfoWrapperPass>(); -  AU.setPreservesAll(); -} - -bool BranchProbabilityInfo::runOnFunction(Function &F) { -  DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() -               << " ----\n\n"); -  LastF = &F; // Store the last function we ran on for printing. -  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); -  assert(PostDominatedByUnreachable.empty()); -  assert(PostDominatedByColdCall.empty()); - -  // Walk the basic blocks in post-order so that we can build up state about -  // the successors of a block iteratively. -  for (auto BB : post_order(&F.getEntryBlock())) { -    DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); -    if (calcUnreachableHeuristics(BB)) -      continue; -    if (calcMetadataWeights(BB)) -      continue; -    if (calcColdCallHeuristics(BB)) -      continue; -    if (calcLoopBranchHeuristics(BB)) -      continue; -    if (calcPointerHeuristics(BB)) -      continue; -    if (calcZeroHeuristics(BB)) -      continue; -    if (calcFloatingPointHeuristics(BB)) -      continue; -    calcInvokeHeuristics(BB); -  } - -  PostDominatedByUnreachable.clear(); -  PostDominatedByColdCall.clear(); -  return false; -} -  void BranchProbabilityInfo::releaseMemory() { -  Weights.clear(); +  Probs.clear();  } -void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { +void BranchProbabilityInfo::print(raw_ostream &OS) const {    OS << "---- Branch Probabilities ----\n";    // We print the probabilities from the last function the analysis ran over,    // or the function it is currently running over.    
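The zero, floating-point, pointer, and invoke heuristics above all take the same shape after this change: one probability plus its complement instead of a pair of raw weights. A sketch using the pointer-heuristic weights (20 and 12, defined earlier in this file) purely for illustration.

    #include "llvm/Support/BranchProbability.h"
    using llvm::BranchProbability;

    void exampleTwoWayHeuristic() {
      BranchProbability TakenProb(20, 20 + 12);                // 62.5%
      BranchProbability NonTakenProb = TakenProb.getCompl();   // 37.5%
      // The pair is normalized by construction, so no later rescaling pass
      // over the weights is needed.
      (void)TakenProb;
      (void)NonTakenProb;
    }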
assert(LastF && "Cannot print prior to running over a function"); -  for (Function::const_iterator BI = LastF->begin(), BE = LastF->end(); -       BI != BE; ++BI) { -    for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI); -         SI != SE; ++SI) { -      printEdgeProbability(OS << "  ", BI, *SI); +  for (const auto &BI : *LastF) { +    for (succ_const_iterator SI = succ_begin(&BI), SE = succ_end(&BI); SI != SE; +         ++SI) { +      printEdgeProbability(OS << "  ", &BI, *SI);      }    }  } -uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { -  uint32_t Sum = 0; - -  for (succ_const_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { -    uint32_t Weight = getEdgeWeight(BB, I.getSuccessorIndex()); -    uint32_t PrevSum = Sum; - -    Sum += Weight; -    assert(Sum >= PrevSum); (void) PrevSum; -  } - -  return Sum; -} -  bool BranchProbabilityInfo::  isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {    // Hot probability is at least 4/5 = 80% @@ -583,97 +550,74 @@ isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const {  }  BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { -  uint32_t Sum = 0; -  uint32_t MaxWeight = 0; +  auto MaxProb = BranchProbability::getZero();    BasicBlock *MaxSucc = nullptr;    for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {      BasicBlock *Succ = *I; -    uint32_t Weight = getEdgeWeight(BB, Succ); -    uint32_t PrevSum = Sum; - -    Sum += Weight; -    assert(Sum > PrevSum); (void) PrevSum; - -    if (Weight > MaxWeight) { -      MaxWeight = Weight; +    auto Prob = getEdgeProbability(BB, Succ); +    if (Prob > MaxProb) { +      MaxProb = Prob;        MaxSucc = Succ;      }    }    // Hot probability is at least 4/5 = 80% -  if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) +  if (MaxProb > BranchProbability(4, 5))      return MaxSucc;    return nullptr;  } -/// Get the raw edge weight for the edge. If can't find it, return -/// DEFAULT_WEIGHT value. Here an edge is specified using PredBlock and an index -/// to the successors. -uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors) const { -  DenseMap<Edge, uint32_t>::const_iterator I = -      Weights.find(std::make_pair(Src, IndexInSuccessors)); +/// Get the raw edge probability for the edge. If can't find it, return a +/// default probability 1/N where N is the number of successors. Here an edge is +/// specified using PredBlock and an +/// index to the successors. +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, +                                          unsigned IndexInSuccessors) const { +  auto I = Probs.find(std::make_pair(Src, IndexInSuccessors)); -  if (I != Weights.end()) +  if (I != Probs.end())      return I->second; -  return DEFAULT_WEIGHT; +  return {1, +          static_cast<uint32_t>(std::distance(succ_begin(Src), succ_end(Src)))};  } -uint32_t BranchProbabilityInfo::getEdgeWeight(const BasicBlock *Src, -                                              succ_const_iterator Dst) const { -  return getEdgeWeight(Src, Dst.getSuccessorIndex()); +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, +                                          succ_const_iterator Dst) const { +  return getEdgeProbability(Src, Dst.getSuccessorIndex());  } -/// Get the raw edge weight calculated for the block pair. This returns the sum -/// of all raw edge weights from Src to Dst. 
-uint32_t BranchProbabilityInfo:: -getEdgeWeight(const BasicBlock *Src, const BasicBlock *Dst) const { -  uint32_t Weight = 0; -  bool FoundWeight = false; -  DenseMap<Edge, uint32_t>::const_iterator MapI; +/// Get the raw edge probability calculated for the block pair. This returns the +/// sum of all raw edge probabilities from Src to Dst. +BranchProbability +BranchProbabilityInfo::getEdgeProbability(const BasicBlock *Src, +                                          const BasicBlock *Dst) const { +  auto Prob = BranchProbability::getZero(); +  bool FoundProb = false;    for (succ_const_iterator I = succ_begin(Src), E = succ_end(Src); I != E; ++I)      if (*I == Dst) { -      MapI = Weights.find(std::make_pair(Src, I.getSuccessorIndex())); -      if (MapI != Weights.end()) { -        FoundWeight = true; -        Weight += MapI->second; +      auto MapI = Probs.find(std::make_pair(Src, I.getSuccessorIndex())); +      if (MapI != Probs.end()) { +        FoundProb = true; +        Prob += MapI->second;        }      } -  return (!FoundWeight) ? DEFAULT_WEIGHT : Weight; +  uint32_t succ_num = std::distance(succ_begin(Src), succ_end(Src)); +  return FoundProb ? Prob : BranchProbability(1, succ_num);  } -/// Set the edge weight for a given edge specified by PredBlock and an index -/// to the successors. -void BranchProbabilityInfo:: -setEdgeWeight(const BasicBlock *Src, unsigned IndexInSuccessors, -              uint32_t Weight) { -  Weights[std::make_pair(Src, IndexInSuccessors)] = Weight; -  DEBUG(dbgs() << "set edge " << Src->getName() << " -> " -               << IndexInSuccessors << " successor weight to " -               << Weight << "\n"); -} - -/// Get an edge's probability, relative to other out-edges from Src. -BranchProbability BranchProbabilityInfo:: -getEdgeProbability(const BasicBlock *Src, unsigned IndexInSuccessors) const { -  uint32_t N = getEdgeWeight(Src, IndexInSuccessors); -  uint32_t D = getSumForBlock(Src); - -  return BranchProbability(N, D); -} - -/// Get the probability of going from Src to Dst. It returns the sum of all -/// probabilities for edges from Src to Dst. -BranchProbability BranchProbabilityInfo:: -getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { - -  uint32_t N = getEdgeWeight(Src, Dst); -  uint32_t D = getSumForBlock(Src); - -  return BranchProbability(N, D); +/// Set the edge probability for a given edge specified by PredBlock and an +/// index to the successors. +void BranchProbabilityInfo::setEdgeProbability(const BasicBlock *Src, +                                               unsigned IndexInSuccessors, +                                               BranchProbability Prob) { +  Probs[std::make_pair(Src, IndexInSuccessors)] = Prob; +  DEBUG(dbgs() << "set edge " << Src->getName() << " -> " << IndexInSuccessors +               << " successor probability to " << Prob << "\n");  }  raw_ostream & @@ -688,3 +632,54 @@ BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS,    return OS;  } + +void BranchProbabilityInfo::calculate(Function &F, const LoopInfo& LI) { +  DEBUG(dbgs() << "---- Branch Probability Info : " << F.getName() +               << " ----\n\n"); +  LastF = &F; // Store the last function we ran on for printing. +  assert(PostDominatedByUnreachable.empty()); +  assert(PostDominatedByColdCall.empty()); + +  // Walk the basic blocks in post-order so that we can build up state about +  // the successors of a block iteratively. 
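A hypothetical use of the new getEdgeProbability overloads defined above, showing both the uniform default and the summation over parallel edges; the function and variable names below are invented for the example.

    #include "llvm/Analysis/BranchProbabilityInfo.h"
    #include "llvm/IR/BasicBlock.h"
    using namespace llvm;

    // SwitchBB ends in a switch that sends two of its three successor slots to
    // DestBB. If the analysis recorded 1/3 per slot, the block-pair overload
    // sums the parallel edges and returns 2/3; if nothing was ever recorded
    // for SwitchBB, it falls back to 1/(number of successors), i.e. 1/3 here.
    BranchProbability probabilityToDest(const BranchProbabilityInfo &BPI,
                                        const BasicBlock *SwitchBB,
                                        const BasicBlock *DestBB) {
      return BPI.getEdgeProbability(SwitchBB, DestBB);
    }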
+  for (auto BB : post_order(&F.getEntryBlock())) { +    DEBUG(dbgs() << "Computing probabilities for " << BB->getName() << "\n"); +    if (calcUnreachableHeuristics(BB)) +      continue; +    if (calcMetadataWeights(BB)) +      continue; +    if (calcColdCallHeuristics(BB)) +      continue; +    if (calcLoopBranchHeuristics(BB, LI)) +      continue; +    if (calcPointerHeuristics(BB)) +      continue; +    if (calcZeroHeuristics(BB)) +      continue; +    if (calcFloatingPointHeuristics(BB)) +      continue; +    calcInvokeHeuristics(BB); +  } + +  PostDominatedByUnreachable.clear(); +  PostDominatedByColdCall.clear(); +} + +void BranchProbabilityInfoWrapperPass::getAnalysisUsage( +    AnalysisUsage &AU) const { +  AU.addRequired<LoopInfoWrapperPass>(); +  AU.setPreservesAll(); +} + +bool BranchProbabilityInfoWrapperPass::runOnFunction(Function &F) { +  const LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); +  BPI.calculate(F, LI); +  return false; +} + +void BranchProbabilityInfoWrapperPass::releaseMemory() { BPI.releaseMemory(); } + +void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS, +                                             const Module *) const { +  BPI.print(OS); +} diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp index e15109bd2702..0dfd57d3cb6b 100644 --- a/contrib/llvm/lib/Analysis/CFG.cpp +++ b/contrib/llvm/lib/Analysis/CFG.cpp @@ -69,8 +69,9 @@ void llvm::FindFunctionBackedges(const Function &F,  /// and return its position in the terminator instruction's list of  /// successors.  It is an error to call this with a block that is not a  /// successor. -unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) { -  TerminatorInst *Term = BB->getTerminator(); +unsigned llvm::GetSuccessorNumber(const BasicBlock *BB, +    const BasicBlock *Succ) { +  const TerminatorInst *Term = BB->getTerminator();  #ifndef NDEBUG    unsigned e = Term->getNumSuccessors();  #endif @@ -203,7 +204,8 @@ bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,        return true;      // Linear scan, start at 'A', see whether we hit 'B' or the end first. -    for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) { +    for (BasicBlock::const_iterator I = A->getIterator(), E = BB->end(); I != E; +         ++I) {        if (&*I == B)          return true;      } diff --git a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp index fe1c088886bc..4843ed6587a8 100644 --- a/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/CFLAliasAnalysis.cpp @@ -27,18 +27,17 @@  // time.  
//===----------------------------------------------------------------------===// +#include "llvm/Analysis/CFLAliasAnalysis.h"  #include "StratifiedSets.h"  #include "llvm/ADT/BitVector.h"  #include "llvm/ADT/DenseMap.h"  #include "llvm/ADT/None.h"  #include "llvm/ADT/Optional.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/Function.h"  #include "llvm/IR/InstVisitor.h"  #include "llvm/IR/Instructions.h" -#include "llvm/IR/ValueHandle.h"  #include "llvm/Pass.h"  #include "llvm/Support/Allocator.h"  #include "llvm/Support/Compiler.h" @@ -47,7 +46,6 @@  #include "llvm/Support/raw_ostream.h"  #include <algorithm>  #include <cassert> -#include <forward_list>  #include <memory>  #include <tuple> @@ -55,6 +53,19 @@ using namespace llvm;  #define DEBUG_TYPE "cfl-aa" +CFLAAResult::CFLAAResult(const TargetLibraryInfo &TLI) : AAResultBase(TLI) {} +CFLAAResult::CFLAAResult(CFLAAResult &&Arg) : AAResultBase(std::move(Arg)) {} + +// \brief Information we have about a function and would like to keep around +struct CFLAAResult::FunctionInfo { +  StratifiedSets<Value *> Sets; +  // Lots of functions have < 4 returns. Adjust as necessary. +  SmallVector<Value *, 4> ReturnedValues; + +  FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV) +      : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} +}; +  // Try to go from a Value* to a Function*. Never returns nullptr.  static Optional<Function *> parentFunctionOfValue(Value *); @@ -141,129 +152,13 @@ struct Edge {        : From(From), To(To), Weight(W), AdditionalAttrs(A) {}  }; -// \brief Information we have about a function and would like to keep around -struct FunctionInfo { -  StratifiedSets<Value *> Sets; -  // Lots of functions have < 4 returns. Adjust as necessary. -  SmallVector<Value *, 4> ReturnedValues; - -  FunctionInfo(StratifiedSets<Value *> &&S, SmallVector<Value *, 4> &&RV) -      : Sets(std::move(S)), ReturnedValues(std::move(RV)) {} -}; - -struct CFLAliasAnalysis; - -struct FunctionHandle : public CallbackVH { -  FunctionHandle(Function *Fn, CFLAliasAnalysis *CFLAA) -      : CallbackVH(Fn), CFLAA(CFLAA) { -    assert(Fn != nullptr); -    assert(CFLAA != nullptr); -  } - -  ~FunctionHandle() override {} - -  void deleted() override { removeSelfFromCache(); } -  void allUsesReplacedWith(Value *) override { removeSelfFromCache(); } - -private: -  CFLAliasAnalysis *CFLAA; - -  void removeSelfFromCache(); -}; - -struct CFLAliasAnalysis : public ImmutablePass, public AliasAnalysis { -private: -  /// \brief Cached mapping of Functions to their StratifiedSets. -  /// If a function's sets are currently being built, it is marked -  /// in the cache as an Optional without a value. This way, if we -  /// have any kind of recursion, it is discernable from a function -  /// that simply has empty sets. 
-  DenseMap<Function *, Optional<FunctionInfo>> Cache; -  std::forward_list<FunctionHandle> Handles; - -public: -  static char ID; - -  CFLAliasAnalysis() : ImmutablePass(ID) { -    initializeCFLAliasAnalysisPass(*PassRegistry::getPassRegistry()); -  } - -  ~CFLAliasAnalysis() override {} - -  void getAnalysisUsage(AnalysisUsage &AU) const override { -    AliasAnalysis::getAnalysisUsage(AU); -  } - -  void *getAdjustedAnalysisPointer(const void *ID) override { -    if (ID == &AliasAnalysis::ID) -      return (AliasAnalysis *)this; -    return this; -  } - -  /// \brief Inserts the given Function into the cache. -  void scan(Function *Fn); - -  void evict(Function *Fn) { Cache.erase(Fn); } - -  /// \brief Ensures that the given function is available in the cache. -  /// Returns the appropriate entry from the cache. -  const Optional<FunctionInfo> &ensureCached(Function *Fn) { -    auto Iter = Cache.find(Fn); -    if (Iter == Cache.end()) { -      scan(Fn); -      Iter = Cache.find(Fn); -      assert(Iter != Cache.end()); -      assert(Iter->second.hasValue()); -    } -    return Iter->second; -  } - -  AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB); - -  AliasResult alias(const MemoryLocation &LocA, -                    const MemoryLocation &LocB) override { -    if (LocA.Ptr == LocB.Ptr) { -      if (LocA.Size == LocB.Size) { -        return MustAlias; -      } else { -        return PartialAlias; -      } -    } - -    // Comparisons between global variables and other constants should be -    // handled by BasicAA. -    // TODO: ConstantExpr handling -- CFLAA may report NoAlias when comparing -    // a GlobalValue and ConstantExpr, but every query needs to have at least -    // one Value tied to a Function, and neither GlobalValues nor ConstantExprs -    // are. -    if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr)) { -      return AliasAnalysis::alias(LocA, LocB); -    } - -    AliasResult QueryResult = query(LocA, LocB); -    if (QueryResult == MayAlias) -      return AliasAnalysis::alias(LocA, LocB); - -    return QueryResult; -  } - -  bool doInitialization(Module &M) override; -}; - -void FunctionHandle::removeSelfFromCache() { -  assert(CFLAA != nullptr); -  auto *Val = getValPtr(); -  CFLAA->evict(cast<Function>(Val)); -  setValPtr(nullptr); -} -  // \brief Gets the edges our graph should have, based on an Instruction*  class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> { -  CFLAliasAnalysis &AA; +  CFLAAResult &AA;    SmallVectorImpl<Edge> &Output;  public: -  GetEdgesVisitor(CFLAliasAnalysis &AA, SmallVectorImpl<Edge> &Output) +  GetEdgesVisitor(CFLAAResult &AA, SmallVectorImpl<Edge> &Output)        : AA(AA), Output(Output) {}    void visitInstruction(Instruction &) { @@ -480,6 +375,8 @@ public:    }    template <typename InstT> void visitCallLikeInst(InstT &Inst) { +    // TODO: Add support for noalias args/all the other fun function attributes +    // that we can tack on.      SmallVector<Function *, 4> Targets;      if (getPossibleTargets(&Inst, Targets)) {        if (tryInterproceduralAnalysis(Targets, &Inst, Inst.arg_operands())) @@ -488,8 +385,16 @@ public:        Output.clear();      } +    // Because the function is opaque, we need to note that anything +    // could have happened to the arguments, and that the result could alias +    // just about anything, too. +    // The goal of the loop is in part to unify many Values into one set, so we +    // don't care if the function is void there.      
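A made-up source example of why the zero-argument self-edge added just below is needed alongside the per-argument AttrAll edges: with no arguments there is nothing for the existing loop to attach the "anything could have happened" attribute to, so the call's own result must carry it.

    extern int *getbuf(void); // no arguments, returns a pointer
    extern void fill(int *);  // one pointer argument

    int *example() {
      int *b = getbuf(); // zero-arg call: only the new self-edge gives the
                         // result the AttrAll "could alias anything" marking
      fill(b);           // argument call: the arg-operand loop already adds an
                         // AttrAll edge between the call and 'b'
      return b;
    }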
for (Value *V : Inst.arg_operands())        Output.push_back(Edge(&Inst, V, EdgeType::Assign, AttrAll)); +    if (Inst.getNumArgOperands() == 0 && +        Inst.getType() != Type::getVoidTy(Inst.getContext())) +      Output.push_back(Edge(&Inst, &Inst, EdgeType::Assign, AttrAll));    }    void visitCallInst(CallInst &Inst) { visitCallLikeInst(Inst); } @@ -624,7 +529,7 @@ public:    // ----- Various Edge iterators for the graph ----- //    // \brief Iterator for edges. Because this graph is bidirected, we don't -  // allow modificaiton of the edges using this iterator. Additionally, the +  // allow modification of the edges using this iterator. Additionally, the    // iterator becomes invalid if you add edges to or from the node you're    // getting the edges of.    struct EdgeIterator : public std::iterator<std::forward_iterator_tag, @@ -727,16 +632,6 @@ typedef WeightedBidirectionalGraph<std::pair<EdgeType, StratifiedAttrs>> GraphT;  typedef DenseMap<Value *, GraphT::Node> NodeMapT;  } -// -- Setting up/registering CFLAA pass -- // -char CFLAliasAnalysis::ID = 0; - -INITIALIZE_AG_PASS(CFLAliasAnalysis, AliasAnalysis, "cfl-aa", -                   "CFL-Based AA implementation", false, true, false) - -ImmutablePass *llvm::createCFLAliasAnalysisPass() { -  return new CFLAliasAnalysis(); -} -  //===----------------------------------------------------------------------===//  // Function declarations that require types defined in the namespace above  //===----------------------------------------------------------------------===// @@ -751,12 +646,10 @@ static Optional<StratifiedAttr> valueToAttrIndex(Value *Val);  static EdgeType flipWeight(EdgeType);  // Gets edges of the given Instruction*, writing them to the SmallVector*. -static void argsToEdges(CFLAliasAnalysis &, Instruction *, -                        SmallVectorImpl<Edge> &); +static void argsToEdges(CFLAAResult &, Instruction *, SmallVectorImpl<Edge> &);  // Gets edges of the given ConstantExpr*, writing them to the SmallVector*. -static void argsToEdges(CFLAliasAnalysis &, ConstantExpr *, -                        SmallVectorImpl<Edge> &); +static void argsToEdges(CFLAAResult &, ConstantExpr *, SmallVectorImpl<Edge> &);  // Gets the "Level" that one should travel in StratifiedSets  // given an EdgeType. @@ -764,13 +657,13 @@ static Level directionOfEdgeType(EdgeType);  // Builds the graph needed for constructing the StratifiedSets for the  // given function -static void buildGraphFrom(CFLAliasAnalysis &, Function *, +static void buildGraphFrom(CFLAAResult &, Function *,                             SmallVectorImpl<Value *> &, NodeMapT &, GraphT &);  // Gets the edges of a ConstantExpr as if it was an Instruction. This  // function also acts on any nested ConstantExprs, adding the edges  // of those to the given SmallVector as well. -static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &, +static void constexprToEdges(CFLAAResult &, ConstantExpr &,                               SmallVectorImpl<Edge> &);  // Given an Instruction, this will add it to the graph, along with any @@ -779,16 +672,13 @@ static void constexprToEdges(CFLAliasAnalysis &, ConstantExpr &,  //   %0 = load i16* getelementptr ([1 x i16]* @a, 0, 0), align 2  // addInstructionToGraph would add both the `load` and `getelementptr`  // instructions to the graph appropriately. 
-static void addInstructionToGraph(CFLAliasAnalysis &, Instruction &, +static void addInstructionToGraph(CFLAAResult &, Instruction &,                                    SmallVectorImpl<Value *> &, NodeMapT &,                                    GraphT &);  // Notes whether it would be pointless to add the given Value to our sets.  static bool canSkipAddingToSets(Value *Val); -// Builds the graph + StratifiedSets for a function. -static FunctionInfo buildSetsFrom(CFLAliasAnalysis &, Function *); -  static Optional<Function *> parentFunctionOfValue(Value *Val) {    if (auto *Inst = dyn_cast<Instruction>(Val)) {      auto *Bb = Inst->getParent(); @@ -825,7 +715,7 @@ static bool hasUsefulEdges(Instruction *Inst) {  }  static bool hasUsefulEdges(ConstantExpr *CE) { -  // ConstantExpr doens't have terminators, invokes, or fences, so only needs +  // ConstantExpr doesn't have terminators, invokes, or fences, so only needs    // to check for compares.    return CE->getOpcode() != Instruction::ICmp &&           CE->getOpcode() != Instruction::FCmp; @@ -862,7 +752,7 @@ static EdgeType flipWeight(EdgeType Initial) {    llvm_unreachable("Incomplete coverage of EdgeType enum");  } -static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst, +static void argsToEdges(CFLAAResult &Analysis, Instruction *Inst,                          SmallVectorImpl<Edge> &Output) {    assert(hasUsefulEdges(Inst) &&           "Expected instructions to have 'useful' edges"); @@ -870,7 +760,7 @@ static void argsToEdges(CFLAliasAnalysis &Analysis, Instruction *Inst,    v.visit(Inst);  } -static void argsToEdges(CFLAliasAnalysis &Analysis, ConstantExpr *CE, +static void argsToEdges(CFLAAResult &Analysis, ConstantExpr *CE,                          SmallVectorImpl<Edge> &Output) {    assert(hasUsefulEdges(CE) && "Expected constant expr to have 'useful' edges");    GetEdgesVisitor v(Analysis, Output); @@ -889,7 +779,7 @@ static Level directionOfEdgeType(EdgeType Weight) {    llvm_unreachable("Incomplete switch coverage");  } -static void constexprToEdges(CFLAliasAnalysis &Analysis, +static void constexprToEdges(CFLAAResult &Analysis,                               ConstantExpr &CExprToCollapse,                               SmallVectorImpl<Edge> &Results) {    SmallVector<ConstantExpr *, 4> Worklist; @@ -919,7 +809,7 @@ static void constexprToEdges(CFLAliasAnalysis &Analysis,    }  } -static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst, +static void addInstructionToGraph(CFLAAResult &Analysis, Instruction &Inst,                                    SmallVectorImpl<Value *> &ReturnedValues,                                    NodeMapT &Map, GraphT &Graph) {    const auto findOrInsertNode = [&Map, &Graph](Value *Val) { @@ -982,7 +872,7 @@ static void addInstructionToGraph(CFLAliasAnalysis &Analysis, Instruction &Inst,  // buy us much that we don't already have. 
I'd like to add interprocedural  // analysis prior to this however, in case that somehow requires the graph  // produced by this for efficient execution -static void buildGraphFrom(CFLAliasAnalysis &Analysis, Function *Fn, +static void buildGraphFrom(CFLAAResult &Analysis, Function *Fn,                             SmallVectorImpl<Value *> &ReturnedValues,                             NodeMapT &Map, GraphT &Graph) {    for (auto &Bb : Fn->getBasicBlockList()) @@ -1012,12 +902,13 @@ static bool canSkipAddingToSets(Value *Val) {    return false;  } -static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) { +// Builds the graph + StratifiedSets for a function. +CFLAAResult::FunctionInfo CFLAAResult::buildSetsFrom(Function *Fn) {    NodeMapT Map;    GraphT Graph;    SmallVector<Value *, 4> ReturnedValues; -  buildGraphFrom(Analysis, Fn, ReturnedValues, Map, Graph); +  buildGraphFrom(*this, Fn, ReturnedValues, Map, Graph);    DenseMap<GraphT::Node, Value *> NodeValueMap;    NodeValueMap.resize(Map.size()); @@ -1098,19 +989,35 @@ static FunctionInfo buildSetsFrom(CFLAliasAnalysis &Analysis, Function *Fn) {    return FunctionInfo(Builder.build(), std::move(ReturnedValues));  } -void CFLAliasAnalysis::scan(Function *Fn) { +void CFLAAResult::scan(Function *Fn) {    auto InsertPair = Cache.insert(std::make_pair(Fn, Optional<FunctionInfo>()));    (void)InsertPair;    assert(InsertPair.second &&           "Trying to scan a function that has already been cached"); -  FunctionInfo Info(buildSetsFrom(*this, Fn)); +  FunctionInfo Info(buildSetsFrom(Fn));    Cache[Fn] = std::move(Info);    Handles.push_front(FunctionHandle(Fn, this));  } -AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA, -                                    const MemoryLocation &LocB) { +void CFLAAResult::evict(Function *Fn) { Cache.erase(Fn); } + +/// \brief Ensures that the given function is available in the cache. +/// Returns the appropriate entry from the cache. 
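The doc comment above, and the implementation that follows it, describe a scan-on-miss cache: look the function up, build its FunctionInfo if it is absent, and hand back a reference to the cached entry. A minimal sketch of that pattern with toy types (std::string keys and a trivial FunctionInfo, not the LLVM ones) might look like this:

    #include <cassert>
    #include <optional>
    #include <string>
    #include <unordered_map>

    struct FunctionInfo { int NumNodes = 0; };   // toy stand-in

    class ResultCache {
      std::unordered_map<std::string, std::optional<FunctionInfo>> Cache;

      // The expensive analysis; runs at most once per key.
      FunctionInfo buildSetsFrom(const std::string &Fn) {
        return FunctionInfo{static_cast<int>(Fn.size())};
      }

    public:
      void scan(const std::string &Fn) {
        auto Ins = Cache.insert({Fn, std::nullopt});
        (void)Ins;
        assert(Ins.second && "Trying to scan a function that is already cached");
        Ins.first->second = buildSetsFrom(Fn);
      }

      void evict(const std::string &Fn) { Cache.erase(Fn); }

      // ensureCached: compute on a miss, then return the cached entry.
      const std::optional<FunctionInfo> &ensureCached(const std::string &Fn) {
        auto It = Cache.find(Fn);
        if (It == Cache.end()) {
          scan(Fn);
          It = Cache.find(Fn);
          assert(It != Cache.end() && It->second.has_value());
        }
        return It->second;
      }
    };

Returning a reference into the map mirrors the LLVM code; for std::unordered_map that reference stays valid until the entry is erased.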
+const Optional<CFLAAResult::FunctionInfo> & +CFLAAResult::ensureCached(Function *Fn) { +  auto Iter = Cache.find(Fn); +  if (Iter == Cache.end()) { +    scan(Fn); +    Iter = Cache.find(Fn); +    assert(Iter != Cache.end()); +    assert(Iter->second.hasValue()); +  } +  return Iter->second; +} + +AliasResult CFLAAResult::query(const MemoryLocation &LocA, +                               const MemoryLocation &LocB) {    auto *ValA = const_cast<Value *>(LocA.Ptr);    auto *ValB = const_cast<Value *>(LocB.Ptr); @@ -1176,7 +1083,37 @@ AliasResult CFLAliasAnalysis::query(const MemoryLocation &LocA,    return NoAlias;  } -bool CFLAliasAnalysis::doInitialization(Module &M) { -  InitializeAliasAnalysis(this, &M.getDataLayout()); -  return true; +CFLAAResult CFLAA::run(Function &F, AnalysisManager<Function> *AM) { +  return CFLAAResult(AM->getResult<TargetLibraryAnalysis>(F)); +} + +char CFLAA::PassID; + +char CFLAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", +                      false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(CFLAAWrapperPass, "cfl-aa", "CFL-Based Alias Analysis", +                    false, true) + +ImmutablePass *llvm::createCFLAAWrapperPass() { return new CFLAAWrapperPass(); } + +CFLAAWrapperPass::CFLAAWrapperPass() : ImmutablePass(ID) { +  initializeCFLAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool CFLAAWrapperPass::doInitialization(Module &M) { +  Result.reset( +      new CFLAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); +  return false; +} + +bool CFLAAWrapperPass::doFinalization(Module &M) { +  Result.reset(); +  return false; +} + +void CFLAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<TargetLibraryInfoWrapperPass>();  } diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp index e2799d965a7d..7cec962678e8 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/CallGraph.cpp @@ -22,7 +22,7 @@ using namespace llvm;  CallGraph::CallGraph(Module &M)      : M(M), Root(nullptr), ExternalCallingNode(getOrInsertFunction(nullptr)), -      CallsExternalNode(new CallGraphNode(nullptr)) { +      CallsExternalNode(llvm::make_unique<CallGraphNode>(nullptr)) {    // Add every function to the call graph.    for (Function &F : M)      addToCallGraph(&F); @@ -32,10 +32,19 @@ CallGraph::CallGraph(Module &M)      Root = ExternalCallingNode;  } +CallGraph::CallGraph(CallGraph &&Arg) +    : M(Arg.M), FunctionMap(std::move(Arg.FunctionMap)), Root(Arg.Root), +      ExternalCallingNode(Arg.ExternalCallingNode), +      CallsExternalNode(std::move(Arg.CallsExternalNode)) { +  Arg.FunctionMap.clear(); +  Arg.Root = nullptr; +  Arg.ExternalCallingNode = nullptr; +} +  CallGraph::~CallGraph() {    // CallsExternalNode is not in the function map, delete it explicitly. -  CallsExternalNode->allReferencesDropped(); -  delete CallsExternalNode; +  if (CallsExternalNode) +    CallsExternalNode->allReferencesDropped();  // Reset all node's use counts to zero before deleting them to prevent an  // assertion from firing. 
@@ -43,8 +52,6 @@ CallGraph::~CallGraph() {    for (auto &I : FunctionMap)      I.second->allReferencesDropped();  #endif -  for (auto &I : FunctionMap) -    delete I.second;  }  void CallGraph::addToCallGraph(Function *F) { @@ -70,7 +77,7 @@ void CallGraph::addToCallGraph(Function *F) {    // If this function is not defined in this translation unit, it could call    // anything.    if (F->isDeclaration() && !F->isIntrinsic()) -    Node->addCalledFunction(CallSite(), CallsExternalNode); +    Node->addCalledFunction(CallSite(), CallsExternalNode.get());    // Look for calls by this function.    for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) @@ -83,7 +90,7 @@ void CallGraph::addToCallGraph(Function *F) {            // Indirect calls of intrinsics are not allowed so no need to check.            // We can be more precise here by using TargetArg returned by            // Intrinsic::isLeaf. -          Node->addCalledFunction(CS, CallsExternalNode); +          Node->addCalledFunction(CS, CallsExternalNode.get());          else if (!Callee->isIntrinsic())            Node->addCalledFunction(CS, getOrInsertFunction(Callee));        } @@ -105,7 +112,7 @@ void CallGraph::print(raw_ostream &OS) const {    Nodes.reserve(FunctionMap.size());    for (auto I = begin(), E = end(); I != E; ++I) -    Nodes.push_back(I->second); +    Nodes.push_back(I->second.get());    std::sort(Nodes.begin(), Nodes.end(),              [](CallGraphNode *LHS, CallGraphNode *RHS) { @@ -120,9 +127,8 @@ void CallGraph::print(raw_ostream &OS) const {      CN->print(OS);  } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD  void CallGraph::dump() const { print(dbgs()); } -#endif  // removeFunctionFromModule - Unlink the function from this module, returning  // it.  Because this removes the function from the module, the call graph node @@ -134,7 +140,6 @@ Function *CallGraph::removeFunctionFromModule(CallGraphNode *CGN) {    assert(CGN->empty() && "Cannot remove function from call "           "graph if it references other functions!");    Function *F = CGN->getFunction(); // Get the function for the call graph node -  delete CGN;                       // Delete the call graph node for this func    FunctionMap.erase(F);             // Remove the call graph node from the map    M.getFunctionList().remove(F); @@ -152,7 +157,7 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {           "Pointing CallGraphNode at a function that already exists");    FunctionMapTy::iterator I = FunctionMap.find(From);    I->second->F = const_cast<Function*>(To); -  FunctionMap[To] = I->second; +  FunctionMap[To] = std::move(I->second);    FunctionMap.erase(I);  } @@ -160,12 +165,13 @@ void CallGraph::spliceFunction(const Function *From, const Function *To) {  // it will insert a new CallGraphNode for the specified function if one does  // not already exist.  
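The getOrInsertFunction definition that follows is the other half of this diff's ownership change: the FunctionMap now holds std::unique_ptr<CallGraphNode>, so nodes are created lazily, destroyed together with the map, and handed out only as non-owning pointers. A minimal sketch of that pattern, with toy types rather than the CallGraph ones:

    #include <map>
    #include <memory>
    #include <string>

    struct Node { std::string Name; };   // toy stand-in for CallGraphNode

    class Graph {
      // The map owns every node, so the destructor needs no explicit delete loop.
      std::map<std::string, std::unique_ptr<Node>> Nodes;

    public:
      Graph() = default;

      // Mirror the diff's move constructor: take the whole map, then leave the
      // source demonstrably empty so both objects stay safely destructible.
      Graph(Graph &&Other) : Nodes(std::move(Other.Nodes)) { Other.Nodes.clear(); }

      // Create the node on first use; callers only ever see a non-owning pointer.
      Node *getOrInsert(const std::string &Key) {
        auto &Slot = Nodes[Key];          // default-constructs a null unique_ptr
        if (!Slot)
          Slot = std::make_unique<Node>(Node{Key});
        return Slot.get();
      }
    };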
CallGraphNode *CallGraph::getOrInsertFunction(const Function *F) { -  CallGraphNode *&CGN = FunctionMap[F]; +  auto &CGN = FunctionMap[F];    if (CGN) -    return CGN; +    return CGN.get();    assert((!F || F->getParent() == &M) && "Function not in current module!"); -  return CGN = new CallGraphNode(const_cast<Function*>(F)); +  CGN = llvm::make_unique<CallGraphNode>(const_cast<Function *>(F)); +  return CGN.get();  }  //===----------------------------------------------------------------------===// @@ -190,9 +196,8 @@ void CallGraphNode::print(raw_ostream &OS) const {    OS << '\n';  } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD  void CallGraphNode::dump() const { print(dbgs()); } -#endif  /// removeCallEdgeFor - This method removes the edge in the node for the  /// specified call site.  Note that this method takes linear time, so it @@ -297,6 +302,5 @@ void CallGraphWrapperPass::print(raw_ostream &OS, const Module *) const {    G->print(OS);  } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD  void CallGraphWrapperPass::dump() const { print(dbgs(), nullptr); } -#endif diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp index 07b389a2a139..07b389a2a139 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp diff --git a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp b/contrib/llvm/lib/Analysis/CallPrinter.cpp index 68dcd3c06427..68dcd3c06427 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallPrinter.cpp +++ b/contrib/llvm/lib/Analysis/CallPrinter.cpp diff --git a/contrib/llvm/lib/Analysis/CaptureTracking.cpp b/contrib/llvm/lib/Analysis/CaptureTracking.cpp index 52ef807aeb59..1add2fa77566 100644 --- a/contrib/llvm/lib/Analysis/CaptureTracking.cpp +++ b/contrib/llvm/lib/Analysis/CaptureTracking.cpp @@ -21,6 +21,7 @@  #include "llvm/Analysis/AliasAnalysis.h"  #include "llvm/Analysis/CFG.h"  #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/OrderedBasicBlock.h"  #include "llvm/IR/CallSite.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/Dominators.h" @@ -52,63 +53,6 @@ namespace {      bool Captured;    }; -  struct NumberedInstCache { -    SmallDenseMap<const Instruction *, unsigned, 32> NumberedInsts; -    BasicBlock::const_iterator LastInstFound; -    unsigned LastInstPos; -    const BasicBlock *BB; - -    NumberedInstCache(const BasicBlock *BasicB) : LastInstPos(0), BB(BasicB) { -      LastInstFound = BB->end(); -    } - -    /// \brief Find the first instruction 'A' or 'B' in 'BB'. Number out -    /// instruction while walking 'BB'. -    const Instruction *find(const Instruction *A, const Instruction *B) { -      const Instruction *Inst = nullptr; -      assert(!(LastInstFound == BB->end() && LastInstPos != 0) && -             "Instruction supposed to be in NumberedInsts"); - -      // Start the search with the instruction found in the last lookup round. -      auto II = BB->begin(); -      auto IE = BB->end(); -      if (LastInstFound != IE) -        II = std::next(LastInstFound); - -      // Number all instructions up to the point where we find 'A' or 'B'. 
-      for (++LastInstPos; II != IE; ++II, ++LastInstPos) { -        Inst = cast<Instruction>(II); -        NumberedInsts[Inst] = LastInstPos; -        if (Inst == A || Inst == B) -          break; -      } - -      assert(II != IE && "Instruction not found?"); -      LastInstFound = II; -      return Inst; -    } - -    /// \brief Find out whether 'A' dominates 'B', meaning whether 'A' -    /// comes before 'B' in 'BB'. This is a simplification that considers -    /// cached instruction positions and ignores other basic blocks, being -    /// only relevant to compare relative instructions positions inside 'BB'. -    bool dominates(const Instruction *A, const Instruction *B) { -      assert(A->getParent() == B->getParent() && -             "Instructions must be in the same basic block!"); - -      unsigned NA = NumberedInsts.lookup(A); -      unsigned NB = NumberedInsts.lookup(B); -      if (NA && NB) -        return NA < NB; -      if (NA) -        return true; -      if (NB) -        return false; - -      return A == find(A, B); -    } -  }; -    /// Only find pointer captures which happen before the given instruction. Uses    /// the dominator tree to determine whether one instruction is before another.    /// Only support the case where the Value is defined in the same basic block @@ -116,8 +60,8 @@ namespace {    struct CapturesBefore : public CaptureTracker {      CapturesBefore(bool ReturnCaptures, const Instruction *I, DominatorTree *DT, -                   bool IncludeI) -      : LocalInstCache(I->getParent()), BeforeHere(I), DT(DT), +                   bool IncludeI, OrderedBasicBlock *IC) +      : OrderedBB(IC), BeforeHere(I), DT(DT),          ReturnCaptures(ReturnCaptures), IncludeI(IncludeI), Captured(false) {}      void tooManyUses() override { Captured = true; } @@ -131,18 +75,18 @@ namespace {        // Compute the case where both instructions are inside the same basic        // block. Since instructions in the same BB as BeforeHere are numbered in -      // 'LocalInstCache', avoid using 'dominates' and 'isPotentiallyReachable' +      // 'OrderedBB', avoid using 'dominates' and 'isPotentiallyReachable'        // which are very expensive for large basic blocks.        if (BB == BeforeHere->getParent()) {          // 'I' dominates 'BeforeHere' => not safe to prune.          // -        // The value defined by an invoke dominates an instruction only if it -        // dominates every instruction in UseBB. A PHI is dominated only if -        // the instruction dominates every possible use in the UseBB. Since +        // The value defined by an invoke dominates an instruction only +        // if it dominates every instruction in UseBB. A PHI is dominated only +        // if the instruction dominates every possible use in the UseBB. Since          // UseBB == BB, avoid pruning.          
if (isa<InvokeInst>(BeforeHere) || isa<PHINode>(I) || I == BeforeHere)            return false; -        if (!LocalInstCache.dominates(BeforeHere, I)) +        if (!OrderedBB->dominates(BeforeHere, I))            return false;          // 'BeforeHere' comes before 'I', it's safe to prune if we also @@ -157,10 +101,7 @@ namespace {          SmallVector<BasicBlock*, 32> Worklist;          Worklist.append(succ_begin(BB), succ_end(BB)); -        if (!isPotentiallyReachableFromMany(Worklist, BB, DT)) -          return true; - -        return false; +        return !isPotentiallyReachableFromMany(Worklist, BB, DT);        }        // If the value is defined in the same basic block as use and BeforeHere, @@ -196,7 +137,7 @@ namespace {        return true;      } -    NumberedInstCache LocalInstCache; +    OrderedBasicBlock *OrderedBB;      const Instruction *BeforeHere;      DominatorTree *DT; @@ -238,21 +179,29 @@ bool llvm::PointerMayBeCaptured(const Value *V,  /// returning the value (or part of it) from the function counts as capturing  /// it or not.  The boolean StoreCaptures specified whether storing the value  /// (or part of it) into memory anywhere automatically counts as capturing it -/// or not. +/// or not. A ordered basic block \p OBB can be used in order to speed up +/// queries about relative order among instructions in the same basic block.  bool llvm::PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures,                                        bool StoreCaptures, const Instruction *I, -                                      DominatorTree *DT, bool IncludeI) { +                                      DominatorTree *DT, bool IncludeI, +                                      OrderedBasicBlock *OBB) {    assert(!isa<GlobalValue>(V) &&           "It doesn't make sense to ask whether a global is captured."); +  bool UseNewOBB = OBB == nullptr;    if (!DT)      return PointerMayBeCaptured(V, ReturnCaptures, StoreCaptures); +  if (UseNewOBB) +    OBB = new OrderedBasicBlock(I->getParent());    // TODO: See comment in PointerMayBeCaptured regarding what could be done    // with StoreCaptures. -  CapturesBefore CB(ReturnCaptures, I, DT, IncludeI); +  CapturesBefore CB(ReturnCaptures, I, DT, IncludeI, OBB);    PointerMayBeCaptured(V, &CB); + +  if (UseNewOBB) +    delete OBB;    return CB.Captured;  } @@ -300,8 +249,9 @@ void llvm::PointerMayBeCaptured(const Value *V, CaptureTracker *Tracker) {        // that loading a value from a pointer does not cause the pointer to be        // captured, even though the loaded value might be the pointer itself        // (think of self-referential objects). -      CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); -      for (CallSite::arg_iterator A = B; A != E; ++A) +      CallSite::data_operand_iterator B = +        CS.data_operands_begin(), E = CS.data_operands_end(); +      for (CallSite::data_operand_iterator A = B; A != E; ++A)          if (A->get() == V && !CS.doesNotCapture(A - B))            // The parameter is not marked 'nocapture' - captured.            if (Tracker->captured(U)) diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp index 46a2c43b1690..4090b4cd752b 100644 --- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp +++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp @@ -45,14 +45,8 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,        continue;      // If all uses of this value are ephemeral, then so is this value. 
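The CodeMetrics hunk just below replaces a hand-written found-a-non-ephemeral-use flag with std::all_of over the users. The two forms are equivalent, as this toy example (plain ints standing in for the User pointers and the EphValues set) demonstrates:

    #include <algorithm>
    #include <cassert>
    #include <set>
    #include <vector>

    int main() {
      std::set<int> EphValues = {1, 2, 3};   // users already known to be ephemeral
      std::vector<int> Users = {1, 2, 3};    // users of the value in question

      // Old shape: scan for a non-ephemeral use and remember whether we saw one.
      bool FoundNEUse = false;
      for (int U : Users)
        if (!EphValues.count(U)) { FoundNEUse = true; break; }

      // New shape: the value is ephemeral iff *all* of its users are.
      bool AllEphemeral =
          std::all_of(Users.begin(), Users.end(),
                      [&](int U) { return EphValues.count(U) != 0; });

      assert(AllEphemeral == !FoundNEUse);
      return 0;
    }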
-    bool FoundNEUse = false; -    for (const User *I : V->users()) -      if (!EphValues.count(I)) { -        FoundNEUse = true; -        break; -      } - -    if (FoundNEUse) +    if (!std::all_of(V->user_begin(), V->user_end(), +                     [&](const User *U) { return EphValues.count(U); }))        continue;      EphValues.insert(V); @@ -116,7 +110,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,    for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();         II != E; ++II) {      // Skip ephemeral values. -    if (EphValues.count(II)) +    if (EphValues.count(&*II))        continue;      // Special handling for calls. @@ -155,6 +149,9 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,      if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())        ++NumVectorInsts; +    if (II->getType()->isTokenTy() && II->isUsedOutsideOfBlock(BB)) +      notDuplicatable = true; +      if (const CallInst *CI = dyn_cast<CallInst>(II))        if (CI->cannotDuplicate())          notDuplicatable = true; diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index 02a5aef03223..ccb56631b846 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -248,8 +248,7 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV,    // Look through ptr->int and ptr->ptr casts.    if (CE->getOpcode() == Instruction::PtrToInt || -      CE->getOpcode() == Instruction::BitCast || -      CE->getOpcode() == Instruction::AddrSpaceCast) +      CE->getOpcode() == Instruction::BitCast)      return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, DL);    // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) @@ -532,6 +531,10 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,      if (GV->isConstant() && GV->hasDefinitiveInitializer())        return GV->getInitializer(); +  if (auto *GA = dyn_cast<GlobalAlias>(C)) +    if (GA->getAliasee() && !GA->mayBeOverridden()) +      return ConstantFoldLoadFromConstPtr(GA->getAliasee(), DL); +    // If the loaded value isn't a constant expr, we can't handle it.    ConstantExpr *CE = dyn_cast<ConstantExpr>(C);    if (!CE) @@ -1236,6 +1239,9 @@ bool llvm::canConstantFoldCallTo(const Function *F) {    case Intrinsic::sqrt:    case Intrinsic::sin:    case Intrinsic::cos: +  case Intrinsic::trunc: +  case Intrinsic::rint: +  case Intrinsic::nearbyint:    case Intrinsic::pow:    case Intrinsic::powi:    case Intrinsic::bswap: @@ -1276,24 +1282,30 @@ bool llvm::canConstantFoldCallTo(const Function *F) {    // return true for a name like "cos\0blah" which strcmp would return equal to    // "cos", but has length 8.    
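The switch that follows, together with the folding code later in this file's diff, extends the recognized names to the f-suffixed single-precision variants; those are evaluated with the host's double-precision libm and the result is narrowed back to float. A rough illustration of that evaluate-in-double, narrow-to-float step (toy code, not the ConstantFoldFP helper):

    #include <cmath>
    #include <cstdio>

    // Fold a float libcall by evaluating in double and narrowing the result,
    // the same shape as folding "cosf" with the host's cos().
    static float foldCosf(float X) {
      double Wide = std::cos(static_cast<double>(X));
      return static_cast<float>(Wide);   // round once to the nearest float
    }

    int main() {
      std::printf("cosf(1.0f) folds to %a\n", foldCosf(1.0f));
    }

Whether a given name may be folded at all stays gated on TargetLibraryInfo reporting the corresponding libcall, per the TLI->has(LibFunc::...) guards in the diff.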
switch (Name[0]) { -  default: return false; +  default: +    return false;    case 'a': -    return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; +    return Name == "acos" || Name == "asin" || Name == "atan" || +           Name == "atan2" || Name == "acosf" || Name == "asinf" || +           Name == "atanf" || Name == "atan2f";    case 'c': -    return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; +    return Name == "ceil" || Name == "cos" || Name == "cosh" || +           Name == "ceilf" || Name == "cosf" || Name == "coshf";    case 'e': -    return Name == "exp" || Name == "exp2"; +    return Name == "exp" || Name == "exp2" || Name == "expf" || Name == "exp2f";    case 'f': -    return Name == "fabs" || Name == "fmod" || Name == "floor"; +    return Name == "fabs" || Name == "floor" || Name == "fmod" || +           Name == "fabsf" || Name == "floorf" || Name == "fmodf";    case 'l': -    return Name == "log" || Name == "log10"; +    return Name == "log" || Name == "log10" || Name == "logf" || +           Name == "log10f";    case 'p': -    return Name == "pow"; +    return Name == "pow" || Name == "powf";    case 's':      return Name == "sin" || Name == "sinh" || Name == "sqrt" || -      Name == "sinf" || Name == "sqrtf"; +           Name == "sinf" || Name == "sinhf" || Name == "sqrtf";    case 't': -    return Name == "tan" || Name == "tanh"; +    return Name == "tan" || Name == "tanh" || Name == "tanf" || Name == "tanhf";    }  } @@ -1422,6 +1434,36 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,          return ConstantFP::get(Ty->getContext(), V);        } +      if (IntrinsicID == Intrinsic::floor) { +        APFloat V = Op->getValueAPF(); +        V.roundToIntegral(APFloat::rmTowardNegative); +        return ConstantFP::get(Ty->getContext(), V); +      } + +      if (IntrinsicID == Intrinsic::ceil) { +        APFloat V = Op->getValueAPF(); +        V.roundToIntegral(APFloat::rmTowardPositive); +        return ConstantFP::get(Ty->getContext(), V); +      } + +      if (IntrinsicID == Intrinsic::trunc) { +        APFloat V = Op->getValueAPF(); +        V.roundToIntegral(APFloat::rmTowardZero); +        return ConstantFP::get(Ty->getContext(), V); +      } + +      if (IntrinsicID == Intrinsic::rint) { +        APFloat V = Op->getValueAPF(); +        V.roundToIntegral(APFloat::rmNearestTiesToEven); +        return ConstantFP::get(Ty->getContext(), V); +      } + +      if (IntrinsicID == Intrinsic::nearbyint) { +        APFloat V = Op->getValueAPF(); +        V.roundToIntegral(APFloat::rmNearestTiesToEven); +        return ConstantFP::get(Ty->getContext(), V); +      } +        /// We only fold functions with finite arguments. Folding NaN and inf is        /// likely to be aborted with an exception anyway, and some host libms        /// have known errors raising exceptions. 
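The hunk above folds the floor, ceil, trunc, rint and nearbyint intrinsics by calling APFloat::roundToIntegral with the matching rounding mode, with rint and nearbyint both mapped to round-to-nearest-even. The small host-side program below shows those five roundings on one value using <cmath>; it is only an illustration, since the LLVM code rounds APFloat values rather than host doubles:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double V = 2.5;
      std::printf("floor(%.1f)     = %.1f\n", V, std::floor(V));     // toward -inf      -> 2
      std::printf("ceil(%.1f)      = %.1f\n", V, std::ceil(V));      // toward +inf      -> 3
      std::printf("trunc(%.1f)     = %.1f\n", V, std::trunc(V));     // toward zero      -> 2
      std::printf("rint(%.1f)      = %.1f\n", V, std::rint(V));      // ties-to-even     -> 2
      std::printf("nearbyint(%.1f) = %.1f\n", V, std::nearbyint(V)); // ties-to-even     -> 2
    }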
@@ -1448,10 +1490,6 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,            return ConstantFoldFP(exp, V, Ty);          case Intrinsic::exp2:            return ConstantFoldFP(exp2, V, Ty); -        case Intrinsic::floor: -          return ConstantFoldFP(floor, V, Ty); -        case Intrinsic::ceil: -          return ConstantFoldFP(ceil, V, Ty);          case Intrinsic::sin:            return ConstantFoldFP(sin, V, Ty);          case Intrinsic::cos: @@ -1463,43 +1501,51 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,        switch (Name[0]) {        case 'a': -        if (Name == "acos" && TLI->has(LibFunc::acos)) +        if ((Name == "acos" && TLI->has(LibFunc::acos)) || +            (Name == "acosf" && TLI->has(LibFunc::acosf)))            return ConstantFoldFP(acos, V, Ty); -        else if (Name == "asin" && TLI->has(LibFunc::asin)) +        else if ((Name == "asin" && TLI->has(LibFunc::asin)) || +                 (Name == "asinf" && TLI->has(LibFunc::asinf)))            return ConstantFoldFP(asin, V, Ty); -        else if (Name == "atan" && TLI->has(LibFunc::atan)) +        else if ((Name == "atan" && TLI->has(LibFunc::atan)) || +                 (Name == "atanf" && TLI->has(LibFunc::atanf)))            return ConstantFoldFP(atan, V, Ty);          break;        case 'c': -        if (Name == "ceil" && TLI->has(LibFunc::ceil)) +        if ((Name == "ceil" && TLI->has(LibFunc::ceil)) || +            (Name == "ceilf" && TLI->has(LibFunc::ceilf)))            return ConstantFoldFP(ceil, V, Ty); -        else if (Name == "cos" && TLI->has(LibFunc::cos)) +        else if ((Name == "cos" && TLI->has(LibFunc::cos)) || +                 (Name == "cosf" && TLI->has(LibFunc::cosf)))            return ConstantFoldFP(cos, V, Ty); -        else if (Name == "cosh" && TLI->has(LibFunc::cosh)) +        else if ((Name == "cosh" && TLI->has(LibFunc::cosh)) || +                 (Name == "coshf" && TLI->has(LibFunc::coshf)))            return ConstantFoldFP(cosh, V, Ty); -        else if (Name == "cosf" && TLI->has(LibFunc::cosf)) -          return ConstantFoldFP(cos, V, Ty);          break;        case 'e': -        if (Name == "exp" && TLI->has(LibFunc::exp)) +        if ((Name == "exp" && TLI->has(LibFunc::exp)) || +            (Name == "expf" && TLI->has(LibFunc::expf)))            return ConstantFoldFP(exp, V, Ty); - -        if (Name == "exp2" && TLI->has(LibFunc::exp2)) { +        if ((Name == "exp2" && TLI->has(LibFunc::exp2)) || +            (Name == "exp2f" && TLI->has(LibFunc::exp2f)))            // Constant fold exp2(x) as pow(2,x) in case the host doesn't have a            // C99 library.            
return ConstantFoldBinaryFP(pow, 2.0, V, Ty); -        }          break;        case 'f': -        if (Name == "fabs" && TLI->has(LibFunc::fabs)) +        if ((Name == "fabs" && TLI->has(LibFunc::fabs)) || +            (Name == "fabsf" && TLI->has(LibFunc::fabsf)))            return ConstantFoldFP(fabs, V, Ty); -        else if (Name == "floor" && TLI->has(LibFunc::floor)) +        else if ((Name == "floor" && TLI->has(LibFunc::floor)) || +                 (Name == "floorf" && TLI->has(LibFunc::floorf)))            return ConstantFoldFP(floor, V, Ty);          break;        case 'l': -        if (Name == "log" && V > 0 && TLI->has(LibFunc::log)) +        if ((Name == "log" && V > 0 && TLI->has(LibFunc::log)) || +            (Name == "logf" && V > 0 && TLI->has(LibFunc::logf)))            return ConstantFoldFP(log, V, Ty); -        else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) +        else if ((Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) || +                 (Name == "log10f" && V > 0 && TLI->has(LibFunc::log10f)))            return ConstantFoldFP(log10, V, Ty);          else if (IntrinsicID == Intrinsic::sqrt &&                   (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { @@ -1516,21 +1562,22 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,          }          break;        case 's': -        if (Name == "sin" && TLI->has(LibFunc::sin)) +        if ((Name == "sin" && TLI->has(LibFunc::sin)) || +            (Name == "sinf" && TLI->has(LibFunc::sinf)))            return ConstantFoldFP(sin, V, Ty); -        else if (Name == "sinh" && TLI->has(LibFunc::sinh)) +        else if ((Name == "sinh" && TLI->has(LibFunc::sinh)) || +                 (Name == "sinhf" && TLI->has(LibFunc::sinhf)))            return ConstantFoldFP(sinh, V, Ty); -        else if (Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) -          return ConstantFoldFP(sqrt, V, Ty); -        else if (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)) +        else if ((Name == "sqrt" && V >= 0 && TLI->has(LibFunc::sqrt)) || +                 (Name == "sqrtf" && V >= 0 && TLI->has(LibFunc::sqrtf)))            return ConstantFoldFP(sqrt, V, Ty); -        else if (Name == "sinf" && TLI->has(LibFunc::sinf)) -          return ConstantFoldFP(sin, V, Ty);          break;        case 't': -        if (Name == "tan" && TLI->has(LibFunc::tan)) +        if ((Name == "tan" && TLI->has(LibFunc::tan)) || +            (Name == "tanf" && TLI->has(LibFunc::tanf)))            return ConstantFoldFP(tan, V, Ty); -        else if (Name == "tanh" && TLI->has(LibFunc::tanh)) +        else if ((Name == "tanh" && TLI->has(LibFunc::tanh)) || +                 (Name == "tanhf" && TLI->has(LibFunc::tanhf)))            return ConstantFoldFP(tanh, V, Ty);          break;        default: @@ -1633,11 +1680,14 @@ static Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID,          if (!TLI)            return nullptr; -        if (Name == "pow" && TLI->has(LibFunc::pow)) +        if ((Name == "pow" && TLI->has(LibFunc::pow)) || +            (Name == "powf" && TLI->has(LibFunc::powf)))            return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); -        if (Name == "fmod" && TLI->has(LibFunc::fmod)) +        if ((Name == "fmod" && TLI->has(LibFunc::fmod)) || +            (Name == "fmodf" && TLI->has(LibFunc::fmodf)))            return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty); -        if (Name == "atan2" && TLI->has(LibFunc::atan2)) +        if ((Name == "atan2" 
&& TLI->has(LibFunc::atan2)) || +            (Name == "atan2f" && TLI->has(LibFunc::atan2f)))            return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);        } else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {          if (IntrinsicID == Intrinsic::powi && Ty->isHalfTy()) diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp index b529c1a70aa3..0383cbfbbe4c 100644 --- a/contrib/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -152,10 +152,7 @@ static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,      Mask[i] = val;    SmallVector<int, 16> ActualMask = SI->getShuffleMask(); -  if (Mask != ActualMask) -    return false; - -  return true; +  return Mask == ActualMask;  }  static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, @@ -383,10 +380,8 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {      return -1;    switch (I->getOpcode()) { -  case Instruction::GetElementPtr:{ -    Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); -    return TTI->getAddressComputationCost(ValTy); -  } +  case Instruction::GetElementPtr: +    return TTI->getUserCost(I);    case Instruction::Ret:    case Instruction::PHI: @@ -505,12 +500,12 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {    }    case Instruction::Call:      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { -      SmallVector<Type*, 4> Tys; +      SmallVector<Value *, 4> Args;        for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) -        Tys.push_back(II->getArgOperand(J)->getType()); +        Args.push_back(II->getArgOperand(J));        return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), -                                        Tys); +                                        Args);      }      return -1;    default: @@ -525,7 +520,7 @@ void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {    for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) {      for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { -      Instruction *Inst = it; +      Instruction *Inst = &*it;        unsigned Cost = getInstructionCost(Inst);        if (Cost != (unsigned)-1)          OS << "Cost Model: Found an estimated cost of " << Cost; diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp index 9d1578603268..baee8b3b084b 100644 --- a/contrib/llvm/lib/Analysis/Delinearization.cpp +++ b/contrib/llvm/lib/Analysis/Delinearization.cpp @@ -60,12 +60,12 @@ public:  void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const {    AU.setPreservesAll();    AU.addRequired<LoopInfoWrapperPass>(); -  AU.addRequired<ScalarEvolution>(); +  AU.addRequired<ScalarEvolutionWrapperPass>();  }  bool Delinearization::runOnFunction(Function &F) {    this->F = &F; -  SE = &getAnalysis<ScalarEvolution>(); +  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();    return false;  } @@ -102,20 +102,14 @@ void Delinearization::print(raw_ostream &O, const Module *) const {        if (!BasePointer)          break;        AccessFn = SE->getMinusSCEV(AccessFn, BasePointer); -      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(AccessFn); - -      // Do not try to delinearize memory accesses that are not AddRecs. 
-      if (!AR) -        break; -        O << "\n";        O << "Inst:" << *Inst << "\n";        O << "In Loop with Header: " << L->getHeader()->getName() << "\n"; -      O << "AddRec: " << *AR << "\n"; +      O << "AccessFunction: " << *AccessFn << "\n";        SmallVector<const SCEV *, 3> Subscripts, Sizes; -      SE->delinearize(AR, Subscripts, Sizes, SE->getElementSize(Inst)); +      SE->delinearize(AccessFn, Subscripts, Sizes, SE->getElementSize(Inst));        if (Subscripts.size() == 0 || Sizes.size() == 0 ||            Subscripts.size() != Sizes.size()) {          O << "failed to delinearize\n"; diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp new file mode 100644 index 000000000000..912c5ceb754d --- /dev/null +++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp @@ -0,0 +1,392 @@ +//===---- DemandedBits.cpp - Determine demanded bits ----------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass implements a demanded bits analysis. A demanded bit is one that +// contributes to a result; bits that are not demanded can be either zero or +// one without affecting control or data flow. For example in this sequence: +// +//   %1 = add i32 %x, %y +//   %2 = trunc i32 %1 to i16 +// +// Only the lowest 16 bits of %1 are demanded; the rest are removed by the +// trunc. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "demanded-bits" + +char DemandedBits::ID = 0; +INITIALIZE_PASS_BEGIN(DemandedBits, "demanded-bits", "Demanded bits analysis", +                      false, false) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_END(DemandedBits, "demanded-bits", "Demanded bits analysis", +                    false, false) + +DemandedBits::DemandedBits() : FunctionPass(ID), F(nullptr), Analyzed(false) { +  initializeDemandedBitsPass(*PassRegistry::getPassRegistry()); +} + +void DemandedBits::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesCFG(); +  AU.addRequired<AssumptionCacheTracker>(); +  AU.addRequired<DominatorTreeWrapperPass>(); +  AU.setPreservesAll(); +} + +static bool isAlwaysLive(Instruction *I) { +  return isa<TerminatorInst>(I) || isa<DbgInfoIntrinsic>(I) || +      I->isEHPad() || I->mayHaveSideEffects(); +} + +void DemandedBits::determineLiveOperandBits( +    const Instruction *UserI, const Instruction *I, unsigned OperandNo, +    const APInt &AOut, APInt &AB, APInt &KnownZero, APInt &KnownOne, +    APInt 
&KnownZero2, APInt &KnownOne2) { +  unsigned BitWidth = AB.getBitWidth(); + +  // We're called once per operand, but for some instructions, we need to +  // compute known bits of both operands in order to determine the live bits of +  // either (when both operands are instructions themselves). We don't, +  // however, want to do this twice, so we cache the result in APInts that live +  // in the caller. For the two-relevant-operands case, both operand values are +  // provided here. +  auto ComputeKnownBits = +      [&](unsigned BitWidth, const Value *V1, const Value *V2) { +        const DataLayout &DL = I->getModule()->getDataLayout(); +        KnownZero = APInt(BitWidth, 0); +        KnownOne = APInt(BitWidth, 0); +        computeKnownBits(const_cast<Value *>(V1), KnownZero, KnownOne, DL, 0, +                         AC, UserI, DT); + +        if (V2) { +          KnownZero2 = APInt(BitWidth, 0); +          KnownOne2 = APInt(BitWidth, 0); +          computeKnownBits(const_cast<Value *>(V2), KnownZero2, KnownOne2, DL, +                           0, AC, UserI, DT); +        } +      }; + +  switch (UserI->getOpcode()) { +  default: break; +  case Instruction::Call: +  case Instruction::Invoke: +    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(UserI)) +      switch (II->getIntrinsicID()) { +      default: break; +      case Intrinsic::bswap: +        // The alive bits of the input are the swapped alive bits of +        // the output. +        AB = AOut.byteSwap(); +        break; +      case Intrinsic::ctlz: +        if (OperandNo == 0) { +          // We need some output bits, so we need all bits of the +          // input to the left of, and including, the leftmost bit +          // known to be one. +          ComputeKnownBits(BitWidth, I, nullptr); +          AB = APInt::getHighBitsSet(BitWidth, +                 std::min(BitWidth, KnownOne.countLeadingZeros()+1)); +        } +        break; +      case Intrinsic::cttz: +        if (OperandNo == 0) { +          // We need some output bits, so we need all bits of the +          // input to the right of, and including, the rightmost bit +          // known to be one. +          ComputeKnownBits(BitWidth, I, nullptr); +          AB = APInt::getLowBitsSet(BitWidth, +                 std::min(BitWidth, KnownOne.countTrailingZeros()+1)); +        } +        break; +      } +    break; +  case Instruction::Add: +  case Instruction::Sub: +  case Instruction::Mul: +    // Find the highest live output bit. We don't need any more input +    // bits than that (adds, and thus subtracts, ripple only to the +    // left). +    AB = APInt::getLowBitsSet(BitWidth, AOut.getActiveBits()); +    break; +  case Instruction::Shl: +    if (OperandNo == 0) +      if (ConstantInt *CI = +            dyn_cast<ConstantInt>(UserI->getOperand(1))) { +        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); +        AB = AOut.lshr(ShiftAmt); + +        // If the shift is nuw/nsw, then the high bits are not dead +        // (because we've promised that they *must* be zero). 
+        const ShlOperator *S = cast<ShlOperator>(UserI); +        if (S->hasNoSignedWrap()) +          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt+1); +        else if (S->hasNoUnsignedWrap()) +          AB |= APInt::getHighBitsSet(BitWidth, ShiftAmt); +      } +    break; +  case Instruction::LShr: +    if (OperandNo == 0) +      if (ConstantInt *CI = +            dyn_cast<ConstantInt>(UserI->getOperand(1))) { +        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); +        AB = AOut.shl(ShiftAmt); + +        // If the shift is exact, then the low bits are not dead +        // (they must be zero). +        if (cast<LShrOperator>(UserI)->isExact()) +          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); +      } +    break; +  case Instruction::AShr: +    if (OperandNo == 0) +      if (ConstantInt *CI = +            dyn_cast<ConstantInt>(UserI->getOperand(1))) { +        uint64_t ShiftAmt = CI->getLimitedValue(BitWidth-1); +        AB = AOut.shl(ShiftAmt); +        // Because the high input bit is replicated into the +        // high-order bits of the result, if we need any of those +        // bits, then we must keep the highest input bit. +        if ((AOut & APInt::getHighBitsSet(BitWidth, ShiftAmt)) +            .getBoolValue()) +          AB.setBit(BitWidth-1); + +        // If the shift is exact, then the low bits are not dead +        // (they must be zero). +        if (cast<AShrOperator>(UserI)->isExact()) +          AB |= APInt::getLowBitsSet(BitWidth, ShiftAmt); +      } +    break; +  case Instruction::And: +    AB = AOut; + +    // For bits that are known zero, the corresponding bits in the +    // other operand are dead (unless they're both zero, in which +    // case they can't both be dead, so just mark the LHS bits as +    // dead). +    if (OperandNo == 0) { +      ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); +      AB &= ~KnownZero2; +    } else { +      if (!isa<Instruction>(UserI->getOperand(0))) +        ComputeKnownBits(BitWidth, UserI->getOperand(0), I); +      AB &= ~(KnownZero & ~KnownZero2); +    } +    break; +  case Instruction::Or: +    AB = AOut; + +    // For bits that are known one, the corresponding bits in the +    // other operand are dead (unless they're both one, in which +    // case they can't both be dead, so just mark the LHS bits as +    // dead). +    if (OperandNo == 0) { +      ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); +      AB &= ~KnownOne2; +    } else { +      if (!isa<Instruction>(UserI->getOperand(0))) +        ComputeKnownBits(BitWidth, UserI->getOperand(0), I); +      AB &= ~(KnownOne & ~KnownOne2); +    } +    break; +  case Instruction::Xor: +  case Instruction::PHI: +    AB = AOut; +    break; +  case Instruction::Trunc: +    AB = AOut.zext(BitWidth); +    break; +  case Instruction::ZExt: +    AB = AOut.trunc(BitWidth); +    break; +  case Instruction::SExt: +    AB = AOut.trunc(BitWidth); +    // Because the high input bit is replicated into the +    // high-order bits of the result, if we need any of those +    // bits, then we must keep the highest input bit. +    if ((AOut & APInt::getHighBitsSet(AOut.getBitWidth(), +                                      AOut.getBitWidth() - BitWidth)) +        .getBoolValue()) +      AB.setBit(BitWidth-1); +    break; +  case Instruction::Select: +    if (OperandNo != 0) +      AB = AOut; +    break; +  case Instruction::ICmp: +    // Count the number of leading zeroes in each operand. 
+    ComputeKnownBits(BitWidth, I, UserI->getOperand(1)); +    auto NumLeadingZeroes = std::min(KnownZero.countLeadingOnes(), +                                     KnownZero2.countLeadingOnes()); +    AB = ~APInt::getHighBitsSet(BitWidth, NumLeadingZeroes); +    break; +  } +} + +bool DemandedBits::runOnFunction(Function& Fn) { +  F = &Fn; +  Analyzed = false; +  return false; +} + +void DemandedBits::performAnalysis() { +  if (Analyzed) +    // Analysis already completed for this function. +    return; +  Analyzed = true; +  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(*F); +  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); +   +  Visited.clear(); +  AliveBits.clear(); + +  SmallVector<Instruction*, 128> Worklist; + +  // Collect the set of "root" instructions that are known live. +  for (Instruction &I : instructions(*F)) { +    if (!isAlwaysLive(&I)) +      continue; + +    DEBUG(dbgs() << "DemandedBits: Root: " << I << "\n"); +    // For integer-valued instructions, set up an initial empty set of alive +    // bits and add the instruction to the work list. For other instructions +    // add their operands to the work list (for integer values operands, mark +    // all bits as live). +    if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { +      if (!AliveBits.count(&I)) { +        AliveBits[&I] = APInt(IT->getBitWidth(), 0); +        Worklist.push_back(&I); +      } + +      continue; +    } + +    // Non-integer-typed instructions... +    for (Use &OI : I.operands()) { +      if (Instruction *J = dyn_cast<Instruction>(OI)) { +        if (IntegerType *IT = dyn_cast<IntegerType>(J->getType())) +          AliveBits[J] = APInt::getAllOnesValue(IT->getBitWidth()); +        Worklist.push_back(J); +      } +    } +    // To save memory, we don't add I to the Visited set here. Instead, we +    // check isAlwaysLive on every instruction when searching for dead +    // instructions later (we need to check isAlwaysLive for the +    // integer-typed instructions anyway). +  } + +  // Propagate liveness backwards to operands. +  while (!Worklist.empty()) { +    Instruction *UserI = Worklist.pop_back_val(); + +    DEBUG(dbgs() << "DemandedBits: Visiting: " << *UserI); +    APInt AOut; +    if (UserI->getType()->isIntegerTy()) { +      AOut = AliveBits[UserI]; +      DEBUG(dbgs() << " Alive Out: " << AOut); +    } +    DEBUG(dbgs() << "\n"); + +    if (!UserI->getType()->isIntegerTy()) +      Visited.insert(UserI); + +    APInt KnownZero, KnownOne, KnownZero2, KnownOne2; +    // Compute the set of alive bits for each operand. These are anded into the +    // existing set, if any, and if that changes the set of alive bits, the +    // operand is added to the work-list. +    for (Use &OI : UserI->operands()) { +      if (Instruction *I = dyn_cast<Instruction>(OI)) { +        if (IntegerType *IT = dyn_cast<IntegerType>(I->getType())) { +          unsigned BitWidth = IT->getBitWidth(); +          APInt AB = APInt::getAllOnesValue(BitWidth); +          if (UserI->getType()->isIntegerTy() && !AOut && +              !isAlwaysLive(UserI)) { +            AB = APInt(BitWidth, 0); +          } else { +            // If all bits of the output are dead, then all bits of the input +            // Bits of each operand that are used to compute alive bits of the +            // output are alive, all others are dead. 
+            determineLiveOperandBits(UserI, I, OI.getOperandNo(), AOut, AB, +                                     KnownZero, KnownOne, +                                     KnownZero2, KnownOne2); +          } + +          // If we've added to the set of alive bits (or the operand has not +          // been previously visited), then re-queue the operand to be visited +          // again. +          APInt ABPrev(BitWidth, 0); +          auto ABI = AliveBits.find(I); +          if (ABI != AliveBits.end()) +            ABPrev = ABI->second; + +          APInt ABNew = AB | ABPrev; +          if (ABNew != ABPrev || ABI == AliveBits.end()) { +            AliveBits[I] = std::move(ABNew); +            Worklist.push_back(I); +          } +        } else if (!Visited.count(I)) { +          Worklist.push_back(I); +        } +      } +    } +  } +} + +APInt DemandedBits::getDemandedBits(Instruction *I) { +  performAnalysis(); +   +  const DataLayout &DL = I->getParent()->getModule()->getDataLayout(); +  if (AliveBits.count(I)) +    return AliveBits[I]; +  return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType())); +} + +bool DemandedBits::isInstructionDead(Instruction *I) { +  performAnalysis(); + +  return !Visited.count(I) && AliveBits.find(I) == AliveBits.end() && +    !isAlwaysLive(I); +} + +void DemandedBits::print(raw_ostream &OS, const Module *M) const { +  // This is gross. But the alternative is making all the state mutable +  // just because of this one debugging method. +  const_cast<DemandedBits*>(this)->performAnalysis(); +  for (auto &KV : AliveBits) { +    OS << "DemandedBits: 0x" << utohexstr(KV.second.getLimitedValue()) << " for " +       << *KV.first << "\n"; +  } +} + +FunctionPass *llvm::createDemandedBitsPass() { +  return new DemandedBits(); +} diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp index 4826ac407d7f..4040ad3cacd5 100644 --- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -117,8 +117,8 @@ Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore,  INITIALIZE_PASS_BEGIN(DependenceAnalysis, "da",                        "Dependence Analysis", true, true)  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)  INITIALIZE_PASS_END(DependenceAnalysis, "da",                      "Dependence Analysis", true, true) @@ -132,8 +132,8 @@ FunctionPass *llvm::createDependenceAnalysisPass() {  bool DependenceAnalysis::runOnFunction(Function &F) {    this->F = &F; -  AA = &getAnalysis<AliasAnalysis>(); -  SE = &getAnalysis<ScalarEvolution>(); +  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); +  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();    return false;  } @@ -145,8 +145,8 @@ void DependenceAnalysis::releaseMemory() {  void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {    AU.setPreservesAll(); -  AU.addRequiredTransitive<AliasAnalysis>(); -  AU.addRequiredTransitive<ScalarEvolution>(); +  AU.addRequiredTransitive<AAResultsWrapperPass>(); +  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();    AU.addRequiredTransitive<LoopInfoWrapperPass>();  } @@ -233,7 +233,8 @@ FullDependence::FullDependence(Instruction *Source, Instruction *Destination,      : 
Dependence(Source, Destination), Levels(CommonLevels),        LoopIndependent(PossiblyLoopIndependent) {    Consistent = true; -  DV = CommonLevels ? new DVEntry[CommonLevels] : nullptr; +  if (CommonLevels) +    DV = make_unique<DVEntry[]>(CommonLevels);  }  // The rest are simple getters that hide the implementation. @@ -371,7 +372,7 @@ void DependenceAnalysis::Constraint::setLine(const SCEV *AA,  void DependenceAnalysis::Constraint::setDistance(const SCEV *D,                                                   const Loop *CurLoop) {    Kind = Distance; -  A = SE->getConstant(D->getType(), 1); +  A = SE->getOne(D->getType());    B = SE->getNegativeSCEV(A);    C = SE->getNegativeSCEV(D);    AssociatedLoop = CurLoop; @@ -500,10 +501,10 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X,        if (!C1B2_C2B1 || !C1A2_C2A1 ||            !A1B2_A2B1 || !A2B1_A1B2)          return false; -      APInt Xtop = C1B2_C2B1->getValue()->getValue(); -      APInt Xbot = A1B2_A2B1->getValue()->getValue(); -      APInt Ytop = C1A2_C2A1->getValue()->getValue(); -      APInt Ybot = A2B1_A1B2->getValue()->getValue(); +      APInt Xtop = C1B2_C2B1->getAPInt(); +      APInt Xbot = A1B2_A2B1->getAPInt(); +      APInt Ytop = C1A2_C2A1->getAPInt(); +      APInt Ybot = A2B1_A1B2->getAPInt();        DEBUG(dbgs() << "\t\tXtop = " << Xtop << "\n");        DEBUG(dbgs() << "\t\tXbot = " << Xbot << "\n");        DEBUG(dbgs() << "\t\tYtop = " << Ytop << "\n"); @@ -527,7 +528,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X,        }        if (const SCEVConstant *CUB =            collectConstantUpperBound(X->getAssociatedLoop(), Prod1->getType())) { -        APInt UpperBound = CUB->getValue()->getValue(); +        APInt UpperBound = CUB->getAPInt();          DEBUG(dbgs() << "\t\tupper bound = " << UpperBound << "\n");          if (Xq.sgt(UpperBound) || Yq.sgt(UpperBound)) {            X->setEmpty(); @@ -630,8 +631,8 @@ static AliasResult underlyingObjectsAlias(AliasAnalysis *AA,                                            const Value *B) {    const Value *AObj = GetUnderlyingObject(A, DL);    const Value *BObj = GetUnderlyingObject(B, DL); -  return AA->alias(AObj, AA->getTypeStoreSize(AObj->getType()), -                   BObj, AA->getTypeStoreSize(BObj->getType())); +  return AA->alias(AObj, DL.getTypeStoreSize(AObj->getType()), +                   BObj, DL.getTypeStoreSize(BObj->getType()));  } @@ -1114,8 +1115,8 @@ bool DependenceAnalysis::strongSIVtest(const SCEV *Coeff,    // Can we compute distance?    
if (isa<SCEVConstant>(Delta) && isa<SCEVConstant>(Coeff)) { -    APInt ConstDelta = cast<SCEVConstant>(Delta)->getValue()->getValue(); -    APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getValue()->getValue(); +    APInt ConstDelta = cast<SCEVConstant>(Delta)->getAPInt(); +    APInt ConstCoeff = cast<SCEVConstant>(Coeff)->getAPInt();      APInt Distance  = ConstDelta; // these need to be initialized      APInt Remainder = ConstDelta;      APInt::sdivrem(ConstDelta, ConstCoeff, Distance, Remainder); @@ -1256,11 +1257,9 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,    assert(SE->isKnownPositive(ConstCoeff) && "ConstCoeff should be positive");    // compute SplitIter for use by DependenceAnalysis::getSplitIteration() -  SplitIter = -    SE->getUDivExpr(SE->getSMaxExpr(SE->getConstant(Delta->getType(), 0), -                                    Delta), -                    SE->getMulExpr(SE->getConstant(Delta->getType(), 2), -                                   ConstCoeff)); +  SplitIter = SE->getUDivExpr( +      SE->getSMaxExpr(SE->getZero(Delta->getType()), Delta), +      SE->getMulExpr(SE->getConstant(Delta->getType(), 2), ConstCoeff));    DEBUG(dbgs() << "\t    Split iter = " << *SplitIter << "\n");    const SCEVConstant *ConstDelta = dyn_cast<SCEVConstant>(Delta); @@ -1302,14 +1301,14 @@ bool DependenceAnalysis::weakCrossingSIVtest(const SCEV *Coeff,          return true;        }        Result.DV[Level].Splitable = false; -      Result.DV[Level].Distance = SE->getConstant(Delta->getType(), 0); +      Result.DV[Level].Distance = SE->getZero(Delta->getType());        return false;      }    }    // check that Coeff divides Delta -  APInt APDelta = ConstDelta->getValue()->getValue(); -  APInt APCoeff = ConstCoeff->getValue()->getValue(); +  APInt APDelta = ConstDelta->getAPInt(); +  APInt APCoeff = ConstCoeff->getAPInt();    APInt Distance = APDelta; // these need to be initialzed    APInt Remainder = APDelta;    APInt::sdivrem(APDelta, APCoeff, Distance, Remainder); @@ -1463,10 +1462,10 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,    // find gcd    APInt G, X, Y; -  APInt AM = ConstSrcCoeff->getValue()->getValue(); -  APInt BM = ConstDstCoeff->getValue()->getValue(); +  APInt AM = ConstSrcCoeff->getAPInt(); +  APInt BM = ConstDstCoeff->getAPInt();    unsigned Bits = AM.getBitWidth(); -  if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { +  if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {      // gcd doesn't divide Delta, no dependence      ++ExactSIVindependence;      ++ExactSIVsuccesses; @@ -1481,7 +1480,7 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,    // UM is perhaps unavailable, let's check    if (const SCEVConstant *CUB =        collectConstantUpperBound(CurLoop, Delta->getType())) { -    UM = CUB->getValue()->getValue(); +    UM = CUB->getAPInt();      DEBUG(dbgs() << "\t    UM = " << UM << "\n");      UMvalid = true;    } @@ -1609,8 +1608,8 @@ bool DependenceAnalysis::exactSIVtest(const SCEV *SrcCoeff,  static  bool isRemainderZero(const SCEVConstant *Dividend,                       const SCEVConstant *Divisor) { -  APInt ConstDividend = Dividend->getValue()->getValue(); -  APInt ConstDivisor = Divisor->getValue()->getValue(); +  APInt ConstDividend = Dividend->getAPInt(); +  APInt ConstDivisor = Divisor->getAPInt();    return ConstDividend.srem(ConstDivisor) == 0;  } @@ -1665,8 +1664,8 @@ bool DependenceAnalysis::weakZeroSrcSIVtest(const SCEV *DstCoeff,    Level--;    Result.Consistent = 
false;    const SCEV *Delta = SE->getMinusSCEV(SrcConst, DstConst); -  NewConstraint.setLine(SE->getConstant(Delta->getType(), 0), -                        DstCoeff, Delta, CurLoop); +  NewConstraint.setLine(SE->getZero(Delta->getType()), DstCoeff, Delta, +                        CurLoop);    DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");    if (isKnownPredicate(CmpInst::ICMP_EQ, SrcConst, DstConst)) {      if (Level < CommonLevels) { @@ -1775,8 +1774,8 @@ bool DependenceAnalysis::weakZeroDstSIVtest(const SCEV *SrcCoeff,    Level--;    Result.Consistent = false;    const SCEV *Delta = SE->getMinusSCEV(DstConst, SrcConst); -  NewConstraint.setLine(SrcCoeff, SE->getConstant(Delta->getType(), 0), -                        Delta, CurLoop); +  NewConstraint.setLine(SrcCoeff, SE->getZero(Delta->getType()), Delta, +                        CurLoop);    DEBUG(dbgs() << "\t    Delta = " << *Delta << "\n");    if (isKnownPredicate(CmpInst::ICMP_EQ, DstConst, SrcConst)) {      if (Level < CommonLevels) { @@ -1867,10 +1866,10 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,    // find gcd    APInt G, X, Y; -  APInt AM = ConstSrcCoeff->getValue()->getValue(); -  APInt BM = ConstDstCoeff->getValue()->getValue(); +  APInt AM = ConstSrcCoeff->getAPInt(); +  APInt BM = ConstDstCoeff->getAPInt();    unsigned Bits = AM.getBitWidth(); -  if (findGCD(Bits, AM, BM, ConstDelta->getValue()->getValue(), G, X, Y)) { +  if (findGCD(Bits, AM, BM, ConstDelta->getAPInt(), G, X, Y)) {      // gcd doesn't divide Delta, no dependence      ++ExactRDIVindependence;      return true; @@ -1884,7 +1883,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,    // SrcUM is perhaps unavailable, let's check    if (const SCEVConstant *UpperBound =        collectConstantUpperBound(SrcLoop, Delta->getType())) { -    SrcUM = UpperBound->getValue()->getValue(); +    SrcUM = UpperBound->getAPInt();      DEBUG(dbgs() << "\t    SrcUM = " << SrcUM << "\n");      SrcUMvalid = true;    } @@ -1894,7 +1893,7 @@ bool DependenceAnalysis::exactRDIVtest(const SCEV *SrcCoeff,    // UM is perhaps unavailable, let's check    if (const SCEVConstant *UpperBound =        collectConstantUpperBound(DstLoop, Delta->getType())) { -    DstUM = UpperBound->getValue()->getValue(); +    DstUM = UpperBound->getAPInt();      DEBUG(dbgs() << "\t    DstUM = " << DstUM << "\n");      DstUMvalid = true;    } @@ -2307,7 +2306,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,        Constant = getConstantPart(Product);      if (!Constant)        return false; -    APInt ConstCoeff = Constant->getValue()->getValue(); +    APInt ConstCoeff = Constant->getAPInt();      RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());      Coefficients = AddRec->getStart();    } @@ -2328,7 +2327,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,        Constant = getConstantPart(Product);      if (!Constant)        return false; -    APInt ConstCoeff = Constant->getValue()->getValue(); +    APInt ConstCoeff = Constant->getAPInt();      RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());      Coefficients = AddRec->getStart();    } @@ -2352,7 +2351,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,          const SCEVConstant *ConstOp = getConstantPart(Product);          if (!ConstOp)            return false; -        APInt ConstOpValue = ConstOp->getValue()->getValue(); +        APInt ConstOpValue = ConstOp->getAPInt();          ExtraGCD = APIntOps::GreatestCommonDivisor(ExtraGCD,         
                                            ConstOpValue.abs());        } @@ -2362,7 +2361,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,    }    if (!Constant)      return false; -  APInt ConstDelta = cast<SCEVConstant>(Constant)->getValue()->getValue(); +  APInt ConstDelta = cast<SCEVConstant>(Constant)->getAPInt();    DEBUG(dbgs() << "    ConstDelta = " << ConstDelta << "\n");    if (ConstDelta == 0)      return false; @@ -2410,7 +2409,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,            Constant = getConstantPart(Product);          else            Constant = cast<SCEVConstant>(Coeff); -        APInt ConstCoeff = Constant->getValue()->getValue(); +        APInt ConstCoeff = Constant->getAPInt();          RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());        }        Inner = AddRec->getStart(); @@ -2428,7 +2427,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,            Constant = getConstantPart(Product);          else            Constant = cast<SCEVConstant>(Coeff); -        APInt ConstCoeff = Constant->getValue()->getValue(); +        APInt ConstCoeff = Constant->getAPInt();          RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());        }        Inner = AddRec->getStart(); @@ -2445,7 +2444,7 @@ bool DependenceAnalysis::gcdMIVtest(const SCEV *Src,        // or constant, in which case we give up on this direction.        continue;      } -    APInt ConstCoeff = Constant->getValue()->getValue(); +    APInt ConstCoeff = Constant->getAPInt();      RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());      DEBUG(dbgs() << "\tRunningGCD = " << RunningGCD << "\n");      if (RunningGCD != 0) { @@ -2728,10 +2727,10 @@ void DependenceAnalysis::findBoundsALL(CoefficientInfo *A,      // If the difference is 0, we won't need to know the number of iterations.      if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].NegPart, B[K].PosPart))        Bound[K].Lower[Dependence::DVEntry::ALL] = -        SE->getConstant(A[K].Coeff->getType(), 0); +          SE->getZero(A[K].Coeff->getType());      if (isKnownPredicate(CmpInst::ICMP_EQ, A[K].PosPart, B[K].NegPart))        Bound[K].Upper[Dependence::DVEntry::ALL] = -        SE->getConstant(A[K].Coeff->getType(), 0); +          SE->getZero(A[K].Coeff->getType());    }  } @@ -2800,9 +2799,8 @@ void DependenceAnalysis::findBoundsLT(CoefficientInfo *A,    Bound[K].Lower[Dependence::DVEntry::LT] = nullptr; // Default value = -infinity.    Bound[K].Upper[Dependence::DVEntry::LT] = nullptr; // Default value = +infinity.    if (Bound[K].Iterations) { -    const SCEV *Iter_1 = -      SE->getMinusSCEV(Bound[K].Iterations, -                       SE->getConstant(Bound[K].Iterations->getType(), 1)); +    const SCEV *Iter_1 = SE->getMinusSCEV( +        Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));      const SCEV *NegPart =        getNegativePart(SE->getMinusSCEV(A[K].NegPart, B[K].Coeff));      Bound[K].Lower[Dependence::DVEntry::LT] = @@ -2847,9 +2845,8 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,    Bound[K].Lower[Dependence::DVEntry::GT] = nullptr; // Default value = -infinity.    Bound[K].Upper[Dependence::DVEntry::GT] = nullptr; // Default value = +infinity.    
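
The exactRDIVtest and gcdMIVtest hunks above both reduce to the same arithmetic fact: the dependence equation only has integer solutions when the gcd of the (absolute) coefficients divides the constant difference between the two subscripts. Below is a minimal sketch of that check using only the APInt operations visible in the patch (GreatestCommonDivisor, abs, srem); the function name and the sample values are illustrative, and the snippet assumes LLVM's headers are available.

#include "llvm/ADT/APInt.h"
#include <cstdint>
#include <initializer_list>
#include <iostream>

using namespace llvm;

static bool gcdRulesOutDependence(std::initializer_list<int64_t> Coeffs,
                                  int64_t Delta, unsigned Bits = 64) {
  // Fold every constant coefficient into a running GCD, the way gcdMIVtest
  // accumulates RunningGCD over the AddRec coefficients.
  APInt RunningGCD(Bits, 0);
  for (int64_t C : Coeffs) {
    APInt ConstCoeff(Bits, static_cast<uint64_t>(C), /*isSigned=*/true);
    RunningGCD = APIntOps::GreatestCommonDivisor(RunningGCD, ConstCoeff.abs());
  }
  if (RunningGCD == 0)
    return false;                              // learned nothing useful
  APInt ConstDelta(Bits, static_cast<uint64_t>(Delta), /*isSigned=*/true);
  // As in the exact tests: if the gcd does not divide Delta, the dependence
  // equation has no integer solution at all, so the references are independent.
  return ConstDelta.srem(RunningGCD) != 0;
}

int main() {
  std::cout << gcdRulesOutDependence({4, 6}, 3) << "\n"; // 1: provably independent
  std::cout << gcdRulesOutDependence({4, 6}, 8) << "\n"; // 0: test is inconclusive
}

findGCD in the exact tests additionally hands back a particular solution (the G, X, Y outputs above), which is then clamped against the constant loop upper bounds SrcUM/DstUM when those are available.
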
if (Bound[K].Iterations) { -    const SCEV *Iter_1 = -      SE->getMinusSCEV(Bound[K].Iterations, -                       SE->getConstant(Bound[K].Iterations->getType(), 1)); +    const SCEV *Iter_1 = SE->getMinusSCEV( +        Bound[K].Iterations, SE->getOne(Bound[K].Iterations->getType()));      const SCEV *NegPart =        getNegativePart(SE->getMinusSCEV(A[K].Coeff, B[K].PosPart));      Bound[K].Lower[Dependence::DVEntry::GT] = @@ -2874,13 +2871,13 @@ void DependenceAnalysis::findBoundsGT(CoefficientInfo *A,  // X^+ = max(X, 0)  const SCEV *DependenceAnalysis::getPositivePart(const SCEV *X) const { -  return SE->getSMaxExpr(X, SE->getConstant(X->getType(), 0)); +  return SE->getSMaxExpr(X, SE->getZero(X->getType()));  }  // X^- = min(X, 0)  const SCEV *DependenceAnalysis::getNegativePart(const SCEV *X) const { -  return SE->getSMinExpr(X, SE->getConstant(X->getType(), 0)); +  return SE->getSMinExpr(X, SE->getZero(X->getType()));  } @@ -2891,7 +2888,7 @@ DependenceAnalysis::CoefficientInfo *  DependenceAnalysis::collectCoeffInfo(const SCEV *Subscript,                                       bool SrcFlag,                                       const SCEV *&Constant) const { -  const SCEV *Zero = SE->getConstant(Subscript->getType(), 0); +  const SCEV *Zero = SE->getZero(Subscript->getType());    CoefficientInfo *CI = new CoefficientInfo[MaxLevels + 1];    for (unsigned K = 1; K <= MaxLevels; ++K) {      CI[K].Coeff = Zero; @@ -2975,7 +2972,7 @@ const SCEV *DependenceAnalysis::findCoefficient(const SCEV *Expr,                                                  const Loop *TargetLoop)  const {    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Expr);    if (!AddRec) -    return SE->getConstant(Expr->getType(), 0); +    return SE->getZero(Expr->getType());    if (AddRec->getLoop() == TargetLoop)      return AddRec->getStepRecurrence(*SE);    return findCoefficient(AddRec->getStart(), TargetLoop); @@ -3110,8 +3107,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,      const SCEVConstant *Bconst = dyn_cast<SCEVConstant>(B);      const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);      if (!Bconst || !Cconst) return false; -    APInt Beta = Bconst->getValue()->getValue(); -    APInt Charlie = Cconst->getValue()->getValue(); +    APInt Beta = Bconst->getAPInt(); +    APInt Charlie = Cconst->getAPInt();      APInt CdivB = Charlie.sdiv(Beta);      assert(Charlie.srem(Beta) == 0 && "C should be evenly divisible by B");      const SCEV *AP_K = findCoefficient(Dst, CurLoop); @@ -3125,8 +3122,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,      const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);      const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);      if (!Aconst || !Cconst) return false; -    APInt Alpha = Aconst->getValue()->getValue(); -    APInt Charlie = Cconst->getValue()->getValue(); +    APInt Alpha = Aconst->getAPInt(); +    APInt Charlie = Cconst->getAPInt();      APInt CdivA = Charlie.sdiv(Alpha);      assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");      const SCEV *A_K = findCoefficient(Src, CurLoop); @@ -3139,8 +3136,8 @@ bool DependenceAnalysis::propagateLine(const SCEV *&Src,      const SCEVConstant *Aconst = dyn_cast<SCEVConstant>(A);      const SCEVConstant *Cconst = dyn_cast<SCEVConstant>(C);      if (!Aconst || !Cconst) return false; -    APInt Alpha = Aconst->getValue()->getValue(); -    APInt Charlie = Cconst->getValue()->getValue(); +    APInt Alpha = Aconst->getAPInt(); +    APInt Charlie = 
Cconst->getAPInt();      APInt CdivA = Charlie.sdiv(Alpha);      assert(Charlie.srem(Alpha) == 0 && "C should be evenly divisible by A");      const SCEV *A_K = findCoefficient(Src, CurLoop); @@ -3244,20 +3241,36 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level,  /// source and destination array references are recurrences on a nested loop,  /// this function flattens the nested recurrences into separate recurrences  /// for each loop level. -bool DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, -                                        const SCEV *DstSCEV, -                                        SmallVectorImpl<Subscript> &Pair, -                                        const SCEV *ElementSize) { +bool DependenceAnalysis::tryDelinearize(Instruction *Src, +                                        Instruction *Dst, +                                        SmallVectorImpl<Subscript> &Pair) +{ +  Value *SrcPtr = getPointerOperand(Src); +  Value *DstPtr = getPointerOperand(Dst); + +  Loop *SrcLoop = LI->getLoopFor(Src->getParent()); +  Loop *DstLoop = LI->getLoopFor(Dst->getParent()); + +  // Below code mimics the code in Delinearization.cpp +  const SCEV *SrcAccessFn = +    SE->getSCEVAtScope(SrcPtr, SrcLoop); +  const SCEV *DstAccessFn = +    SE->getSCEVAtScope(DstPtr, DstLoop); +    const SCEVUnknown *SrcBase = -      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcSCEV)); +      dyn_cast<SCEVUnknown>(SE->getPointerBase(SrcAccessFn));    const SCEVUnknown *DstBase = -      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstSCEV)); +      dyn_cast<SCEVUnknown>(SE->getPointerBase(DstAccessFn));    if (!SrcBase || !DstBase || SrcBase != DstBase)      return false; -  SrcSCEV = SE->getMinusSCEV(SrcSCEV, SrcBase); -  DstSCEV = SE->getMinusSCEV(DstSCEV, DstBase); +  const SCEV *ElementSize = SE->getElementSize(Src); +  if (ElementSize != SE->getElementSize(Dst)) +    return false; + +  const SCEV *SrcSCEV = SE->getMinusSCEV(SrcAccessFn, SrcBase); +  const SCEV *DstSCEV = SE->getMinusSCEV(DstAccessFn, DstBase);    const SCEVAddRecExpr *SrcAR = dyn_cast<SCEVAddRecExpr>(SrcSCEV);    const SCEVAddRecExpr *DstAR = dyn_cast<SCEVAddRecExpr>(DstSCEV); @@ -3330,7 +3343,6 @@ static void dumpSmallBitVector(SmallBitVector &BV) {  }  #endif -  // depends -  // Returns NULL if there is no dependence.  // Otherwise, return a Dependence with as many details as possible. 
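
The findBoundsALL/LT/GT routines and the getPositivePart/getNegativePart helpers above implement the classic X^+ = max(X, 0), X^- = min(X, 0) split: over an index range 0 <= i <= U, a term C*i contributes at least C^- * U and at most C^+ * U to the subscript difference. A standalone plain-integer sketch of that bound computation (names and the two-level example are illustrative, not from the patch):

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <utility>
#include <vector>

static int64_t posPart(int64_t X) { return std::max<int64_t>(X, 0); } // X^+
static int64_t negPart(int64_t X) { return std::min<int64_t>(X, 0); } // X^-

// Bounds of sum_K C[K]*i_K when 0 <= i_K <= U[K] at every loop level K:
// each term contributes at least negPart(C[K])*U[K] and at most
// posPart(C[K])*U[K], which is how the findBounds* routines assemble their
// per-direction lower and upper bounds.
static std::pair<int64_t, int64_t> bounds(const std::vector<int64_t> &C,
                                          const std::vector<int64_t> &U) {
  int64_t Lo = 0, Hi = 0;
  for (size_t K = 0; K < C.size(); ++K) {
    Lo += negPart(C[K]) * U[K];
    Hi += posPart(C[K]) * U[K];
  }
  return {Lo, Hi};
}

int main() {
  // Coefficients {3, -2} with trip counts {10, 5}.
  auto [Lo, Hi] = bounds({3, -2}, {10, 5});
  std::cout << Lo << " " << Hi << "\n"; // -10 30
  // A dependence is only possible when the constant difference between the
  // two subscripts falls inside [Lo, Hi].
}
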
@@ -3425,10 +3437,11 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,      Pair[0].Dst = DstSCEV;    } -  if (Delinearize && Pairs == 1 && CommonLevels > 1 && -      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { -    DEBUG(dbgs() << "    delinerized GEP\n"); -    Pairs = Pair.size(); +  if (Delinearize && CommonLevels > 1) { +    if (tryDelinearize(Src, Dst, Pair)) { +      DEBUG(dbgs() << "    delinerized GEP\n"); +      Pairs = Pair.size(); +    }    }    for (unsigned P = 0; P < Pairs; ++P) { @@ -3746,9 +3759,7 @@ DependenceAnalysis::depends(Instruction *Src, Instruction *Dst,        return nullptr;    } -  auto Final = make_unique<FullDependence>(Result); -  Result.DV = nullptr; -  return std::move(Final); +  return make_unique<FullDependence>(std::move(Result));  } @@ -3852,10 +3863,11 @@ const  SCEV *DependenceAnalysis::getSplitIteration(const Dependence &Dep,      Pair[0].Dst = DstSCEV;    } -  if (Delinearize && Pairs == 1 && CommonLevels > 1 && -      tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair, SE->getElementSize(Src))) { -    DEBUG(dbgs() << "    delinerized GEP\n"); -    Pairs = Pair.size(); +  if (Delinearize && CommonLevels > 1) { +    if (tryDelinearize(Src, Dst, Pair)) { +      DEBUG(dbgs() << "    delinerized GEP\n"); +      Pairs = Pair.size(); +    }    }    for (unsigned P = 0; P < Pairs; ++P) { diff --git a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp index e5ee2959c15d..5ae6d74130a7 100644 --- a/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DivergenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- DivergenceAnalysis.cpp ------ Divergence Analysis ------------------===// +//===- DivergenceAnalysis.cpp --------- Divergence Analysis Implementation -==//  //  //                     The LLVM Compiler Infrastructure  // @@ -7,8 +7,8 @@  //  //===----------------------------------------------------------------------===//  // -// This file defines divergence analysis which determines whether a branch in a -// GPU program is divergent. It can help branch optimizations such as jump +// This file implements divergence analysis which determines whether a branch +// in a GPU program is divergent.It can help branch optimizations such as jump  // threading and loop unswitching to make better decisions.  //  // GPU programs typically use the SIMD execution model, where multiple threads @@ -61,75 +61,31 @@  // 2. memory as black box. It conservatively considers values loaded from  //    generic or local address as divergent. This can be improved by leveraging  //    pointer analysis. 
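
tryDelinearize, as reworked above, recovers per-loop subscripts from a single flattened access function so each loop level can be tested on its own, and it now bails out unless both references share the same pointer base and element size. A toy, non-SCEV illustration of what delinearization recovers; the row-major layout and the extent M are assumptions made only for this example:

#include <cstdint>
#include <iostream>
#include <utility>

// Toy version of what delinearization recovers: for a row-major access
// A[i*M + j] with a known inner extent M, split the flat offset back into
// per-loop subscripts so each level can be handled by a cheap SIV test
// instead of one coupled MIV subscript.
static std::pair<int64_t, int64_t> delinearize(int64_t Flat, int64_t M) {
  return {Flat / M, Flat % M};
}

int main() {
  const int64_t M = 8;                 // inner dimension of a conceptual A[N][M]
  auto [I, J] = delinearize(3 * M + 5, M);
  std::cout << I << " " << J << "\n";  // 3 5
}
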
+//  //===----------------------------------------------------------------------===// -#include <vector> -#include "llvm/IR/Dominators.h" -#include "llvm/ADT/DenseSet.h" +#include "llvm/Analysis/DivergenceAnalysis.h"  #include "llvm/Analysis/Passes.h"  #include "llvm/Analysis/PostDominators.h"  #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/Function.h" +#include "llvm/IR/Dominators.h"  #include "llvm/IR/InstIterator.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/Value.h" -#include "llvm/Pass.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h"  #include "llvm/Transforms/Scalar.h" +#include <vector>  using namespace llvm; -#define DEBUG_TYPE "divergence" - -namespace { -class DivergenceAnalysis : public FunctionPass { -public: -  static char ID; - -  DivergenceAnalysis() : FunctionPass(ID) { -    initializeDivergenceAnalysisPass(*PassRegistry::getPassRegistry()); -  } - -  void getAnalysisUsage(AnalysisUsage &AU) const override { -    AU.addRequired<DominatorTreeWrapperPass>(); -    AU.addRequired<PostDominatorTree>(); -    AU.setPreservesAll(); -  } - -  bool runOnFunction(Function &F) override; - -  // Print all divergent branches in the function. -  void print(raw_ostream &OS, const Module *) const override; - -  // Returns true if V is divergent. -  bool isDivergent(const Value *V) const { return DivergentValues.count(V); } -  // Returns true if V is uniform/non-divergent. -  bool isUniform(const Value *V) const { return !isDivergent(V); } - -private: -  // Stores all divergent values. -  DenseSet<const Value *> DivergentValues; -}; -} // End of anonymous namespace - -// Register this pass. -char DivergenceAnalysis::ID = 0; -INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", -                      false, true) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) -INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", -                    false, true) -  namespace {  class DivergencePropagator {  public: -  DivergencePropagator(Function &F, TargetTransformInfo &TTI, -                       DominatorTree &DT, PostDominatorTree &PDT, -                       DenseSet<const Value *> &DV) +  DivergencePropagator(Function &F, TargetTransformInfo &TTI, DominatorTree &DT, +                       PostDominatorTree &PDT, DenseSet<const Value *> &DV)        : F(F), TTI(TTI), DT(DT), PDT(PDT), DV(DV) {}    void populateWithSourcesOfDivergence();    void propagate(); @@ -140,7 +96,7 @@ private:    // A helper function that explores sync dependents of TI.    void exploreSyncDependency(TerminatorInst *TI);    // Computes the influence region from Start to End. This region includes all -  // basic blocks on any path from Start to End. +  // basic blocks on any simple path from Start to End.    void computeInfluenceRegion(BasicBlock *Start, BasicBlock *End,                                DenseSet<BasicBlock *> &InfluenceRegion);    // Finds all users of I that are outside the influence region, and add these @@ -153,13 +109,13 @@ private:    DominatorTree &DT;    PostDominatorTree &PDT;    std::vector<Value *> Worklist; // Stack for DFS. -  DenseSet<const Value *> &DV; // Stores all divergent values. +  DenseSet<const Value *> &DV;   // Stores all divergent values.  
};  void DivergencePropagator::populateWithSourcesOfDivergence() {    Worklist.clear();    DV.clear(); -  for (auto &I : inst_range(F)) { +  for (auto &I : instructions(F)) {      if (TTI.isSourceOfDivergence(&I)) {        Worklist.push_back(&I);        DV.insert(&I); @@ -191,8 +147,8 @@ void DivergencePropagator::exploreSyncDependency(TerminatorInst *TI) {    for (auto I = IPostDom->begin(); isa<PHINode>(I); ++I) {      // A PHINode is uniform if it returns the same value no matter which path is      // taken. -    if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(I).second) -      Worklist.push_back(I); +    if (!cast<PHINode>(I)->hasConstantValue() && DV.insert(&*I).second) +      Worklist.push_back(&*I);    }    // Propagation rule 2: if a value defined in a loop is used outside, the user @@ -242,21 +198,33 @@ void DivergencePropagator::findUsersOutsideInfluenceRegion(    }  } +// A helper function for computeInfluenceRegion that adds successors of "ThisBB" +// to the influence region. +static void +addSuccessorsToInfluenceRegion(BasicBlock *ThisBB, BasicBlock *End, +                               DenseSet<BasicBlock *> &InfluenceRegion, +                               std::vector<BasicBlock *> &InfluenceStack) { +  for (BasicBlock *Succ : successors(ThisBB)) { +    if (Succ != End && InfluenceRegion.insert(Succ).second) +      InfluenceStack.push_back(Succ); +  } +} +  void DivergencePropagator::computeInfluenceRegion(      BasicBlock *Start, BasicBlock *End,      DenseSet<BasicBlock *> &InfluenceRegion) {    assert(PDT.properlyDominates(End, Start) &&           "End does not properly dominate Start"); + +  // The influence region starts from the end of "Start" to the beginning of +  // "End". Therefore, "Start" should not be in the region unless "Start" is in +  // a loop that doesn't contain "End".    std::vector<BasicBlock *> InfluenceStack; -  InfluenceStack.push_back(Start); -  InfluenceRegion.insert(Start); +  addSuccessorsToInfluenceRegion(Start, End, InfluenceRegion, InfluenceStack);    while (!InfluenceStack.empty()) {      BasicBlock *BB = InfluenceStack.back();      InfluenceStack.pop_back(); -    for (BasicBlock *Succ : successors(BB)) { -      if (End != Succ && InfluenceRegion.insert(Succ).second) -        InfluenceStack.push_back(Succ); -    } +    addSuccessorsToInfluenceRegion(BB, End, InfluenceRegion, InfluenceStack);    }  } @@ -286,10 +254,25 @@ void DivergencePropagator::propagate() {  } /// end namespace anonymous +// Register this pass. 
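
computeInfluenceRegion, as restructured above, seeds the DFS with Start's successors rather than with Start itself, so the branch block only joins the region when a back edge re-enters it, and the walk never steps onto End. A self-contained sketch of the same traversal over a toy CFG (the block names and the diamond shape are illustrative):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

using CFG = std::map<std::string, std::vector<std::string>>;

// Mirrors the shape of computeInfluenceRegion: seed the worklist with Start's
// successors and never push End. The result is every block on a path from
// the end of Start to the beginning of End.
static std::set<std::string> influenceRegion(const CFG &G,
                                             const std::string &Start,
                                             const std::string &End) {
  std::set<std::string> Region;
  std::vector<std::string> Stack;
  auto addSuccessors = [&](const std::string &BB) {
    for (const std::string &Succ : G.at(BB))
      if (Succ != End && Region.insert(Succ).second)
        Stack.push_back(Succ);
  };
  addSuccessors(Start);
  while (!Stack.empty()) {
    std::string BB = Stack.back();
    Stack.pop_back();
    addSuccessors(BB);
  }
  return Region;
}

int main() {
  // entry -> {then, else}; then -> merge; else -> merge; merge post-dominates entry.
  CFG G = {{"entry", {"then", "else"}},
           {"then", {"merge"}},
           {"else", {"merge"}},
           {"merge", {}}};
  for (const auto &BB : influenceRegion(G, "entry", "merge"))
    std::cout << BB << " ";            // prints: else then
  std::cout << "\n";
}

Values defined inside that region but used outside it, together with the PHIs in End, are exactly the users that exploreSyncDependency marks divergent when the terminator of Start is divergent.
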
+char DivergenceAnalysis::ID = 0; +INITIALIZE_PASS_BEGIN(DivergenceAnalysis, "divergence", "Divergence Analysis", +                      false, true) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(PostDominatorTree) +INITIALIZE_PASS_END(DivergenceAnalysis, "divergence", "Divergence Analysis", +                    false, true) +  FunctionPass *llvm::createDivergenceAnalysisPass() {    return new DivergenceAnalysis();  } +void DivergenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.addRequired<DominatorTreeWrapperPass>(); +  AU.addRequired<PostDominatorTree>(); +  AU.setPreservesAll(); +} +  bool DivergenceAnalysis::runOnFunction(Function &F) {    auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();    if (TTIWP == nullptr) @@ -329,8 +312,8 @@ void DivergenceAnalysis::print(raw_ostream &OS, const Module *) const {      if (DivergentValues.count(&Arg))        OS << "DIVERGENT:  " << Arg << "\n";    } -  // Iterate instructions using inst_range to ensure a deterministic order. -  for (auto &I : inst_range(F)) { +  // Iterate instructions using instructions() to ensure a deterministic order. +  for (auto &I : instructions(F)) {      if (DivergentValues.count(&I))        OS << "DIVERGENT:" << I << "\n";    } diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp new file mode 100644 index 000000000000..01be8b38fadd --- /dev/null +++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp @@ -0,0 +1,106 @@ +//===- EHPersonalities.cpp - Compute EH-related information ---------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/EHPersonalities.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +/// See if the given exception handling personality function is one that we +/// understand.  If so, return a description of it; otherwise return Unknown. +EHPersonality llvm::classifyEHPersonality(const Value *Pers) { +  const Function *F = +      Pers ? dyn_cast<Function>(Pers->stripPointerCasts()) : nullptr; +  if (!F) +    return EHPersonality::Unknown; +  return StringSwitch<EHPersonality>(F->getName()) +    .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) +    .Case("__gxx_personality_v0",  EHPersonality::GNU_CXX) +    .Case("__gcc_personality_v0",  EHPersonality::GNU_C) +    .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) +    .Case("_except_handler3",      EHPersonality::MSVC_X86SEH) +    .Case("_except_handler4",      EHPersonality::MSVC_X86SEH) +    .Case("__C_specific_handler",  EHPersonality::MSVC_Win64SEH) +    .Case("__CxxFrameHandler3",    EHPersonality::MSVC_CXX) +    .Case("ProcessCLRException",   EHPersonality::CoreCLR) +    .Default(EHPersonality::Unknown); +} + +bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { +  EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); +  // We can't simplify any invokes to nounwind functions if the personality +  // function wants to catch asynch exceptions.  
The nounwind attribute only +  // implies that the function does not throw synchronous exceptions. +  return !isAsynchronousEHPersonality(Personality); +} + +DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) { +  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 16> Worklist; +  BasicBlock *EntryBlock = &F.getEntryBlock(); +  DenseMap<BasicBlock *, ColorVector> BlockColors; + +  // Build up the color map, which maps each block to its set of 'colors'. +  // For any block B the "colors" of B are the set of funclets F (possibly +  // including a root "funclet" representing the main function) such that +  // F will need to directly contain B or a copy of B (where the term "directly +  // contain" is used to distinguish from being "transitively contained" in +  // a nested funclet). +  // +  // Note: Despite not being a funclet in the truest sense, a catchswitch is +  // considered to belong to its own funclet for the purposes of coloring. + +  DEBUG_WITH_TYPE("winehprepare-coloring", dbgs() << "\nColoring funclets for " +                                                  << F.getName() << "\n"); + +  Worklist.push_back({EntryBlock, EntryBlock}); + +  while (!Worklist.empty()) { +    BasicBlock *Visiting; +    BasicBlock *Color; +    std::tie(Visiting, Color) = Worklist.pop_back_val(); +    DEBUG_WITH_TYPE("winehprepare-coloring", +                    dbgs() << "Visiting " << Visiting->getName() << ", " +                           << Color->getName() << "\n"); +    Instruction *VisitingHead = Visiting->getFirstNonPHI(); +    if (VisitingHead->isEHPad()) { +      // Mark this funclet head as a member of itself. +      Color = Visiting; +    } +    // Note that this is a member of the given color. +    ColorVector &Colors = BlockColors[Visiting]; +    if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end()) +      Colors.push_back(Color); +    else +      continue; + +    DEBUG_WITH_TYPE("winehprepare-coloring", +                    dbgs() << "  Assigned color \'" << Color->getName() +                           << "\' to block \'" << Visiting->getName() +                           << "\'.\n"); + +    BasicBlock *SuccColor = Color; +    TerminatorInst *Terminator = Visiting->getTerminator(); +    if (auto *CatchRet = dyn_cast<CatchReturnInst>(Terminator)) { +      Value *ParentPad = CatchRet->getParentPad(); +      if (isa<ConstantTokenNone>(ParentPad)) +        SuccColor = EntryBlock; +      else +        SuccColor = cast<Instruction>(ParentPad)->getParent(); +    } + +    for (BasicBlock *Succ : successors(Visiting)) +      Worklist.push_back({Succ, SuccColor}); +  } +  return BlockColors; +} diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp new file mode 100644 index 000000000000..ab2263ae374e --- /dev/null +++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp @@ -0,0 +1,1002 @@ +//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This simple pass provides alias and mod/ref information for global values +// that do not have their address taken, and keeps track of whether functions +// read or write memory (are "pure").  For this simple (but very common) case, +// we can provide pretty accurate and useful information. 
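
A hedged sketch of how a consumer might walk the map that colorEHFunclets (above) returns; only the DenseMap-of-ColorVector shape from the new header is relied on, and reportSharedBlocks is a hypothetical helper, not part of the patch. Blocks carrying more than one color are the ones a Windows EH preparation pass would need to clone so that every funclet gets a private copy.

#include "llvm/ADT/DenseMap.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// List the blocks that the coloring assigns to more than one funclet.
static void reportSharedBlocks(Function &F) {
  DenseMap<BasicBlock *, ColorVector> BlockColors = colorEHFunclets(F);
  for (auto &Entry : BlockColors)
    if (Entry.second.size() > 1)
      errs() << Entry.first->getName() << " belongs to "
             << Entry.second.size() << " funclets\n";
}
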
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" +using namespace llvm; + +#define DEBUG_TYPE "globalsmodref-aa" + +STATISTIC(NumNonAddrTakenGlobalVars, +          "Number of global vars without address taken"); +STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); +STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); +STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); +STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); + +// An option to enable unsafe alias results from the GlobalsModRef analysis. +// When enabled, GlobalsModRef will provide no-alias results which in extremely +// rare cases may not be conservatively correct. In particular, in the face of +// transforms which cause assymetry between how effective GetUnderlyingObject +// is for two pointers, it may produce incorrect results. +// +// These unsafe results have been returned by GMR for many years without +// causing significant issues in the wild and so we provide a mechanism to +// re-enable them for users of LLVM that have a particular performance +// sensitivity and no known issues. The option also makes it easy to evaluate +// the performance impact of these results. +static cl::opt<bool> EnableUnsafeGlobalsModRefAliasResults( +    "enable-unsafe-globalsmodref-alias-results", cl::init(false), cl::Hidden); + +/// The mod/ref information collected for a particular function. +/// +/// We collect information about mod/ref behavior of a function here, both in +/// general and as pertains to specific globals. We only have this detailed +/// information when we know *something* useful about the behavior. If we +/// saturate to fully general mod/ref, we remove the info for the function. +class GlobalsAAResult::FunctionInfo { +  typedef SmallDenseMap<const GlobalValue *, ModRefInfo, 16> GlobalInfoMapType; + +  /// Build a wrapper struct that has 8-byte alignment. All heap allocations +  /// should provide this much alignment at least, but this makes it clear we +  /// specifically rely on this amount of alignment. +  struct LLVM_ALIGNAS(8) AlignedMap { +    AlignedMap() {} +    AlignedMap(const AlignedMap &Arg) : Map(Arg.Map) {} +    GlobalInfoMapType Map; +  }; + +  /// Pointer traits for our aligned map. +  struct AlignedMapPointerTraits { +    static inline void *getAsVoidPointer(AlignedMap *P) { return P; } +    static inline AlignedMap *getFromVoidPointer(void *P) { +      return (AlignedMap *)P; +    } +    enum { NumLowBitsAvailable = 3 }; +    static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable), +                  "AlignedMap insufficiently aligned to have enough low bits."); +  }; + +  /// The bit that flags that this function may read any global. This is +  /// chosen to mix together with ModRefInfo bits. +  enum { MayReadAnyGlobal = 4 }; + +  /// Checks to document the invariants of the bit packing here. 
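
The AlignedMap wrapper above exists purely to guarantee 8-byte alignment, which is what leaves the three low bits of the map pointer free for PointerIntPair to reuse. A standalone sketch of that low-bit tagging idea, using plain uintptr_t masking rather than LLVM's PointerIntPair; the Payload type and the tag value are made up for the example:

#include <cassert>
#include <cstdint>
#include <iostream>

// 8-byte alignment guarantees the three low bits of any Payload* are zero,
// which is the property AlignedMap documents and relies on.
struct alignas(8) Payload {
  int Data;
};

// Minimal stand-in for PointerIntPair: a pointer and a 3-bit tag in one word.
class TaggedPtr {
  uintptr_t Bits = 0;

public:
  void set(Payload *P, unsigned Tag) {
    assert((reinterpret_cast<uintptr_t>(P) & 7u) == 0 && "pointer not 8-byte aligned");
    assert(Tag < 8 && "tag does not fit in the spare low bits");
    Bits = reinterpret_cast<uintptr_t>(P) | Tag;
  }
  Payload *getPointer() const {
    return reinterpret_cast<Payload *>(Bits & ~uintptr_t(7));
  }
  unsigned getTag() const { return unsigned(Bits & 7u); }
};

int main() {
  Payload *P = new Payload{42};
  TaggedPtr TP;
  TP.set(P, 5); // e.g. MRI_Ref | MayReadAnyGlobal in the layout described above
  std::cout << TP.getPointer()->Data << " " << TP.getTag() << "\n"; // 42 5
  delete TP.getPointer();
}
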
+  static_assert((MayReadAnyGlobal & MRI_ModRef) == 0, +                "ModRef and the MayReadAnyGlobal flag bits overlap."); +  static_assert(((MayReadAnyGlobal | MRI_ModRef) >> +                 AlignedMapPointerTraits::NumLowBitsAvailable) == 0, +                "Insufficient low bits to store our flag and ModRef info."); + +public: +  FunctionInfo() : Info() {} +  ~FunctionInfo() { +    delete Info.getPointer(); +  } +  // Spell out the copy ond move constructors and assignment operators to get +  // deep copy semantics and correct move semantics in the face of the +  // pointer-int pair. +  FunctionInfo(const FunctionInfo &Arg) +      : Info(nullptr, Arg.Info.getInt()) { +    if (const auto *ArgPtr = Arg.Info.getPointer()) +      Info.setPointer(new AlignedMap(*ArgPtr)); +  } +  FunctionInfo(FunctionInfo &&Arg) +      : Info(Arg.Info.getPointer(), Arg.Info.getInt()) { +    Arg.Info.setPointerAndInt(nullptr, 0); +  } +  FunctionInfo &operator=(const FunctionInfo &RHS) { +    delete Info.getPointer(); +    Info.setPointerAndInt(nullptr, RHS.Info.getInt()); +    if (const auto *RHSPtr = RHS.Info.getPointer()) +      Info.setPointer(new AlignedMap(*RHSPtr)); +    return *this; +  } +  FunctionInfo &operator=(FunctionInfo &&RHS) { +    delete Info.getPointer(); +    Info.setPointerAndInt(RHS.Info.getPointer(), RHS.Info.getInt()); +    RHS.Info.setPointerAndInt(nullptr, 0); +    return *this; +  } + +  /// Returns the \c ModRefInfo info for this function. +  ModRefInfo getModRefInfo() const { +    return ModRefInfo(Info.getInt() & MRI_ModRef); +  } + +  /// Adds new \c ModRefInfo for this function to its state. +  void addModRefInfo(ModRefInfo NewMRI) { +    Info.setInt(Info.getInt() | NewMRI); +  } + +  /// Returns whether this function may read any global variable, and we don't +  /// know which global. +  bool mayReadAnyGlobal() const { return Info.getInt() & MayReadAnyGlobal; } + +  /// Sets this function as potentially reading from any global. +  void setMayReadAnyGlobal() { Info.setInt(Info.getInt() | MayReadAnyGlobal); } + +  /// Returns the \c ModRefInfo info for this function w.r.t. a particular +  /// global, which may be more precise than the general information above. +  ModRefInfo getModRefInfoForGlobal(const GlobalValue &GV) const { +    ModRefInfo GlobalMRI = mayReadAnyGlobal() ? MRI_Ref : MRI_NoModRef; +    if (AlignedMap *P = Info.getPointer()) { +      auto I = P->Map.find(&GV); +      if (I != P->Map.end()) +        GlobalMRI = ModRefInfo(GlobalMRI | I->second); +    } +    return GlobalMRI; +  } + +  /// Add mod/ref info from another function into ours, saturating towards +  /// MRI_ModRef. +  void addFunctionInfo(const FunctionInfo &FI) { +    addModRefInfo(FI.getModRefInfo()); + +    if (FI.mayReadAnyGlobal()) +      setMayReadAnyGlobal(); + +    if (AlignedMap *P = FI.Info.getPointer()) +      for (const auto &G : P->Map) +        addModRefInfoForGlobal(*G.first, G.second); +  } + +  void addModRefInfoForGlobal(const GlobalValue &GV, ModRefInfo NewMRI) { +    AlignedMap *P = Info.getPointer(); +    if (!P) { +      P = new AlignedMap(); +      Info.setPointer(P); +    } +    auto &GlobalMRI = P->Map[&GV]; +    GlobalMRI = ModRefInfo(GlobalMRI | NewMRI); +  } + +  /// Clear a global's ModRef info. Should be used when a global is being +  /// deleted. 
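
Putting the integer half of that pair to work: below is a minimal stand-in for FunctionInfo showing why the two MRI_* lattice bits and the MayReadAnyGlobal flag can share storage, and why joining summaries saturates with a simple bitwise OR. The struct mirrors the bit layout described above but is otherwise illustrative.

#include <cassert>
#include <iostream>

// Low two bits: the mod/ref lattice. Bit 2: "may read any global".
enum : unsigned { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };
enum : unsigned { MayReadAnyGlobal = 4 };

static_assert((MayReadAnyGlobal & ModRef) == 0,
              "flag must not overlap the mod/ref lattice bits");

struct PackedFunctionInfo {
  unsigned Bits = 0; // stands in for the integer half of the PointerIntPair

  unsigned getModRefInfo() const { return Bits & ModRef; }
  void addModRefInfo(unsigned MRI) { Bits |= MRI; } // joins saturate via OR
  bool mayReadAnyGlobal() const { return Bits & MayReadAnyGlobal; }
  void setMayReadAnyGlobal() { Bits |= MayReadAnyGlobal; }
};

int main() {
  PackedFunctionInfo FI;
  FI.addModRefInfo(Ref);    // saw a load of some tracked global
  FI.setMayReadAnyGlobal(); // the callee might read any global at all
  FI.addModRefInfo(Mod);    // later, a store shows up too
  std::cout << FI.getModRefInfo() << " " << FI.mayReadAnyGlobal() << "\n"; // 3 1
  assert(FI.getModRefInfo() == ModRef);
}
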
+  void eraseModRefInfoForGlobal(const GlobalValue &GV) { +    if (AlignedMap *P = Info.getPointer()) +      P->Map.erase(&GV); +  } + +private: +  /// All of the information is encoded into a single pointer, with a three bit +  /// integer in the low three bits. The high bit provides a flag for when this +  /// function may read any global. The low two bits are the ModRefInfo. And +  /// the pointer, when non-null, points to a map from GlobalValue to +  /// ModRefInfo specific to that GlobalValue. +  PointerIntPair<AlignedMap *, 3, unsigned, AlignedMapPointerTraits> Info; +}; + +void GlobalsAAResult::DeletionCallbackHandle::deleted() { +  Value *V = getValPtr(); +  if (auto *F = dyn_cast<Function>(V)) +    GAR->FunctionInfos.erase(F); + +  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { +    if (GAR->NonAddressTakenGlobals.erase(GV)) { +      // This global might be an indirect global.  If so, remove it and +      // remove any AllocRelatedValues for it. +      if (GAR->IndirectGlobals.erase(GV)) { +        // Remove any entries in AllocsForIndirectGlobals for this global. +        for (auto I = GAR->AllocsForIndirectGlobals.begin(), +                  E = GAR->AllocsForIndirectGlobals.end(); +             I != E; ++I) +          if (I->second == GV) +            GAR->AllocsForIndirectGlobals.erase(I); +      } + +      // Scan the function info we have collected and remove this global +      // from all of them. +      for (auto &FIPair : GAR->FunctionInfos) +        FIPair.second.eraseModRefInfoForGlobal(*GV); +    } +  } + +  // If this is an allocation related to an indirect global, remove it. +  GAR->AllocsForIndirectGlobals.erase(V); + +  // And clear out the handle. +  setValPtr(nullptr); +  GAR->Handles.erase(I); +  // This object is now destroyed! +} + +FunctionModRefBehavior GlobalsAAResult::getModRefBehavior(const Function *F) { +  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; + +  if (FunctionInfo *FI = getFunctionInfo(F)) { +    if (FI->getModRefInfo() == MRI_NoModRef) +      Min = FMRB_DoesNotAccessMemory; +    else if ((FI->getModRefInfo() & MRI_Mod) == 0) +      Min = FMRB_OnlyReadsMemory; +  } + +  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min); +} + +FunctionModRefBehavior +GlobalsAAResult::getModRefBehavior(ImmutableCallSite CS) { +  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior; + +  if (const Function *F = CS.getCalledFunction()) +    if (FunctionInfo *FI = getFunctionInfo(F)) { +      if (FI->getModRefInfo() == MRI_NoModRef) +        Min = FMRB_DoesNotAccessMemory; +      else if ((FI->getModRefInfo() & MRI_Mod) == 0) +        Min = FMRB_OnlyReadsMemory; +    } + +  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min); +} + +/// Returns the function info for the function, or null if we don't have +/// anything useful to say about it. +GlobalsAAResult::FunctionInfo * +GlobalsAAResult::getFunctionInfo(const Function *F) { +  auto I = FunctionInfos.find(F); +  if (I != FunctionInfos.end()) +    return &I->second; +  return nullptr; +} + +/// AnalyzeGlobals - Scan through the users of all of the internal +/// GlobalValue's in the program.  If none of them have their "address taken" +/// (really, their address passed to something nontrivial), record this fact, +/// and record the functions that they are used directly in. 
+void GlobalsAAResult::AnalyzeGlobals(Module &M) { +  SmallPtrSet<Function *, 64> TrackedFunctions; +  for (Function &F : M) +    if (F.hasLocalLinkage()) +      if (!AnalyzeUsesOfPointer(&F)) { +        // Remember that we are tracking this global. +        NonAddressTakenGlobals.insert(&F); +        TrackedFunctions.insert(&F); +        Handles.emplace_front(*this, &F); +        Handles.front().I = Handles.begin(); +        ++NumNonAddrTakenFunctions; +      } + +  SmallPtrSet<Function *, 64> Readers, Writers; +  for (GlobalVariable &GV : M.globals()) +    if (GV.hasLocalLinkage()) { +      if (!AnalyzeUsesOfPointer(&GV, &Readers, +                                GV.isConstant() ? nullptr : &Writers)) { +        // Remember that we are tracking this global, and the mod/ref fns +        NonAddressTakenGlobals.insert(&GV); +        Handles.emplace_front(*this, &GV); +        Handles.front().I = Handles.begin(); + +        for (Function *Reader : Readers) { +          if (TrackedFunctions.insert(Reader).second) { +            Handles.emplace_front(*this, Reader); +            Handles.front().I = Handles.begin(); +          } +          FunctionInfos[Reader].addModRefInfoForGlobal(GV, MRI_Ref); +        } + +        if (!GV.isConstant()) // No need to keep track of writers to constants +          for (Function *Writer : Writers) { +            if (TrackedFunctions.insert(Writer).second) { +              Handles.emplace_front(*this, Writer); +              Handles.front().I = Handles.begin(); +            } +            FunctionInfos[Writer].addModRefInfoForGlobal(GV, MRI_Mod); +          } +        ++NumNonAddrTakenGlobalVars; + +        // If this global holds a pointer type, see if it is an indirect global. +        if (GV.getType()->getElementType()->isPointerTy() && +            AnalyzeIndirectGlobalMemory(&GV)) +          ++NumIndirectGlobalVars; +      } +      Readers.clear(); +      Writers.clear(); +    } +} + +/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. +/// If this is used by anything complex (i.e., the address escapes), return +/// true.  Also, while we are at it, keep track of those functions that read and +/// write to the value. +/// +/// If OkayStoreDest is non-null, stores into this global are allowed. 
+bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V, +                                           SmallPtrSetImpl<Function *> *Readers, +                                           SmallPtrSetImpl<Function *> *Writers, +                                           GlobalValue *OkayStoreDest) { +  if (!V->getType()->isPointerTy()) +    return true; + +  for (Use &U : V->uses()) { +    User *I = U.getUser(); +    if (LoadInst *LI = dyn_cast<LoadInst>(I)) { +      if (Readers) +        Readers->insert(LI->getParent()->getParent()); +    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { +      if (V == SI->getOperand(1)) { +        if (Writers) +          Writers->insert(SI->getParent()->getParent()); +      } else if (SI->getOperand(1) != OkayStoreDest) { +        return true; // Storing the pointer +      } +    } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { +      if (AnalyzeUsesOfPointer(I, Readers, Writers)) +        return true; +    } else if (Operator::getOpcode(I) == Instruction::BitCast) { +      if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) +        return true; +    } else if (auto CS = CallSite(I)) { +      // Make sure that this is just the function being called, not that it is +      // passing into the function. +      if (CS.isDataOperand(&U)) { +        // Detect calls to free. +        if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) { +          if (Writers) +            Writers->insert(CS->getParent()->getParent()); +        } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) { +          Function *ParentF = CS->getParent()->getParent(); +          // A nocapture argument may be read from or written to, but does not +          // escape unless the call can somehow recurse. +          // +          // nocapture "indicates that the callee does not make any copies of +          // the pointer that outlive itself". Therefore if we directly or +          // indirectly recurse, we must treat the pointer as escaping. +          if (FunctionToSCCMap[ParentF] == +              FunctionToSCCMap[CS.getCalledFunction()]) +            return true; +          if (Readers) +            Readers->insert(ParentF); +          if (Writers) +            Writers->insert(ParentF); +        } else { +          return true; // Argument of an unknown call. +        } +        // If the Callee is not ReadNone, it may read the global, +        // and if it is not ReadOnly, it may also write to it. +        Function *CalleeF = CS.getCalledFunction(); +        if (!CalleeF->doesNotAccessMemory()) { +          if (Readers) +            Readers->insert(CalleeF); +          if (Writers && !CalleeF->onlyReadsMemory()) +            Writers->insert(CalleeF); +        } +      } +    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { +      if (!isa<ConstantPointerNull>(ICI->getOperand(1))) +        return true; // Allow comparison against null. +    } else { +      return true; +    } +  } + +  return false; +} + +/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable +/// which holds a pointer type.  See if the global always points to non-aliased +/// heap memory: that is, all initializers of the globals are allocations, and +/// those allocations have no use other than initialization of the global. +/// Further, all loads out of GV must directly use the memory, not store the +/// pointer somewhere.  If this is true, we consider the memory pointed to by +/// GV to be owned by GV and can disambiguate other pointers from it. 
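
For intuition about what AnalyzeUsesOfPointer accepts and rejects, a hypothetical translation unit (not from the patch): Hidden is only ever loaded and stored directly, so it stays in NonAddressTakenGlobals with readHidden and writeHidden recorded as its reader and writer, while Leaked's address escapes through a return value, which the use scan treats as address-taken and gives up on.

// Hypothetical example translation unit.
static int Hidden = 0; // only loaded/stored directly: trackable by GlobalsModRef
static int Leaked = 0; // address escapes below: "address taken", not trackable

int readHidden() { return Hidden; }     // Hidden: recorded as a reader
void writeHidden(int V) { Hidden = V; } // Hidden: recorded as a writer
int *escape() { return &Leaked; }       // Leaked: pointer handed to callers
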
+bool GlobalsAAResult::AnalyzeIndirectGlobalMemory(GlobalVariable *GV) { +  // Keep track of values related to the allocation of the memory, f.e. the +  // value produced by the malloc call and any casts. +  std::vector<Value *> AllocRelatedValues; + +  // If the initializer is a valid pointer, bail. +  if (Constant *C = GV->getInitializer()) +    if (!C->isNullValue()) +      return false; +     +  // Walk the user list of the global.  If we find anything other than a direct +  // load or store, bail out. +  for (User *U : GV->users()) { +    if (LoadInst *LI = dyn_cast<LoadInst>(U)) { +      // The pointer loaded from the global can only be used in simple ways: +      // we allow addressing of it and loading storing to it.  We do *not* allow +      // storing the loaded pointer somewhere else or passing to a function. +      if (AnalyzeUsesOfPointer(LI)) +        return false; // Loaded pointer escapes. +      // TODO: Could try some IP mod/ref of the loaded pointer. +    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { +      // Storing the global itself. +      if (SI->getOperand(0) == GV) +        return false; + +      // If storing the null pointer, ignore it. +      if (isa<ConstantPointerNull>(SI->getOperand(0))) +        continue; + +      // Check the value being stored. +      Value *Ptr = GetUnderlyingObject(SI->getOperand(0), +                                       GV->getParent()->getDataLayout()); + +      if (!isAllocLikeFn(Ptr, &TLI)) +        return false; // Too hard to analyze. + +      // Analyze all uses of the allocation.  If any of them are used in a +      // non-simple way (e.g. stored to another global) bail out. +      if (AnalyzeUsesOfPointer(Ptr, /*Readers*/ nullptr, /*Writers*/ nullptr, +                               GV)) +        return false; // Loaded pointer escapes. + +      // Remember that this allocation is related to the indirect global. +      AllocRelatedValues.push_back(Ptr); +    } else { +      // Something complex, bail out. +      return false; +    } +  } + +  // Okay, this is an indirect global.  Remember all of the allocations for +  // this global in AllocsForIndirectGlobals. +  while (!AllocRelatedValues.empty()) { +    AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; +    Handles.emplace_front(*this, AllocRelatedValues.back()); +    Handles.front().I = Handles.begin(); +    AllocRelatedValues.pop_back(); +  } +  IndirectGlobals.insert(GV); +  Handles.emplace_front(*this, GV); +  Handles.front().I = Handles.begin(); +  return true; +} + +void GlobalsAAResult::CollectSCCMembership(CallGraph &CG) {   +  // We do a bottom-up SCC traversal of the call graph.  In other words, we +  // visit all callees before callers (leaf-first). +  unsigned SCCID = 0; +  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { +    const std::vector<CallGraphNode *> &SCC = *I; +    assert(!SCC.empty() && "SCC with no functions?"); + +    for (auto *CGN : SCC) +      if (Function *F = CGN->getFunction()) +        FunctionToSCCMap[F] = SCCID; +    ++SCCID; +  } +} + +/// AnalyzeCallGraph - At this point, we know the functions where globals are +/// immediately stored to and read from.  Propagate this information up the call +/// graph to all callers and compute the mod/ref info for all memory for each +/// function. +void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) { +  // We do a bottom-up SCC traversal of the call graph.  In other words, we +  // visit all callees before callers (leaf-first). 
+  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { +    const std::vector<CallGraphNode *> &SCC = *I; +    assert(!SCC.empty() && "SCC with no functions?"); + +    if (!SCC[0]->getFunction() || SCC[0]->getFunction()->mayBeOverridden()) { +      // Calls externally or is weak - can't say anything useful. Remove any existing +      // function records (may have been created when scanning globals). +      for (auto *Node : SCC) +        FunctionInfos.erase(Node->getFunction()); +      continue; +    } + +    FunctionInfo &FI = FunctionInfos[SCC[0]->getFunction()]; +    bool KnowNothing = false; + +    // Collect the mod/ref properties due to called functions.  We only compute +    // one mod-ref set. +    for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { +      Function *F = SCC[i]->getFunction(); +      if (!F) { +        KnowNothing = true; +        break; +      } + +      if (F->isDeclaration()) { +        // Try to get mod/ref behaviour from function attributes. +        if (F->doesNotAccessMemory() || F->onlyAccessesInaccessibleMemory()) { +          // Can't do better than that! +        } else if (F->onlyReadsMemory()) { +          FI.addModRefInfo(MRI_Ref); +          if (!F->isIntrinsic()) +            // This function might call back into the module and read a global - +            // consider every global as possibly being read by this function. +            FI.setMayReadAnyGlobal(); +        } else if (F->onlyAccessesArgMemory() ||  +                   F->onlyAccessesInaccessibleMemOrArgMem()) { +          // This function may only access (read/write) memory pointed to by its +          // arguments. If this pointer is to a global, this escaping use of the +          // pointer is captured in AnalyzeUsesOfPointer(). +          FI.addModRefInfo(MRI_ModRef); +        } else { +          FI.addModRefInfo(MRI_ModRef); +          // Can't say anything useful unless it's an intrinsic - they don't +          // read or write global variables of the kind considered here. +          KnowNothing = !F->isIntrinsic(); +        } +        continue; +      } + +      for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); +           CI != E && !KnowNothing; ++CI) +        if (Function *Callee = CI->second->getFunction()) { +          if (FunctionInfo *CalleeFI = getFunctionInfo(Callee)) { +            // Propagate function effect up. +            FI.addFunctionInfo(*CalleeFI); +          } else { +            // Can't say anything about it.  However, if it is inside our SCC, +            // then nothing needs to be done. +            CallGraphNode *CalleeNode = CG[Callee]; +            if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) +              KnowNothing = true; +          } +        } else { +          KnowNothing = true; +        } +    } + +    // If we can't say anything useful about this SCC, remove all SCC functions +    // from the FunctionInfos map. +    if (KnowNothing) { +      for (auto *Node : SCC) +        FunctionInfos.erase(Node->getFunction()); +      continue; +    } + +    // Scan the function bodies for explicit loads or stores. +    for (auto *Node : SCC) { +      if (FI.getModRefInfo() == MRI_ModRef) +        break; // The mod/ref lattice saturates here. +      for (Instruction &I : instructions(Node->getFunction())) { +        if (FI.getModRefInfo() == MRI_ModRef) +          break; // The mod/ref lattice saturates here. 
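
The SCC walk above is the usual bottom-up summary propagation: visit call-graph SCCs callee-first and fold each callee's mod/ref summary into the caller with a saturating join. A toy sketch with a hard-coded, already-ordered SCC list; the function names and seed summaries are illustrative only:

#include <iostream>
#include <map>
#include <string>
#include <vector>

enum : unsigned { NoModRef = 0, Ref = 1, Mod = 2, ModRef = 3 };

int main() {
  // A toy call graph with summaries seeded from the globals scan.
  std::map<std::string, std::vector<std::string>> Calls = {
      {"leaf_reader", {}},
      {"leaf_writer", {}},
      {"middle", {"leaf_reader", "leaf_writer"}},
      {"top", {"middle"}}};
  std::map<std::string, unsigned> Summary = {
      {"leaf_reader", Ref}, {"leaf_writer", Mod},
      {"middle", NoModRef}, {"top", NoModRef}};
  // SCCs listed callee-first (leaf-first); here every SCC is one function.
  std::vector<std::vector<std::string>> SCCs = {
      {"leaf_reader"}, {"leaf_writer"}, {"middle"}, {"top"}};

  // Fold callee summaries into callers with a saturating OR, the same shape
  // as FunctionInfo::addFunctionInfo.
  for (const auto &SCC : SCCs)
    for (const std::string &F : SCC)
      for (const std::string &Callee : Calls[F])
        Summary[F] |= Summary[Callee];

  std::cout << Summary["middle"] << " " << Summary["top"] << "\n"; // 3 3
}
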
+ +        // We handle calls specially because the graph-relevant aspects are +        // handled above. +        if (auto CS = CallSite(&I)) { +          if (isAllocationFn(&I, &TLI) || isFreeCall(&I, &TLI)) { +            // FIXME: It is completely unclear why this is necessary and not +            // handled by the above graph code. +            FI.addModRefInfo(MRI_ModRef); +          } else if (Function *Callee = CS.getCalledFunction()) { +            // The callgraph doesn't include intrinsic calls. +            if (Callee->isIntrinsic()) { +              FunctionModRefBehavior Behaviour = +                  AAResultBase::getModRefBehavior(Callee); +              FI.addModRefInfo(ModRefInfo(Behaviour & MRI_ModRef)); +            } +          } +          continue; +        } + +        // All non-call instructions we use the primary predicates for whether +        // thay read or write memory. +        if (I.mayReadFromMemory()) +          FI.addModRefInfo(MRI_Ref); +        if (I.mayWriteToMemory()) +          FI.addModRefInfo(MRI_Mod); +      } +    } + +    if ((FI.getModRefInfo() & MRI_Mod) == 0) +      ++NumReadMemFunctions; +    if (FI.getModRefInfo() == MRI_NoModRef) +      ++NumNoMemFunctions; + +    // Finally, now that we know the full effect on this SCC, clone the +    // information to each function in the SCC. +    // FI is a reference into FunctionInfos, so copy it now so that it doesn't +    // get invalidated if DenseMap decides to re-hash. +    FunctionInfo CachedFI = FI; +    for (unsigned i = 1, e = SCC.size(); i != e; ++i) +      FunctionInfos[SCC[i]->getFunction()] = CachedFI; +  } +} + +// GV is a non-escaping global. V is a pointer address that has been loaded from. +// If we can prove that V must escape, we can conclude that a load from V cannot +// alias GV. +static bool isNonEscapingGlobalNoAliasWithLoad(const GlobalValue *GV, +                                               const Value *V, +                                               int &Depth, +                                               const DataLayout &DL) { +  SmallPtrSet<const Value *, 8> Visited; +  SmallVector<const Value *, 8> Inputs; +  Visited.insert(V); +  Inputs.push_back(V); +  do { +    const Value *Input = Inputs.pop_back_val(); +     +    if (isa<GlobalValue>(Input) || isa<Argument>(Input) || isa<CallInst>(Input) || +        isa<InvokeInst>(Input)) +      // Arguments to functions or returns from functions are inherently +      // escaping, so we can immediately classify those as not aliasing any +      // non-addr-taken globals. +      // +      // (Transitive) loads from a global are also safe - if this aliased +      // another global, its address would escape, so no alias. +      continue; + +    // Recurse through a limited number of selects, loads and PHIs. This is an +    // arbitrary depth of 4, lower numbers could be used to fix compile time +    // issues if needed, but this is generally expected to be only be important +    // for small depths. 
+    if (++Depth > 4) +      return false; + +    if (auto *LI = dyn_cast<LoadInst>(Input)) { +      Inputs.push_back(GetUnderlyingObject(LI->getPointerOperand(), DL)); +      continue; +    }   +    if (auto *SI = dyn_cast<SelectInst>(Input)) { +      const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL); +      const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL); +      if (Visited.insert(LHS).second) +        Inputs.push_back(LHS); +      if (Visited.insert(RHS).second) +        Inputs.push_back(RHS); +      continue; +    } +    if (auto *PN = dyn_cast<PHINode>(Input)) { +      for (const Value *Op : PN->incoming_values()) { +        Op = GetUnderlyingObject(Op, DL); +        if (Visited.insert(Op).second) +          Inputs.push_back(Op); +      } +      continue; +    } +     +    return false; +  } while (!Inputs.empty()); + +  // All inputs were known to be no-alias. +  return true; +} + +// There are particular cases where we can conclude no-alias between +// a non-addr-taken global and some other underlying object. Specifically, +// a non-addr-taken global is known to not be escaped from any function. It is +// also incorrect for a transformation to introduce an escape of a global in +// a way that is observable when it was not there previously. One function +// being transformed to introduce an escape which could possibly be observed +// (via loading from a global or the return value for example) within another +// function is never safe. If the observation is made through non-atomic +// operations on different threads, it is a data-race and UB. If the +// observation is well defined, by being observed the transformation would have +// changed program behavior by introducing the observed escape, making it an +// invalid transform. +// +// This property does require that transformations which *temporarily* escape +// a global that was not previously escaped, prior to restoring it, cannot rely +// on the results of GMR::alias. This seems a reasonable restriction, although +// currently there is no way to enforce it. There is also no realistic +// optimization pass that would make this mistake. The closest example is +// a transformation pass which does reg2mem of SSA values but stores them into +// global variables temporarily before restoring the global variable's value. +// This could be useful to expose "benign" races for example. However, it seems +// reasonable to require that a pass which introduces escapes of global +// variables in this way to either not trust AA results while the escape is +// active, or to be forced to operate as a module pass that cannot co-exist +// with an alias analysis such as GMR. +bool GlobalsAAResult::isNonEscapingGlobalNoAlias(const GlobalValue *GV, +                                                 const Value *V) { +  // In order to know that the underlying object cannot alias the +  // non-addr-taken global, we must know that it would have to be an escape. +  // Thus if the underlying object is a function argument, a load from +  // a global, or the return of a function, it cannot alias. We can also +  // recurse through PHI nodes and select nodes provided all of their inputs +  // resolve to one of these known-escaping roots. 
+  SmallPtrSet<const Value *, 8> Visited; +  SmallVector<const Value *, 8> Inputs; +  Visited.insert(V); +  Inputs.push_back(V); +  int Depth = 0; +  do { +    const Value *Input = Inputs.pop_back_val(); + +    if (auto *InputGV = dyn_cast<GlobalValue>(Input)) { +      // If one input is the very global we're querying against, then we can't +      // conclude no-alias. +      if (InputGV == GV) +        return false; + +      // Distinct GlobalVariables never alias, unless overriden or zero-sized. +      // FIXME: The condition can be refined, but be conservative for now. +      auto *GVar = dyn_cast<GlobalVariable>(GV); +      auto *InputGVar = dyn_cast<GlobalVariable>(InputGV); +      if (GVar && InputGVar && +          !GVar->isDeclaration() && !InputGVar->isDeclaration() && +          !GVar->mayBeOverridden() && !InputGVar->mayBeOverridden()) { +        Type *GVType = GVar->getInitializer()->getType(); +        Type *InputGVType = InputGVar->getInitializer()->getType(); +        if (GVType->isSized() && InputGVType->isSized() && +            (DL.getTypeAllocSize(GVType) > 0) && +            (DL.getTypeAllocSize(InputGVType) > 0)) +          continue; +      } + +      // Conservatively return false, even though we could be smarter +      // (e.g. look through GlobalAliases). +      return false; +    } + +    if (isa<Argument>(Input) || isa<CallInst>(Input) || +        isa<InvokeInst>(Input)) { +      // Arguments to functions or returns from functions are inherently +      // escaping, so we can immediately classify those as not aliasing any +      // non-addr-taken globals. +      continue; +    } +     +    // Recurse through a limited number of selects, loads and PHIs. This is an +    // arbitrary depth of 4, lower numbers could be used to fix compile time +    // issues if needed, but this is generally expected to be only be important +    // for small depths. +    if (++Depth > 4) +      return false; + +    if (auto *LI = dyn_cast<LoadInst>(Input)) { +      // A pointer loaded from a global would have been captured, and we know +      // that the global is non-escaping, so no alias. +      const Value *Ptr = GetUnderlyingObject(LI->getPointerOperand(), DL); +      if (isNonEscapingGlobalNoAliasWithLoad(GV, Ptr, Depth, DL)) +        // The load does not alias with GV. +        continue; +      // Otherwise, a load could come from anywhere, so bail. +      return false; +    } +    if (auto *SI = dyn_cast<SelectInst>(Input)) { +      const Value *LHS = GetUnderlyingObject(SI->getTrueValue(), DL); +      const Value *RHS = GetUnderlyingObject(SI->getFalseValue(), DL); +      if (Visited.insert(LHS).second) +        Inputs.push_back(LHS); +      if (Visited.insert(RHS).second) +        Inputs.push_back(RHS); +      continue; +    } +    if (auto *PN = dyn_cast<PHINode>(Input)) { +      for (const Value *Op : PN->incoming_values()) { +        Op = GetUnderlyingObject(Op, DL); +        if (Visited.insert(Op).second) +          Inputs.push_back(Op); +      } +      continue; +    } + +    // FIXME: It would be good to handle other obvious no-alias cases here, but +    // it isn't clear how to do so reasonbly without building a small version +    // of BasicAA into this code. We could recurse into AAResultBase::alias +    // here but that seems likely to go poorly as we're inside the +    // implementation of such a query. Until then, just conservatievly retun +    // false. 
+    return false; +  } while (!Inputs.empty()); + +  // If all the inputs to V were definitively no-alias, then V is no-alias. +  return true; +} + +/// alias - If one of the pointers is to a global that we are tracking, and the +/// other is some random pointer, we know there cannot be an alias, because the +/// address of the global isn't taken. +AliasResult GlobalsAAResult::alias(const MemoryLocation &LocA, +                                   const MemoryLocation &LocB) { +  // Get the base object these pointers point to. +  const Value *UV1 = GetUnderlyingObject(LocA.Ptr, DL); +  const Value *UV2 = GetUnderlyingObject(LocB.Ptr, DL); + +  // If either of the underlying values is a global, they may be non-addr-taken +  // globals, which we can answer queries about. +  const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1); +  const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2); +  if (GV1 || GV2) { +    // If the global's address is taken, pretend we don't know it's a pointer to +    // the global. +    if (GV1 && !NonAddressTakenGlobals.count(GV1)) +      GV1 = nullptr; +    if (GV2 && !NonAddressTakenGlobals.count(GV2)) +      GV2 = nullptr; + +    // If the two pointers are derived from two different non-addr-taken +    // globals we know these can't alias. +    if (GV1 && GV2 && GV1 != GV2) +      return NoAlias; + +    // If one is and the other isn't, it isn't strictly safe but we can fake +    // this result if necessary for performance. This does not appear to be +    // a common problem in practice. +    if (EnableUnsafeGlobalsModRefAliasResults) +      if ((GV1 || GV2) && GV1 != GV2) +        return NoAlias; + +    // Check for a special case where a non-escaping global can be used to +    // conclude no-alias. +    if ((GV1 || GV2) && GV1 != GV2) { +      const GlobalValue *GV = GV1 ? GV1 : GV2; +      const Value *UV = GV1 ? UV2 : UV1; +      if (isNonEscapingGlobalNoAlias(GV, UV)) +        return NoAlias; +    } + +    // Otherwise if they are both derived from the same addr-taken global, we +    // can't know the two accesses don't overlap. +  } + +  // These pointers may be based on the memory owned by an indirect global.  If +  // so, we may be able to handle this.  First check to see if the base pointer +  // is a direct load from an indirect global. +  GV1 = GV2 = nullptr; +  if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) +    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) +      if (IndirectGlobals.count(GV)) +        GV1 = GV; +  if (const LoadInst *LI = dyn_cast<LoadInst>(UV2)) +    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) +      if (IndirectGlobals.count(GV)) +        GV2 = GV; + +  // These pointers may also be from an allocation for the indirect global.  If +  // so, also handle them. +  if (!GV1) +    GV1 = AllocsForIndirectGlobals.lookup(UV1); +  if (!GV2) +    GV2 = AllocsForIndirectGlobals.lookup(UV2); + +  // Now that we know whether the two pointers are related to indirect globals, +  // use this to disambiguate the pointers. If the pointers are based on +  // different indirect globals they cannot alias. +  if (GV1 && GV2 && GV1 != GV2) +    return NoAlias; + +  // If one is based on an indirect global and the other isn't, it isn't +  // strictly safe but we can fake this result if necessary for performance. +  // This does not appear to be a common problem in practice. 
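The bounded walk in isNonEscapingGlobalNoAlias above is a standard worklist pattern: a visited set, an explicit stack, and a hard depth cut-off. The following LLVM-free sketch (Node, allPathsReachLeaves and MaxDepth are invented names) shows only that control-flow shape on a toy graph, not the alias reasoning itself.

    #include <unordered_set>
    #include <vector>

    // Toy graph standing in for Values: "leaf" plays the role of a known escaping
    // root (argument, call, return), a non-leaf with operands plays a PHI/select.
    struct Node {
      bool IsLeaf = false;
      std::vector<const Node *> Operands;
    };

    // True only if every reachable node resolves to a leaf within MaxDepth steps,
    // mirroring the conservative early-outs in the code above.
    bool allPathsReachLeaves(const Node *N, unsigned MaxDepth = 4) {
      std::unordered_set<const Node *> Visited{N};
      std::vector<const Node *> Worklist{N};
      unsigned Depth = 0;
      do {
        const Node *Cur = Worklist.back();
        Worklist.pop_back();
        if (Cur->IsLeaf)
          continue;                    // known-good root, nothing more to prove
        if (Cur->Operands.empty())
          return false;                // unknown node: bail out conservatively
        if (++Depth > MaxDepth)
          return false;                // depth cut-off, like the patch's limit of 4
        for (const Node *Op : Cur->Operands)
          if (Visited.insert(Op).second)
            Worklist.push_back(Op);    // each node is enqueued at most once
      } while (!Worklist.empty());
      return true;                     // every input resolved to a known-good root
    }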
+  if (EnableUnsafeGlobalsModRefAliasResults) +    if ((GV1 || GV2) && GV1 != GV2) +      return NoAlias; + +  return AAResultBase::alias(LocA, LocB); +} + +ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS, +                                                     const GlobalValue *GV) { +  if (CS.doesNotAccessMemory()) +    return MRI_NoModRef; +  ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef; +   +  // Iterate through all the arguments to the called function. If any argument +  // is based on GV, return the conservative result. +  for (auto &A : CS.args()) { +    SmallVector<Value*, 4> Objects; +    GetUnderlyingObjects(A, Objects, DL); +     +    // All objects must be identified. +    if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject)) +      return ConservativeResult; + +    if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end()) +      return ConservativeResult; +  } + +  // We identified all objects in the argument list, and none of them were GV. +  return MRI_NoModRef; +} + +ModRefInfo GlobalsAAResult::getModRefInfo(ImmutableCallSite CS, +                                          const MemoryLocation &Loc) { +  unsigned Known = MRI_ModRef; + +  // If we are asking for mod/ref info of a direct call with a pointer to a +  // global we are tracking, return information if we have it. +  if (const GlobalValue *GV = +          dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL))) +    if (GV->hasLocalLinkage()) +      if (const Function *F = CS.getCalledFunction()) +        if (NonAddressTakenGlobals.count(GV)) +          if (const FunctionInfo *FI = getFunctionInfo(F)) +            Known = FI->getModRefInfoForGlobal(*GV) | +              getModRefInfoForArgument(CS, GV); + +  if (Known == MRI_NoModRef) +    return MRI_NoModRef; // No need to query other mod/ref analyses +  return ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc)); +} + +GlobalsAAResult::GlobalsAAResult(const DataLayout &DL, +                                 const TargetLibraryInfo &TLI) +    : AAResultBase(TLI), DL(DL) {} + +GlobalsAAResult::GlobalsAAResult(GlobalsAAResult &&Arg) +    : AAResultBase(std::move(Arg)), DL(Arg.DL), +      NonAddressTakenGlobals(std::move(Arg.NonAddressTakenGlobals)), +      IndirectGlobals(std::move(Arg.IndirectGlobals)), +      AllocsForIndirectGlobals(std::move(Arg.AllocsForIndirectGlobals)), +      FunctionInfos(std::move(Arg.FunctionInfos)), +      Handles(std::move(Arg.Handles)) { +  // Update the parent for each DeletionCallbackHandle. +  for (auto &H : Handles) { +    assert(H.GAR == &Arg); +    H.GAR = this; +  } +} + +/*static*/ GlobalsAAResult +GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI, +                               CallGraph &CG) { +  GlobalsAAResult Result(M.getDataLayout(), TLI); + +  // Discover which functions aren't recursive, to feed into AnalyzeGlobals. +  Result.CollectSCCMembership(CG); + +  // Find non-addr taken globals. +  Result.AnalyzeGlobals(M); + +  // Propagate on CG. 
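The expression ModRefInfo(Known & AAResultBase::getModRefInfo(CS, Loc)) used above relies on the two-bit mod/ref lattice, where combining two sound answers is set intersection and therefore a bitwise AND. A minimal self-contained sketch (enumerator values assumed to mirror LLVM's MRI_* encoding; the names intersect and ModRefBoth are invented):

    #include <cassert>

    // One bit for "may read", one for "may write".
    enum ModRef : unsigned { NoModRef = 0, Ref = 1, Mod = 2, ModRefBoth = 3 };

    // Intersecting two sound answers about the same query is bitwise AND.
    constexpr ModRef intersect(ModRef A, ModRef B) { return ModRef(A & B); }

    int main() {
      // GlobalsModRef proves "read only", another AA answers "may read or write":
      // together the call may still only be reported as reading.
      assert(intersect(Ref, ModRefBoth) == Ref);
      // If either side proves no access at all, the combined answer is NoModRef.
      assert(intersect(NoModRef, ModRefBoth) == NoModRef);
      return 0;
    }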
+  Result.AnalyzeCallGraph(CG, M); + +  return Result; +} + +GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> *AM) { +  return GlobalsAAResult::analyzeModule(M, +                                        AM->getResult<TargetLibraryAnalysis>(M), +                                        AM->getResult<CallGraphAnalysis>(M)); +} + +char GlobalsAA::PassID; + +char GlobalsAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(GlobalsAAWrapperPass, "globals-aa", +                      "Globals Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(GlobalsAAWrapperPass, "globals-aa", +                    "Globals Alias Analysis", false, true) + +ModulePass *llvm::createGlobalsAAWrapperPass() { +  return new GlobalsAAWrapperPass(); +} + +GlobalsAAWrapperPass::GlobalsAAWrapperPass() : ModulePass(ID) { +  initializeGlobalsAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool GlobalsAAWrapperPass::runOnModule(Module &M) { +  Result.reset(new GlobalsAAResult(GlobalsAAResult::analyzeModule( +      M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), +      getAnalysis<CallGraphWrapperPass>().getCallGraph()))); +  return false; +} + +bool GlobalsAAWrapperPass::doFinalization(Module &M) { +  Result.reset(); +  return false; +} + +void GlobalsAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<CallGraphWrapperPass>(); +  AU.addRequired<TargetLibraryInfoWrapperPass>(); +} diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp deleted file mode 100644 index 28fb49c89019..000000000000 --- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp +++ /dev/null @@ -1,609 +0,0 @@ -//===- GlobalsModRef.cpp - Simple Mod/Ref Analysis for Globals ------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This simple pass provides alias and mod/ref information for global values -// that do not have their address taken, and keeps track of whether functions -// read or write memory (are "pure").  For this simple (but very common) case, -// we can provide pretty accurate and useful information. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/SCCIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/CallGraph.h" -#include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include <set> -using namespace llvm; - -#define DEBUG_TYPE "globalsmodref-aa" - -STATISTIC(NumNonAddrTakenGlobalVars, -          "Number of global vars without address taken"); -STATISTIC(NumNonAddrTakenFunctions,"Number of functions without address taken"); -STATISTIC(NumNoMemFunctions, "Number of functions that do not access memory"); -STATISTIC(NumReadMemFunctions, "Number of functions that only read memory"); -STATISTIC(NumIndirectGlobalVars, "Number of indirect global objects"); - -namespace { -/// FunctionRecord - One instance of this structure is stored for every -/// function in the program.  Later, the entries for these functions are -/// removed if the function is found to call an external function (in which -/// case we know nothing about it. -struct FunctionRecord { -  /// GlobalInfo - Maintain mod/ref info for all of the globals without -  /// addresses taken that are read or written (transitively) by this -  /// function. -  std::map<const GlobalValue *, unsigned> GlobalInfo; - -  /// MayReadAnyGlobal - May read global variables, but it is not known which. -  bool MayReadAnyGlobal; - -  unsigned getInfoForGlobal(const GlobalValue *GV) const { -    unsigned Effect = MayReadAnyGlobal ? AliasAnalysis::Ref : 0; -    std::map<const GlobalValue *, unsigned>::const_iterator I = -        GlobalInfo.find(GV); -    if (I != GlobalInfo.end()) -      Effect |= I->second; -    return Effect; -  } - -  /// FunctionEffect - Capture whether or not this function reads or writes to -  /// ANY memory.  If not, we can do a lot of aggressive analysis on it. -  unsigned FunctionEffect; - -  FunctionRecord() : MayReadAnyGlobal(false), FunctionEffect(0) {} -}; - -/// GlobalsModRef - The actual analysis pass. -class GlobalsModRef : public ModulePass, public AliasAnalysis { -  /// NonAddressTakenGlobals - The globals that do not have their addresses -  /// taken. -  std::set<const GlobalValue *> NonAddressTakenGlobals; - -  /// IndirectGlobals - The memory pointed to by this global is known to be -  /// 'owned' by the global. -  std::set<const GlobalValue *> IndirectGlobals; - -  /// AllocsForIndirectGlobals - If an instruction allocates memory for an -  /// indirect global, this map indicates which one. -  std::map<const Value *, const GlobalValue *> AllocsForIndirectGlobals; - -  /// FunctionInfo - For each function, keep track of what globals are -  /// modified or read. -  std::map<const Function *, FunctionRecord> FunctionInfo; - -public: -  static char ID; -  GlobalsModRef() : ModulePass(ID) { -    initializeGlobalsModRefPass(*PassRegistry::getPassRegistry()); -  } - -  bool runOnModule(Module &M) override { -    InitializeAliasAnalysis(this, &M.getDataLayout()); - -    // Find non-addr taken globals. -    AnalyzeGlobals(M); - -    // Propagate on CG. 
-    AnalyzeCallGraph(getAnalysis<CallGraphWrapperPass>().getCallGraph(), M); -    return false; -  } - -  void getAnalysisUsage(AnalysisUsage &AU) const override { -    AliasAnalysis::getAnalysisUsage(AU); -    AU.addRequired<CallGraphWrapperPass>(); -    AU.setPreservesAll(); // Does not transform code -  } - -  //------------------------------------------------ -  // Implement the AliasAnalysis API -  // -  AliasResult alias(const MemoryLocation &LocA, -                    const MemoryLocation &LocB) override; -  ModRefResult getModRefInfo(ImmutableCallSite CS, -                             const MemoryLocation &Loc) override; -  ModRefResult getModRefInfo(ImmutableCallSite CS1, -                             ImmutableCallSite CS2) override { -    return AliasAnalysis::getModRefInfo(CS1, CS2); -  } - -  /// getModRefBehavior - Return the behavior of the specified function if -  /// called from the specified call site.  The call site may be null in which -  /// case the most generic behavior of this function should be returned. -  ModRefBehavior getModRefBehavior(const Function *F) override { -    ModRefBehavior Min = UnknownModRefBehavior; - -    if (FunctionRecord *FR = getFunctionInfo(F)) { -      if (FR->FunctionEffect == 0) -        Min = DoesNotAccessMemory; -      else if ((FR->FunctionEffect & Mod) == 0) -        Min = OnlyReadsMemory; -    } - -    return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min); -  } - -  /// getModRefBehavior - Return the behavior of the specified function if -  /// called from the specified call site.  The call site may be null in which -  /// case the most generic behavior of this function should be returned. -  ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { -    ModRefBehavior Min = UnknownModRefBehavior; - -    if (const Function *F = CS.getCalledFunction()) -      if (FunctionRecord *FR = getFunctionInfo(F)) { -        if (FR->FunctionEffect == 0) -          Min = DoesNotAccessMemory; -        else if ((FR->FunctionEffect & Mod) == 0) -          Min = OnlyReadsMemory; -      } - -    return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); -  } - -  void deleteValue(Value *V) override; -  void addEscapingUse(Use &U) override; - -  /// getAdjustedAnalysisPointer - This method is used when a pass implements -  /// an analysis interface through multiple inheritance.  If needed, it -  /// should override this to adjust the this pointer as needed for the -  /// specified pass info. -  void *getAdjustedAnalysisPointer(AnalysisID PI) override { -    if (PI == &AliasAnalysis::ID) -      return (AliasAnalysis *)this; -    return this; -  } - -private: -  /// getFunctionInfo - Return the function info for the function, or null if -  /// we don't have anything useful to say about it. 
-  FunctionRecord *getFunctionInfo(const Function *F) { -    std::map<const Function *, FunctionRecord>::iterator I = -        FunctionInfo.find(F); -    if (I != FunctionInfo.end()) -      return &I->second; -    return nullptr; -  } - -  void AnalyzeGlobals(Module &M); -  void AnalyzeCallGraph(CallGraph &CG, Module &M); -  bool AnalyzeUsesOfPointer(Value *V, std::vector<Function *> &Readers, -                            std::vector<Function *> &Writers, -                            GlobalValue *OkayStoreDest = nullptr); -  bool AnalyzeIndirectGlobalMemory(GlobalValue *GV); -}; -} - -char GlobalsModRef::ID = 0; -INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", -                         "Simple mod/ref analysis for globals", false, true, -                         false) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) -INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", -                       "Simple mod/ref analysis for globals", false, true, -                       false) - -Pass *llvm::createGlobalsModRefPass() { return new GlobalsModRef(); } - -/// AnalyzeGlobals - Scan through the users of all of the internal -/// GlobalValue's in the program.  If none of them have their "address taken" -/// (really, their address passed to something nontrivial), record this fact, -/// and record the functions that they are used directly in. -void GlobalsModRef::AnalyzeGlobals(Module &M) { -  std::vector<Function *> Readers, Writers; -  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) -    if (I->hasLocalLinkage()) { -      if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { -        // Remember that we are tracking this global. -        NonAddressTakenGlobals.insert(I); -        ++NumNonAddrTakenFunctions; -      } -      Readers.clear(); -      Writers.clear(); -    } - -  for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; -       ++I) -    if (I->hasLocalLinkage()) { -      if (!AnalyzeUsesOfPointer(I, Readers, Writers)) { -        // Remember that we are tracking this global, and the mod/ref fns -        NonAddressTakenGlobals.insert(I); - -        for (unsigned i = 0, e = Readers.size(); i != e; ++i) -          FunctionInfo[Readers[i]].GlobalInfo[I] |= Ref; - -        if (!I->isConstant()) // No need to keep track of writers to constants -          for (unsigned i = 0, e = Writers.size(); i != e; ++i) -            FunctionInfo[Writers[i]].GlobalInfo[I] |= Mod; -        ++NumNonAddrTakenGlobalVars; - -        // If this global holds a pointer type, see if it is an indirect global. -        if (I->getType()->getElementType()->isPointerTy() && -            AnalyzeIndirectGlobalMemory(I)) -          ++NumIndirectGlobalVars; -      } -      Readers.clear(); -      Writers.clear(); -    } -} - -/// AnalyzeUsesOfPointer - Look at all of the users of the specified pointer. -/// If this is used by anything complex (i.e., the address escapes), return -/// true.  Also, while we are at it, keep track of those functions that read and -/// write to the value. -/// -/// If OkayStoreDest is non-null, stores into this global are allowed. 
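Both the deleted implementation below and its replacement in the new file classify every use of a pointer as a read, a write, or an escape, giving up as soon as anything untrackable is seen. A stripped-down, LLVM-free version of that scheme (UseKind, PointerUses and analyzeUses are invented names; the real code dispatches on load, store, GEP, bitcast, call and icmp users):

    #include <vector>

    enum class UseKind { Read, Write, PassedToUnknownCall, Other };

    struct PointerUses {
      std::vector<UseKind> Uses;
    };

    // Returns true if the pointer "escapes" (ends up somewhere untrackable);
    // otherwise records whether it is read and/or written, mirroring the shape
    // of AnalyzeUsesOfPointer.
    bool analyzeUses(const PointerUses &P, bool &Reads, bool &Writes) {
      Reads = Writes = false;
      for (UseKind U : P.Uses) {
        switch (U) {
        case UseKind::Read:
          Reads = true;
          break;
        case UseKind::Write:
          Writes = true;
          break;
        case UseKind::PassedToUnknownCall:
        case UseKind::Other:
          return true;  // address taken / escaped: stop tracking this global
        }
      }
      return false;     // all uses understood; the global stays non-addr-taken
    }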
-bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V, -                                         std::vector<Function *> &Readers, -                                         std::vector<Function *> &Writers, -                                         GlobalValue *OkayStoreDest) { -  if (!V->getType()->isPointerTy()) -    return true; - -  for (Use &U : V->uses()) { -    User *I = U.getUser(); -    if (LoadInst *LI = dyn_cast<LoadInst>(I)) { -      Readers.push_back(LI->getParent()->getParent()); -    } else if (StoreInst *SI = dyn_cast<StoreInst>(I)) { -      if (V == SI->getOperand(1)) { -        Writers.push_back(SI->getParent()->getParent()); -      } else if (SI->getOperand(1) != OkayStoreDest) { -        return true; // Storing the pointer -      } -    } else if (Operator::getOpcode(I) == Instruction::GetElementPtr) { -      if (AnalyzeUsesOfPointer(I, Readers, Writers)) -        return true; -    } else if (Operator::getOpcode(I) == Instruction::BitCast) { -      if (AnalyzeUsesOfPointer(I, Readers, Writers, OkayStoreDest)) -        return true; -    } else if (auto CS = CallSite(I)) { -      // Make sure that this is just the function being called, not that it is -      // passing into the function. -      if (!CS.isCallee(&U)) { -        // Detect calls to free. -        if (isFreeCall(I, TLI)) -          Writers.push_back(CS->getParent()->getParent()); -        else -          return true; // Argument of an unknown call. -      } -    } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) { -      if (!isa<ConstantPointerNull>(ICI->getOperand(1))) -        return true; // Allow comparison against null. -    } else { -      return true; -    } -  } - -  return false; -} - -/// AnalyzeIndirectGlobalMemory - We found an non-address-taken global variable -/// which holds a pointer type.  See if the global always points to non-aliased -/// heap memory: that is, all initializers of the globals are allocations, and -/// those allocations have no use other than initialization of the global. -/// Further, all loads out of GV must directly use the memory, not store the -/// pointer somewhere.  If this is true, we consider the memory pointed to by -/// GV to be owned by GV and can disambiguate other pointers from it. -bool GlobalsModRef::AnalyzeIndirectGlobalMemory(GlobalValue *GV) { -  // Keep track of values related to the allocation of the memory, f.e. the -  // value produced by the malloc call and any casts. -  std::vector<Value *> AllocRelatedValues; - -  // Walk the user list of the global.  If we find anything other than a direct -  // load or store, bail out. -  for (User *U : GV->users()) { -    if (LoadInst *LI = dyn_cast<LoadInst>(U)) { -      // The pointer loaded from the global can only be used in simple ways: -      // we allow addressing of it and loading storing to it.  We do *not* allow -      // storing the loaded pointer somewhere else or passing to a function. -      std::vector<Function *> ReadersWriters; -      if (AnalyzeUsesOfPointer(LI, ReadersWriters, ReadersWriters)) -        return false; // Loaded pointer escapes. -      // TODO: Could try some IP mod/ref of the loaded pointer. -    } else if (StoreInst *SI = dyn_cast<StoreInst>(U)) { -      // Storing the global itself. -      if (SI->getOperand(0) == GV) -        return false; - -      // If storing the null pointer, ignore it. -      if (isa<ConstantPointerNull>(SI->getOperand(0))) -        continue; - -      // Check the value being stored. 
-      Value *Ptr = GetUnderlyingObject(SI->getOperand(0), -                                       GV->getParent()->getDataLayout()); - -      if (!isAllocLikeFn(Ptr, TLI)) -        return false; // Too hard to analyze. - -      // Analyze all uses of the allocation.  If any of them are used in a -      // non-simple way (e.g. stored to another global) bail out. -      std::vector<Function *> ReadersWriters; -      if (AnalyzeUsesOfPointer(Ptr, ReadersWriters, ReadersWriters, GV)) -        return false; // Loaded pointer escapes. - -      // Remember that this allocation is related to the indirect global. -      AllocRelatedValues.push_back(Ptr); -    } else { -      // Something complex, bail out. -      return false; -    } -  } - -  // Okay, this is an indirect global.  Remember all of the allocations for -  // this global in AllocsForIndirectGlobals. -  while (!AllocRelatedValues.empty()) { -    AllocsForIndirectGlobals[AllocRelatedValues.back()] = GV; -    AllocRelatedValues.pop_back(); -  } -  IndirectGlobals.insert(GV); -  return true; -} - -/// AnalyzeCallGraph - At this point, we know the functions where globals are -/// immediately stored to and read from.  Propagate this information up the call -/// graph to all callers and compute the mod/ref info for all memory for each -/// function. -void GlobalsModRef::AnalyzeCallGraph(CallGraph &CG, Module &M) { -  // We do a bottom-up SCC traversal of the call graph.  In other words, we -  // visit all callees before callers (leaf-first). -  for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) { -    const std::vector<CallGraphNode *> &SCC = *I; -    assert(!SCC.empty() && "SCC with no functions?"); - -    if (!SCC[0]->getFunction()) { -      // Calls externally - can't say anything useful.  Remove any existing -      // function records (may have been created when scanning globals). -      for (unsigned i = 0, e = SCC.size(); i != e; ++i) -        FunctionInfo.erase(SCC[i]->getFunction()); -      continue; -    } - -    FunctionRecord &FR = FunctionInfo[SCC[0]->getFunction()]; - -    bool KnowNothing = false; -    unsigned FunctionEffect = 0; - -    // Collect the mod/ref properties due to called functions.  We only compute -    // one mod-ref set. -    for (unsigned i = 0, e = SCC.size(); i != e && !KnowNothing; ++i) { -      Function *F = SCC[i]->getFunction(); -      if (!F) { -        KnowNothing = true; -        break; -      } - -      if (F->isDeclaration()) { -        // Try to get mod/ref behaviour from function attributes. -        if (F->doesNotAccessMemory()) { -          // Can't do better than that! -        } else if (F->onlyReadsMemory()) { -          FunctionEffect |= Ref; -          if (!F->isIntrinsic()) -            // This function might call back into the module and read a global - -            // consider every global as possibly being read by this function. -            FR.MayReadAnyGlobal = true; -        } else { -          FunctionEffect |= ModRef; -          // Can't say anything useful unless it's an intrinsic - they don't -          // read or write global variables of the kind considered here. -          KnowNothing = !F->isIntrinsic(); -        } -        continue; -      } - -      for (CallGraphNode::iterator CI = SCC[i]->begin(), E = SCC[i]->end(); -           CI != E && !KnowNothing; ++CI) -        if (Function *Callee = CI->second->getFunction()) { -          if (FunctionRecord *CalleeFR = getFunctionInfo(Callee)) { -            // Propagate function effect up. 
-            FunctionEffect |= CalleeFR->FunctionEffect; - -            // Incorporate callee's effects on globals into our info. -            for (const auto &G : CalleeFR->GlobalInfo) -              FR.GlobalInfo[G.first] |= G.second; -            FR.MayReadAnyGlobal |= CalleeFR->MayReadAnyGlobal; -          } else { -            // Can't say anything about it.  However, if it is inside our SCC, -            // then nothing needs to be done. -            CallGraphNode *CalleeNode = CG[Callee]; -            if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end()) -              KnowNothing = true; -          } -        } else { -          KnowNothing = true; -        } -    } - -    // If we can't say anything useful about this SCC, remove all SCC functions -    // from the FunctionInfo map. -    if (KnowNothing) { -      for (unsigned i = 0, e = SCC.size(); i != e; ++i) -        FunctionInfo.erase(SCC[i]->getFunction()); -      continue; -    } - -    // Scan the function bodies for explicit loads or stores. -    for (auto *Node : SCC) { -      if (FunctionEffect == ModRef) -        break; // The mod/ref lattice saturates here. -      for (Instruction &I : inst_range(Node->getFunction())) { -        if (FunctionEffect == ModRef) -          break; // The mod/ref lattice saturates here. - -        // We handle calls specially because the graph-relevant aspects are -        // handled above. -        if (auto CS = CallSite(&I)) { -          if (isAllocationFn(&I, TLI) || isFreeCall(&I, TLI)) { -            // FIXME: It is completely unclear why this is necessary and not -            // handled by the above graph code. -            FunctionEffect |= ModRef; -          } else if (Function *Callee = CS.getCalledFunction()) { -            // The callgraph doesn't include intrinsic calls. -            if (Callee->isIntrinsic()) { -              ModRefBehavior Behaviour = -                  AliasAnalysis::getModRefBehavior(Callee); -              FunctionEffect |= (Behaviour & ModRef); -            } -          } -          continue; -        } - -        // All non-call instructions we use the primary predicates for whether -        // thay read or write memory. -        if (I.mayReadFromMemory()) -          FunctionEffect |= Ref; -        if (I.mayWriteToMemory()) -          FunctionEffect |= Mod; -      } -    } - -    if ((FunctionEffect & Mod) == 0) -      ++NumReadMemFunctions; -    if (FunctionEffect == 0) -      ++NumNoMemFunctions; -    FR.FunctionEffect = FunctionEffect; - -    // Finally, now that we know the full effect on this SCC, clone the -    // information to each function in the SCC. -    for (unsigned i = 1, e = SCC.size(); i != e; ++i) -      FunctionInfo[SCC[i]->getFunction()] = FR; -  } -} - -/// alias - If one of the pointers is to a global that we are tracking, and the -/// other is some random pointer, we know there cannot be an alias, because the -/// address of the global isn't taken. -AliasResult GlobalsModRef::alias(const MemoryLocation &LocA, -                                 const MemoryLocation &LocB) { -  // Get the base object these pointers point to. -  const Value *UV1 = GetUnderlyingObject(LocA.Ptr, *DL); -  const Value *UV2 = GetUnderlyingObject(LocB.Ptr, *DL); - -  // If either of the underlying values is a global, they may be non-addr-taken -  // globals, which we can answer queries about. 
-  const GlobalValue *GV1 = dyn_cast<GlobalValue>(UV1); -  const GlobalValue *GV2 = dyn_cast<GlobalValue>(UV2); -  if (GV1 || GV2) { -    // If the global's address is taken, pretend we don't know it's a pointer to -    // the global. -    if (GV1 && !NonAddressTakenGlobals.count(GV1)) -      GV1 = nullptr; -    if (GV2 && !NonAddressTakenGlobals.count(GV2)) -      GV2 = nullptr; - -    // If the two pointers are derived from two different non-addr-taken -    // globals, or if one is and the other isn't, we know these can't alias. -    if ((GV1 || GV2) && GV1 != GV2) -      return NoAlias; - -    // Otherwise if they are both derived from the same addr-taken global, we -    // can't know the two accesses don't overlap. -  } - -  // These pointers may be based on the memory owned by an indirect global.  If -  // so, we may be able to handle this.  First check to see if the base pointer -  // is a direct load from an indirect global. -  GV1 = GV2 = nullptr; -  if (const LoadInst *LI = dyn_cast<LoadInst>(UV1)) -    if (GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) -      if (IndirectGlobals.count(GV)) -        GV1 = GV; -  if (const LoadInst *LI = dyn_cast<LoadInst>(UV2)) -    if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(LI->getOperand(0))) -      if (IndirectGlobals.count(GV)) -        GV2 = GV; - -  // These pointers may also be from an allocation for the indirect global.  If -  // so, also handle them. -  if (AllocsForIndirectGlobals.count(UV1)) -    GV1 = AllocsForIndirectGlobals[UV1]; -  if (AllocsForIndirectGlobals.count(UV2)) -    GV2 = AllocsForIndirectGlobals[UV2]; - -  // Now that we know whether the two pointers are related to indirect globals, -  // use this to disambiguate the pointers.  If either pointer is based on an -  // indirect global and if they are not both based on the same indirect global, -  // they cannot alias. -  if ((GV1 || GV2) && GV1 != GV2) -    return NoAlias; - -  return AliasAnalysis::alias(LocA, LocB); -} - -AliasAnalysis::ModRefResult -GlobalsModRef::getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc) { -  unsigned Known = ModRef; - -  // If we are asking for mod/ref info of a direct call with a pointer to a -  // global we are tracking, return information if we have it. -  const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); -  if (const GlobalValue *GV = -          dyn_cast<GlobalValue>(GetUnderlyingObject(Loc.Ptr, DL))) -    if (GV->hasLocalLinkage()) -      if (const Function *F = CS.getCalledFunction()) -        if (NonAddressTakenGlobals.count(GV)) -          if (const FunctionRecord *FR = getFunctionInfo(F)) -            Known = FR->getInfoForGlobal(GV); - -  if (Known == NoModRef) -    return NoModRef; // No need to query other mod/ref analyses -  return ModRefResult(Known & AliasAnalysis::getModRefInfo(CS, Loc)); -} - -//===----------------------------------------------------------------------===// -// Methods to update the analysis as a result of the client transformation. -// -void GlobalsModRef::deleteValue(Value *V) { -  if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) { -    if (NonAddressTakenGlobals.erase(GV)) { -      // This global might be an indirect global.  If so, remove it and remove -      // any AllocRelatedValues for it. -      if (IndirectGlobals.erase(GV)) { -        // Remove any entries in AllocsForIndirectGlobals for this global. 
-        for (std::map<const Value *, const GlobalValue *>::iterator -                 I = AllocsForIndirectGlobals.begin(), -                 E = AllocsForIndirectGlobals.end(); -             I != E;) { -          if (I->second == GV) { -            AllocsForIndirectGlobals.erase(I++); -          } else { -            ++I; -          } -        } -      } -    } -  } - -  // Otherwise, if this is an allocation related to an indirect global, remove -  // it. -  AllocsForIndirectGlobals.erase(V); - -  AliasAnalysis::deleteValue(V); -} - -void GlobalsModRef::addEscapingUse(Use &U) { -  // For the purposes of this analysis, it is conservatively correct to treat -  // a newly escaping value equivalently to a deleted one.  We could perhaps -  // be more precise by processing the new use and attempting to update our -  // saved analysis results to accommodate it. -  deleteValue(U); - -  AliasAnalysis::addEscapingUse(U); -} diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp deleted file mode 100644 index 806bfb81b6d5..000000000000 --- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===-- IPA.cpp -----------------------------------------------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the common initialization routines for the IPA library. -// -//===----------------------------------------------------------------------===// - -#include "llvm/InitializePasses.h" -#include "llvm-c/Initialization.h" -#include "llvm/PassRegistry.h" - -using namespace llvm; - -/// initializeIPA - Initialize all passes linked into the IPA library. 
-void llvm::initializeIPA(PassRegistry &Registry) { -  initializeCallGraphWrapperPassPass(Registry); -  initializeCallGraphPrinterPass(Registry); -  initializeCallGraphViewerPass(Registry); -  initializeGlobalsModRefPass(Registry); -} - -void LLVMInitializeIPA(LLVMPassRegistryRef R) { -  initializeIPA(*unwrap(R)); -} diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp index 926787d3be91..e0c5d8fa5f5a 100644 --- a/contrib/llvm/lib/Analysis/IVUsers.cpp +++ b/contrib/llvm/lib/Analysis/IVUsers.cpp @@ -39,7 +39,7 @@ INITIALIZE_PASS_BEGIN(IVUsers, "iv-users",  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)  INITIALIZE_PASS_END(IVUsers, "iv-users",                        "Induction Variable Users", false, true) @@ -255,7 +255,7 @@ void IVUsers::getAnalysisUsage(AnalysisUsage &AU) const {    AU.addRequired<AssumptionCacheTracker>();    AU.addRequired<LoopInfoWrapperPass>();    AU.addRequired<DominatorTreeWrapperPass>(); -  AU.addRequired<ScalarEvolution>(); +  AU.addRequired<ScalarEvolutionWrapperPass>();    AU.setPreservesAll();  } @@ -266,7 +266,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {        *L->getHeader()->getParent());    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); -  SE = &getAnalysis<ScalarEvolution>(); +  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();    // Collect ephemeral values so that AddUsersIfInteresting skips them.    EphValues.clear(); @@ -276,7 +276,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {    // them by stride.  Start by finding all of the PHI nodes in the header for    // this loop.  If they are induction variables, inspect their uses.    for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) -    (void)AddUsersIfInteresting(I); +    (void)AddUsersIfInteresting(&*I);    return false;  } diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp index c0d2e375cb04..a86a703ed9d6 100644 --- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/InlineCost.cpp @@ -115,11 +115,11 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {    /// inlining has the given attribute set either at the call site or the    /// function declaration.  Primarily used to inspect call site specific    /// attributes since these can be more precise than the ones on the callee -  /// itself.  +  /// itself.    bool paramHasAttr(Argument *A, Attribute::AttrKind Attr);    /// Return true if the given value is known non null within the callee if -  /// inlined through this particular callsite.  +  /// inlined through this particular callsite.    bool isKnownNonNullInCallee(Value *V);    // Custom analysis routines. 
@@ -156,6 +156,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {    bool visitSwitchInst(SwitchInst &SI);    bool visitIndirectBrInst(IndirectBrInst &IBI);    bool visitResumeInst(ResumeInst &RI); +  bool visitCleanupReturnInst(CleanupReturnInst &RI); +  bool visitCatchReturnInst(CatchReturnInst &RI);    bool visitUnreachableInst(UnreachableInst &I);  public: @@ -832,8 +834,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {    CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS);    if (CA.analyzeCall(CS)) {      // We were able to inline the indirect call! Subtract the cost from the -    // bonus we want to apply, but don't go below zero. -    Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); +    // threshold to get the bonus we want to apply, but don't go below zero. +    Cost -= std::max(0, CA.getThreshold() - CA.getCost());    }    return Base::visitCallSite(CS); @@ -903,6 +905,18 @@ bool CallAnalyzer::visitResumeInst(ResumeInst &RI) {    return false;  } +bool CallAnalyzer::visitCleanupReturnInst(CleanupReturnInst &CRI) { +  // FIXME: It's not clear that a single instruction is an accurate model for +  // the inline cost of a cleanupret instruction. +  return false; +} + +bool CallAnalyzer::visitCatchReturnInst(CatchReturnInst &CRI) { +  // FIXME: It's not clear that a single instruction is an accurate model for +  // the inline cost of a catchret instruction. +  return false; +} +  bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) {    // FIXME: It might be reasonably to discount the cost of instructions leading    // to unreachable as they have the lowest possible impact on both runtime and @@ -946,20 +960,21 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,        continue;      // Skip ephemeral values. -    if (EphValues.count(I)) +    if (EphValues.count(&*I))        continue;      ++NumInstructions;      if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())        ++NumVectorInstructions; -    // If the instruction is floating point, and the target says this operation is -    // expensive or the function has the "use-soft-float" attribute, this may -    // eventually become a library call.  Treat the cost as such. +    // If the instruction is floating point, and the target says this operation +    // is expensive or the function has the "use-soft-float" attribute, this may +    // eventually become a library call. Treat the cost as such.      if (I->getType()->isFloatingPointTy()) {        bool hasSoftFloatAttr = false; -      // If the function has the "use-soft-float" attribute, mark it as expensive. +      // If the function has the "use-soft-float" attribute, mark it as +      // expensive.        if (F.hasFnAttribute("use-soft-float")) {          Attribute Attr = F.getFnAttribute("use-soft-float");          StringRef Val = Attr.getValueAsString(); @@ -977,7 +992,7 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB,      // all of the per-instruction logic. The visit tree returns true if we      // consumed the instruction in any way, and false if the instruction's base      // cost should count against inlining. 
-    if (Base::visit(I)) +    if (Base::visit(&*I))        ++NumInstructionsSimplified;      else        Cost += InlineConstants::InstrCost; @@ -1157,15 +1172,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {         FAI != FAE; ++FAI, ++CAI) {      assert(CAI != CS.arg_end());      if (Constant *C = dyn_cast<Constant>(CAI)) -      SimplifiedValues[FAI] = C; +      SimplifiedValues[&*FAI] = C;      Value *PtrArg = *CAI;      if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { -      ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); +      ConstantOffsetPtrs[&*FAI] = std::make_pair(PtrArg, C->getValue());        // We can SROA any pointer arguments derived from alloca instructions.        if (isa<AllocaInst>(PtrArg)) { -        SROAArgValues[FAI] = PtrArg; +        SROAArgValues[&*FAI] = PtrArg;          SROAArgCosts[PtrArg] = 0;        }      } @@ -1281,7 +1296,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {    else if (NumVectorInstructions <= NumInstructions / 2)      Threshold -= (FiftyPercentVectorBonus - TenPercentVectorBonus); -  return Cost < Threshold; +  return Cost <= std::max(0, Threshold);  }  #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) @@ -1304,36 +1319,6 @@ void CallAnalyzer::dump() {  }  #endif -INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", -                      true, true) -INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", -                    true, true) - -char InlineCostAnalysis::ID = 0; - -InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID) {} - -InlineCostAnalysis::~InlineCostAnalysis() {} - -void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.setPreservesAll(); -  AU.addRequired<AssumptionCacheTracker>(); -  AU.addRequired<TargetTransformInfoWrapperPass>(); -  CallGraphSCCPass::getAnalysisUsage(AU); -} - -bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { -  TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>(); -  ACT = &getAnalysis<AssumptionCacheTracker>(); -  return false; -} - -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { -  return getInlineCost(CS, CS.getCalledFunction(), Threshold); -} -  /// \brief Test that two functions either have or have not the given attribute  ///        at the same time.  
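The analyzeCall change above replaces the decision "Cost < Threshold" with "Cost <= std::max(0, Threshold)", so hitting the threshold exactly is now accepted and a negative threshold behaves like zero. A tiny worked illustration (the numbers and the name shouldInline are made up; only the comparison itself comes from the patch):

    #include <algorithm>
    #include <cassert>

    bool shouldInline(int Cost, int Threshold) {
      return Cost <= std::max(0, Threshold);   // the rule introduced above
    }

    int main() {
      assert(shouldInline(225, 225));   // exactly at the threshold: accepted
      assert(shouldInline(0, -5));      // negative threshold clamps to zero, so
      assert(!shouldInline(1, -5));     // only non-positive net cost still passes
      return 0;
    }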
template<typename AttrKind> @@ -1346,14 +1331,19 @@ static bool attributeMatches(Function *F1, Function *F2, AttrKind Attr) {  static bool functionsHaveCompatibleAttributes(Function *Caller,                                                Function *Callee,                                                TargetTransformInfo &TTI) { -  return TTI.hasCompatibleFunctionAttributes(Caller, Callee) && -         attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && -         attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && -         attributeMatches(Caller, Callee, Attribute::SanitizeThread); +  return TTI.areInlineCompatible(Caller, Callee) && +         AttributeFuncs::areInlineCompatible(*Caller, *Callee); +} + +InlineCost llvm::getInlineCost(CallSite CS, int Threshold, +                               TargetTransformInfo &CalleeTTI, +                               AssumptionCacheTracker *ACT) { +  return getInlineCost(CS, CS.getCalledFunction(), Threshold, CalleeTTI, ACT);  } -InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, -                                             int Threshold) { +InlineCost llvm::getInlineCost(CallSite CS, Function *Callee, int Threshold, +                               TargetTransformInfo &CalleeTTI, +                               AssumptionCacheTracker *ACT) {    // Cannot inline indirect calls.    if (!Callee)      return llvm::InlineCost::getNever(); @@ -1368,8 +1358,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,    // Never inline functions with conflicting attributes (unless callee has    // always-inline attribute). -  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, -                                         TTIWP->getTTI(*Callee))) +  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee, CalleeTTI))      return llvm::InlineCost::getNever();    // Don't inline this call if the caller has the optnone attribute. 
@@ -1386,7 +1375,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,    DEBUG(llvm::dbgs() << "      Analyzing call of " << Callee->getName()          << "...\n"); -  CallAnalyzer CA(TTIWP->getTTI(*Callee), ACT, *Callee, Threshold, CS); +  CallAnalyzer CA(CalleeTTI, ACT, *Callee, Threshold, CS);    bool ShouldInline = CA.analyzeCall(CS);    DEBUG(CA.dump()); @@ -1400,7 +1389,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,    return llvm::InlineCost::get(CA.getCost(), CA.getThreshold());  } -bool InlineCostAnalysis::isInlineViable(Function &F) { +bool llvm::isInlineViable(Function &F) {    bool ReturnsTwice = F.hasFnAttribute(Attribute::ReturnsTwice);    for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {      // Disallow inlining of functions which contain indirect branches or @@ -1408,9 +1397,8 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {      if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())        return false; -    for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; -         ++II) { -      CallSite CS(II); +    for (auto &II : *BI) { +      CallSite CS(&II);        if (!CS)          continue; diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index a7f8f5c8c99b..b89ff268d11e 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -122,7 +122,7 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {      return DT->dominates(I, P);    } -  // Otherwise, if the instruction is in the entry block, and is not an invoke, +  // Otherwise, if the instruction is in the entry block and is not an invoke,    // then it obviously dominates all phi nodes.    if (I->getParent() == &I->getParent()->getParent()->getEntryBlock() &&        !isa<InvokeInst>(I)) @@ -2090,8 +2090,7 @@ static Constant *computePointerICmp(const DataLayout &DL,      // Is the set of underlying objects all noalias calls?      auto IsNAC = [](SmallVectorImpl<Value *> &Objects) { -      return std::all_of(Objects.begin(), Objects.end(), -                         [](Value *V){ return isNoAliasCall(V); }); +      return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall);      };      // Is the set of underlying objects all things which must be disjoint from @@ -2176,6 +2175,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,        // X >=u 1 -> X        if (match(RHS, m_One()))          return LHS; +      if (isImpliedCondition(RHS, LHS, Q.DL)) +        return getTrue(ITy); +      break; +    case ICmpInst::ICMP_SGE: +      /// For signed comparison, the values for an i1 are 0 and -1  +      /// respectively. 
This maps into a truth table of: +      /// LHS | RHS | LHS >=s RHS   | LHS implies RHS +      ///  0  |  0  |  1 (0 >= 0)   |  1 +      ///  0  |  1  |  1 (0 >= -1)  |  1 +      ///  1  |  0  |  0 (-1 >= 0)  |  0 +      ///  1  |  1  |  1 (-1 >= -1) |  1 +      if (isImpliedCondition(LHS, RHS, Q.DL)) +        return getTrue(ITy);        break;      case ICmpInst::ICMP_SLT:        // X <s 0 -> X @@ -2187,6 +2199,10 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,        if (match(RHS, m_One()))          return LHS;        break; +    case ICmpInst::ICMP_ULE: +      if (isImpliedCondition(LHS, RHS, Q.DL)) +        return getTrue(ITy); +      break;      }    } @@ -2360,9 +2376,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,      } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {        // 'and x, CI2' produces [0, CI2].        Upper = CI2->getValue() + 1; +    } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) { +      // 'add nuw x, CI2' produces [CI2, UINT_MAX]. +      Lower = CI2->getValue();      } -    if (Lower != Upper) { -      ConstantRange LHS_CR = ConstantRange(Lower, Upper); + +    ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper) +                                          : ConstantRange(Width, true); + +    if (auto *I = dyn_cast<Instruction>(LHS)) +      if (auto *Ranges = I->getMetadata(LLVMContext::MD_range)) +        LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges)); + +    if (!LHS_CR.isFullSet()) {        if (RHS_CR.contains(LHS_CR))          return ConstantInt::getTrue(RHS->getContext());        if (RHS_CR.inverse().contains(LHS_CR)) @@ -2370,6 +2396,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,      }    } +  // If both operands have range metadata, use the metadata +  // to simplify the comparison. +  if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) { +    auto RHS_Instr = dyn_cast<Instruction>(RHS); +    auto LHS_Instr = dyn_cast<Instruction>(LHS); + +    if (RHS_Instr->getMetadata(LLVMContext::MD_range) && +        LHS_Instr->getMetadata(LLVMContext::MD_range)) { +      auto RHS_CR = getConstantRangeFromMetadata( +          *RHS_Instr->getMetadata(LLVMContext::MD_range)); +      auto LHS_CR = getConstantRangeFromMetadata( +          *LHS_Instr->getMetadata(LLVMContext::MD_range)); + +      auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR); +      if (Satisfied_CR.contains(LHS_CR)) +        return ConstantInt::getTrue(RHS->getContext()); + +      auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion( +                CmpInst::getInversePredicate(Pred), RHS_CR); +      if (InversedSatisfied_CR.contains(LHS_CR)) +        return ConstantInt::getFalse(RHS->getContext()); +    } +  } +    // Compare of cast, for example (zext X) != 0 -> X != 0    if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {      Instruction *LI = cast<CastInst>(LHS); @@ -2529,6 +2579,14 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,      }    } +  // icmp eq|ne X, Y -> false|true if X != Y +  if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) && +      isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) { +    LLVMContext &Ctx = LHS->getType()->getContext(); +    return Pred == ICmpInst::ICMP_NE ? +      ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx); +  } +      // Special logic for binary operators.    
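The new range-metadata logic above folds a comparison when the constant range of one operand is entirely inside (or entirely outside) the region that satisfies the predicate against the other operand's range. An LLVM-free illustration of the same idea for the single predicate "unsigned less than", using plain half-open intervals (Range and foldULT are invented names; ranges are assumed non-empty and non-wrapping):

    #include <cassert>
    #include <cstdint>

    // [Lo, Hi) stands in for a ConstantRange recovered from !range metadata.
    struct Range { uint32_t Lo, Hi; };

    // Try to fold "LHS <u RHS": +1 if always true, -1 if always false,
    // 0 if the ranges overlap and the comparison must be kept.
    int foldULT(Range LHS, Range RHS) {
      if (LHS.Hi - 1 < RHS.Lo)   // every LHS value is below every RHS value
        return +1;
      if (LHS.Lo >= RHS.Hi - 1)  // every LHS value is >= every RHS value
        return -1;
      return 0;
    }

    int main() {
      assert(foldULT({0, 10}, {20, 30}) == +1);  // always true
      assert(foldULT({20, 30}, {0, 10}) == -1);  // never true
      assert(foldULT({0, 10}, {5, 15}) == 0);    // overlapping: cannot fold
      return 0;
    }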
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);    BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS); @@ -3039,7 +3097,7 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,                                const DataLayout &DL,                                const TargetLibraryInfo *TLI,                                const DominatorTree *DT, AssumptionCache *AC, -                              Instruction *CxtI) { +                              const Instruction *CxtI) {    return ::SimplifyICmpInst(Predicate, LHS, RHS, Query(DL, TLI, DT, AC, CxtI),                              RecursionLimit);  } @@ -4024,6 +4082,17 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,      break;    } +  // In general, it is possible for computeKnownBits to determine all bits in a +  // value even when the operands are not all constants. +  if (!Result && I->getType()->isIntegerTy()) { +    unsigned BitWidth = I->getType()->getScalarSizeInBits(); +    APInt KnownZero(BitWidth, 0); +    APInt KnownOne(BitWidth, 0); +    computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT); +    if ((KnownZero | KnownOne).isAllOnesValue()) +      Result = ConstantInt::get(I->getContext(), KnownOne); +  } +    /// If called on unreachable code, the above logic may report that the    /// instruction simplified to itself.  Make life easier for users by    /// detecting that case here, returning a safe value instead. diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp index c8d0410c1e0f..0f0f31e62ac7 100644 --- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp +++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp @@ -198,7 +198,8 @@ void LazyCallGraph::SCC::insertOutgoingEdge(Node &CallerN, Node &CalleeN) {    assert(CalleeC.isDescendantOf(*this) &&           "Callee must be a descendant of the Caller."); -  // The only change required is to add this SCC to the parent set of the callee. +  // The only change required is to add this SCC to the parent set of the +  // callee.    CalleeC.ParentSCCs.insert(this);  } @@ -454,8 +455,7 @@ void LazyCallGraph::SCC::internalDFS(  }  SmallVector<LazyCallGraph::SCC *, 1> -LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, -                                       Node &CalleeN) { +LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN, Node &CalleeN) {    // First remove it from the node.    CallerN.removeEdgeInternal(CalleeN.getFunction()); @@ -522,7 +522,7 @@ LazyCallGraph::SCC::removeIntraSCCEdge(Node &CallerN,    // the leaf SCC list.    if (!IsLeafSCC && !ResultSCCs.empty())      G->LeafSCCs.erase(std::remove(G->LeafSCCs.begin(), G->LeafSCCs.end(), this), -                     G->LeafSCCs.end()); +                      G->LeafSCCs.end());    // Return the new list of SCCs.    return ResultSCCs; diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index a6ae7f2229c5..0d1d34e0cb4f 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -26,6 +26,7 @@  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Instructions.h"  #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h"  #include "llvm/IR/PatternMatch.h"  #include "llvm/IR/ValueHandle.h"  #include "llvm/Support/Debug.h" @@ -64,10 +65,10 @@ class LVILatticeVal {    enum LatticeValueTy {      /// This Value has no known value yet.      undefined, -     +      /// This Value has a specific constant value.      
constant, -     +      /// This Value is known to not have the specified value.      notconstant, @@ -77,13 +78,13 @@ class LVILatticeVal {      /// This value is not known to be constant, and we know that it has a value.      overdefined    }; -   +    /// Val: This stores the current lattice value along with the Constant* for    /// the constant if this is a 'constant' or 'notconstant' value.    LatticeValueTy Tag;    Constant *Val;    ConstantRange Range; -   +  public:    LVILatticeVal() : Tag(undefined), Val(nullptr), Range(1, true) {} @@ -104,29 +105,34 @@ public:      Res.markConstantRange(CR);      return Res;    } +  static LVILatticeVal getOverdefined() { +    LVILatticeVal Res; +    Res.markOverdefined(); +    return Res; +  }    bool isUndefined() const     { return Tag == undefined; }    bool isConstant() const      { return Tag == constant; }    bool isNotConstant() const   { return Tag == notconstant; }    bool isConstantRange() const { return Tag == constantrange; }    bool isOverdefined() const   { return Tag == overdefined; } -   +    Constant *getConstant() const {      assert(isConstant() && "Cannot get the constant of a non-constant!");      return Val;    } -   +    Constant *getNotConstant() const {      assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");      return Val;    } -   +    ConstantRange getConstantRange() const {      assert(isConstantRange() &&             "Cannot get the constant-range of a non-constant-range!");      return Range;    } -   +    /// Return true if this is a change in status.    bool markOverdefined() {      if (isOverdefined()) @@ -150,7 +156,7 @@ public:      Val = V;      return true;    } -   +    /// Return true if this is a change in status.    bool markNotConstant(Constant *V) {      assert(V && "Marking constant with NULL"); @@ -168,27 +174,27 @@ public:      Val = V;      return true;    } -   +    /// Return true if this is a change in status.    bool markConstantRange(const ConstantRange NewR) {      if (isConstantRange()) {        if (NewR.isEmptySet())          return markOverdefined(); -       +        bool changed = Range != NewR;        Range = NewR;        return changed;      } -     +      assert(isUndefined());      if (NewR.isEmptySet())        return markOverdefined(); -     +      Tag = constantrange;      Range = NewR;      return true;    } -   +    /// Merge the specified lattice value into this one, updating this    /// one and returning true if anything changed.    bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) { @@ -267,7 +273,7 @@ public:      return markConstantRange(NewR);    }  }; -   +  } // end anonymous namespace.  namespace llvm { @@ -295,9 +301,9 @@ raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {  namespace {    /// A callback value handle updates the cache when values are erased.    class LazyValueInfoCache; -  struct LVIValueHandle : public CallbackVH { +  struct LVIValueHandle final : public CallbackVH {      LazyValueInfoCache *Parent; -       +      LVIValueHandle(Value *V, LazyValueInfoCache *P)        : CallbackVH(V), Parent(P) { } @@ -308,24 +314,27 @@ namespace {    };  } -namespace {  +namespace {    /// This is the cache kept by LazyValueInfo which    /// maintains information about queries across the clients' queries.    class LazyValueInfoCache {      /// This is all of the cached block information for exactly one Value*.      
/// The entries are sorted by the BasicBlock* of the      /// entries, allowing us to do a lookup with a binary search. -    typedef std::map<AssertingVH<BasicBlock>, LVILatticeVal> ValueCacheEntryTy; +    /// Over-defined lattice values are recorded in OverDefinedCache to reduce +    /// memory overhead. +    typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> +        ValueCacheEntryTy;      /// This is all of the cached information for all values,      /// mapped from Value* to key information.      std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache; -     +      /// This tracks, on a per-block basis, the set of values that are -    /// over-defined at the end of that block.  This is required -    /// for cache updating. -    typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; -    DenseSet<OverDefinedPairTy> OverDefinedCache; +    /// over-defined at the end of that block. +    typedef DenseMap<AssertingVH<BasicBlock>, SmallPtrSet<Value *, 4>> +        OverDefinedCacheTy; +    OverDefinedCacheTy OverDefinedCache;      /// Keep track of all blocks that we have ever seen, so we      /// don't spend time removing unused blocks from our caches. @@ -357,9 +366,13 @@ namespace {      void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {        SeenBlocks.insert(BB); -      lookup(Val)[BB] = Result; + +      // Insert over-defined values into their own cache to reduce memory +      // overhead.        if (Result.isOverdefined()) -        OverDefinedCache.insert(std::make_pair(BB, Val)); +        OverDefinedCache[BB].insert(Val); +      else +        lookup(Val)[BB] = Result;      }      LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); @@ -382,11 +395,39 @@ namespace {                                              Instruction *BBI);      void solve(); -     +      ValueCacheEntryTy &lookup(Value *V) {        return ValueCache[LVIValueHandle(V, this)];      } +    bool isOverdefined(Value *V, BasicBlock *BB) const { +      auto ODI = OverDefinedCache.find(BB); + +      if (ODI == OverDefinedCache.end()) +        return false; + +      return ODI->second.count(V); +    } + +    bool hasCachedValueInfo(Value *V, BasicBlock *BB) { +      if (isOverdefined(V, BB)) +        return true; + +      LVIValueHandle ValHandle(V, this); +      auto I = ValueCache.find(ValHandle); +      if (I == ValueCache.end()) +        return false; + +      return I->second.count(BB); +    } + +    LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) { +      if (isOverdefined(V, BB)) +        return LVILatticeVal::getOverdefined(); + +      return lookup(V)[BB]; +    } +        public:      /// This is the query interface to determine the lattice      /// value for the specified Value* at the end of the specified block. @@ -402,15 +443,15 @@ namespace {      /// value for the specified Value* that is true on the specified edge.      LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,                                   Instruction *CxtI = nullptr); -     +      /// This is the update interface to inform the cache that an edge from      /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.      void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc); -     +      /// This is part of the update interface to inform the cache      /// that a block has been deleted.      void eraseBlock(BasicBlock *BB); -     +      /// clear - Empty the cache.      
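The cache restructuring above moves fully over-defined results out of the per-value map and into a small per-block set, since an over-defined entry carries no payload worth storing. A rough standalone sketch of that two-tier lookup, using std::map and std::set as stand-ins for the SmallDenseMap/SmallPtrSet types in the patch:

```cpp
#include <map>
#include <set>
#include <string>
#include <optional>
#include <cassert>

using Value = std::string;   // stand-ins for llvm::Value* / BasicBlock*
using Block = std::string;

struct Lattice { bool Overdefined = false; int ConstantVal = 0; };

struct SplitCache {
  std::map<Block, std::set<Value>> OverDefined;          // cheap: no payload
  std::map<Value, std::map<Block, Lattice>> ValueCache;  // only "useful" facts

  void insert(const Value &V, const Block &B, const Lattice &L) {
    if (L.Overdefined)
      OverDefined[B].insert(V);      // don't spend a full Lattice slot on it
    else
      ValueCache[V][B] = L;
  }

  std::optional<Lattice> lookup(const Value &V, const Block &B) const {
    auto OD = OverDefined.find(B);   // consult the cheap set first
    if (OD != OverDefined.end() && OD->second.count(V))
      return Lattice{true, 0};
    auto VI = ValueCache.find(V);
    if (VI == ValueCache.end()) return std::nullopt;
    auto BI = VI->second.find(B);
    if (BI == VI->second.end()) return std::nullopt;
    return BI->second;
  }
};

int main() {
  SplitCache C;
  C.insert("%x", "entry", {false, 42});
  C.insert("%y", "entry", {true, 0});
  assert(C.lookup("%x", "entry")->ConstantVal == 42);
  assert(C.lookup("%y", "entry")->Overdefined);
  assert(!C.lookup("%z", "entry"));
  return 0;
}
```

Queries consult the cheap over-defined set before the full map, which mirrors the new isOverdefined/getCachedValueInfo helpers.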
void clear() {        SeenBlocks.clear(); @@ -425,15 +466,17 @@ namespace {  } // end anonymous namespace  void LVIValueHandle::deleted() { -  typedef std::pair<AssertingVH<BasicBlock>, Value*> OverDefinedPairTy; -   -  SmallVector<OverDefinedPairTy, 4> ToErase; -  for (const OverDefinedPairTy &P : Parent->OverDefinedCache) -    if (P.second == getValPtr()) -      ToErase.push_back(P); -  for (const OverDefinedPairTy &P : ToErase) -    Parent->OverDefinedCache.erase(P); -   +  SmallVector<AssertingVH<BasicBlock>, 4> ToErase; +  for (auto &I : Parent->OverDefinedCache) { +    SmallPtrSetImpl<Value *> &ValueSet = I.second; +    if (ValueSet.count(getValPtr())) +      ValueSet.erase(getValPtr()); +    if (ValueSet.empty()) +      ToErase.push_back(I.first); +  } +  for (auto &BB : ToErase) +    Parent->OverDefinedCache.erase(BB); +    // This erasure deallocates *this, so it MUST happen after we're done    // using any and all members of *this.    Parent->ValueCache.erase(*this); @@ -446,15 +489,11 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {      return;    SeenBlocks.erase(I); -  SmallVector<OverDefinedPairTy, 4> ToErase; -  for (const OverDefinedPairTy& P : OverDefinedCache) -    if (P.first == BB) -      ToErase.push_back(P); -  for (const OverDefinedPairTy &P : ToErase) -    OverDefinedCache.erase(P); +  auto ODI = OverDefinedCache.find(BB); +  if (ODI != OverDefinedCache.end()) +    OverDefinedCache.erase(ODI); -  for (std::map<LVIValueHandle, ValueCacheEntryTy>::iterator -       I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I) +  for (auto I = ValueCache.begin(), E = ValueCache.end(); I != E; ++I)      I->second.erase(BB);  } @@ -466,7 +505,8 @@ void LazyValueInfoCache::solve() {      if (solveBlockValue(e.second, e.first)) {        // The work item was completely processed.        assert(BlockValueStack.top() == e && "Nothing should have been pushed!"); -      assert(lookup(e.second).count(e.first) && "Result should be in cache!"); +      assert(hasCachedValueInfo(e.second, e.first) && +             "Result should be in cache!");        BlockValueStack.pop();        BlockValueSet.erase(e); @@ -482,11 +522,7 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {    if (isa<Constant>(Val))      return true; -  LVIValueHandle ValHandle(Val, this); -  std::map<LVIValueHandle, ValueCacheEntryTy>::iterator I = -    ValueCache.find(ValHandle); -  if (I == ValueCache.end()) return false; -  return I->second.count(BB); +  return hasCachedValueInfo(Val, BB);  }  LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { @@ -495,17 +531,36 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {      return LVILatticeVal::get(VC);    SeenBlocks.insert(BB); -  return lookup(Val)[BB]; +  return getCachedValueInfo(Val, BB); +} + +static LVILatticeVal getFromRangeMetadata(Instruction *BBI) { +  switch (BBI->getOpcode()) { +  default: break; +  case Instruction::Load: +  case Instruction::Call: +  case Instruction::Invoke: +    if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))  +      if (isa<IntegerType>(BBI->getType())) { +        ConstantRange Result = getConstantRangeFromMetadata(*Ranges); +        return LVILatticeVal::getRange(Result); +      } +    break; +  }; +  // Nothing known - Note that we do not want overdefined here.  We may know +  // something else about the value and not having range metadata shouldn't +  // cause us to throw away those facts. 
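The getFromRangeMetadata helper above deliberately reports the undefined lattice value, not overdefined, when an instruction carries no !range metadata. The reason shows up in a toy merge function: merging undefined into an existing fact preserves it, while merging overdefined would destroy it. The Lat/Val types below are simplified stand-ins for LVILatticeVal, with range union as the merge of two ranges.

```cpp
#include <algorithm>
#include <cassert>

enum class Lat { Undefined, Range, Overdefined };

struct Val {
  Lat Tag = Lat::Undefined;
  int Lo = 0, Hi = 0;   // only meaningful when Tag == Range
};

static Val mergeIn(Val A, Val B) {
  if (A.Tag == Lat::Undefined) return B;      // undefined is the neutral element
  if (B.Tag == Lat::Undefined) return A;
  if (A.Tag == Lat::Range && B.Tag == Lat::Range)
    return {Lat::Range, std::min(A.Lo, B.Lo), std::max(A.Hi, B.Hi)};
  return {Lat::Overdefined, 0, 0};            // anything else collapses
}

int main() {
  Val PriorFact  = {Lat::Range, 1, 100};      // a fact we already hold
  Val NoMetadata = {Lat::Undefined, 0, 0};    // what the helper returns
  Val Bad        = {Lat::Overdefined, 0, 0};  // what it must not return

  assert(mergeIn(PriorFact, NoMetadata).Tag == Lat::Range);   // fact kept
  assert(mergeIn(PriorFact, Bad).Tag == Lat::Overdefined);    // fact lost
  return 0;
}
```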
+  return LVILatticeVal();  }  bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {    if (isa<Constant>(Val))      return true; -  if (lookup(Val).count(BB)) { +  if (hasCachedValueInfo(Val, BB)) {      // If we have a cached value, use that.      DEBUG(dbgs() << "  reuse BB '" << BB->getName() -                 << "' val=" << lookup(Val)[BB] << '\n'); +                 << "' val=" << getCachedValueInfo(Val, BB) << '\n');      // Since we're reusing a cached value, we don't need to update the      // OverDefinedCache. The cache will have been properly updated whenever the @@ -516,7 +571,7 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {    // Hold off inserting this value into the Cache in case we have to return    // false and come back later.    LVILatticeVal Res; -   +    Instruction *BBI = dyn_cast<Instruction>(Val);    if (!BBI || BBI->getParent() != BB) {      if (!solveBlockValueNonLocal(Res, Val, BB)) @@ -532,12 +587,18 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {      return true;    } -  if (AllocaInst *AI = dyn_cast<AllocaInst>(BBI)) { -    Res = LVILatticeVal::getNot(ConstantPointerNull::get(AI->getType())); +  // If this value is a nonnull pointer, record it's range and bailout. +  PointerType *PT = dyn_cast<PointerType>(BBI->getType()); +  if (PT && isKnownNonNull(BBI)) { +    Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT));      insertResult(Val, BB, Res);      return true;    } +  // If this is an instruction which supports range metadata, return the +  // implied range.  TODO: This should be an intersection, not a union. +  Res.mergeIn(getFromRangeMetadata(BBI), DL); +    // We can only analyze the definitions of certain classes of instructions    // (integral binops and casts at the moment), so bail if this isn't one.    LVILatticeVal Result; @@ -661,7 +722,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,          PointerType *PTy = cast<PointerType>(Val->getType());          Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));        } -       +        BBLV = Result;        return true;      } @@ -674,7 +735,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,    BBLV = Result;    return true;  } -   +  bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,                                                  PHINode *PN, BasicBlock *BB) {    LVILatticeVal Result;  // Start Undefined. @@ -700,7 +761,7 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,      if (Result.isOverdefined()) {        DEBUG(dbgs() << " compute BB '" << BB->getName()              << "' - overdefined because of pred.\n"); -       +        BBLV = Result;        return true;      } @@ -765,7 +826,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,      BBLV.markOverdefined();      return true;    } -   +    ConstantRange LHSRange = LHSVal.getConstantRange();    ConstantRange RHSRange(1);    IntegerType *ResultTy = cast<IntegerType>(BBI->getType()); @@ -819,7 +880,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,    case Instruction::Or:      Result.markConstantRange(LHSRange.binaryOr(RHSRange));      break; -   +    // Unhandled instructions are overdefined.    
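solveBlockValueConstantRange, shown above, pushes the operands' ranges through a per-opcode transfer function (add, mul, shl, and, or, and so on). The sketch below does the same with plain inclusive intervals; LLVM's ConstantRange is half-open and handles wrapping and signedness, so this is only an illustration of the data flow, not the real arithmetic.

```cpp
#include <cassert>

struct Interval { int Lo, Hi; };                 // inclusive, Lo <= Hi, no wrapping

static Interval add(Interval A, Interval B) {
  return {A.Lo + B.Lo, A.Hi + B.Hi};
}

static Interval mulByConst(Interval A, int C) {  // assumes C >= 0 for simplicity
  return {A.Lo * C, A.Hi * C};
}

int main() {
  Interval V1{1, 4};                    // e.g. !range metadata said 1..4
  Interval Sum = add(V1, {10, 19});     // v1 + [10,19]  ->  [11,23]
  assert(Sum.Lo == 11 && Sum.Hi == 23);
  Interval Scaled = mulByConst(V1, 3);  // v1 * 3  ->  [3,12]
  assert(Scaled.Lo == 3 && Scaled.Hi == 12);
  return 0;
}
```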
default:      DEBUG(dbgs() << " compute BB '" << BB->getName() @@ -827,7 +888,7 @@ bool LazyValueInfoCache::solveBlockValueConstantRange(LVILatticeVal &BBLV,      Result.markOverdefined();      break;    } -   +    BBLV = Result;    return true;  } @@ -877,7 +938,7 @@ bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,  /// Val is not constrained on the edge.  static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,                                BasicBlock *BBTo, LVILatticeVal &Result) { -  // TODO: Handle more complex conditionals.  If (v == 0 || v2 < 1) is false, we +  // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we    // know that v != 0.    if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {      // If this is a conditional branch and only one successor goes to BBTo, then @@ -887,7 +948,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,        bool isTrueDest = BI->getSuccessor(0) == BBTo;        assert(BI->getSuccessor(!isTrueDest) == BBTo &&               "BBTo isn't a successor of BBFrom"); -       +        // If V is the condition of the branch itself, then we know exactly what        // it is.        if (BI->getCondition() == Val) { @@ -895,7 +956,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,                                Type::getInt1Ty(Val->getContext()), isTrueDest));          return true;        } -       +        // If the condition of the branch is an equality comparison, we may be        // able to infer the value.        if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) @@ -997,7 +1058,7 @@ LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,                                                    Instruction *CxtI) {    DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"          << BB->getName() << "'\n"); -   +    assert(BlockValueStack.empty() && BlockValueSet.empty());    pushBlockValue(std::make_pair(BB, V)); @@ -1014,6 +1075,8 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {          << CxtI->getName() << "'\n");    LVILatticeVal Result; +  if (auto *I = dyn_cast<Instruction>(V)) +    Result = getFromRangeMetadata(I);    mergeAssumeBlockValueConstantRange(V, Result, CxtI);    DEBUG(dbgs() << "  Result = " << Result << "\n"); @@ -1025,7 +1088,7 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,                 Instruction *CxtI) {    DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"          << FromBB->getName() << "' to '" << ToBB->getName() << "'\n"); -   +    LVILatticeVal Result;    if (!getEdgeValue(V, FromBB, ToBB, Result, CxtI)) {      solve(); @@ -1040,24 +1103,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,  void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,                                      BasicBlock *NewSucc) { -  // When an edge in the graph has been threaded, values that we could not  -  // determine a value for before (i.e. were marked overdefined) may be possible -  // to solve now.  We do NOT try to proactively update these values.  Instead, -  // we clear their entries from the cache, and allow lazy updating to recompute -  // them when needed. -   +  // When an edge in the graph has been threaded, values that we could not +  // determine a value for before (i.e. were marked overdefined) may be +  // possible to solve now. We do NOT try to proactively update these values. 
+  // Instead, we clear their entries from the cache, and allow lazy updating to +  // recompute them when needed. +    // The updating process is fairly simple: we need to drop cached info    // for all values that were marked overdefined in OldSucc, and for those same    // values in any successor of OldSucc (except NewSucc) in which they were    // also marked overdefined.    std::vector<BasicBlock*> worklist;    worklist.push_back(OldSucc); -   -  DenseSet<Value*> ClearSet; -  for (OverDefinedPairTy &P : OverDefinedCache) -    if (P.first == OldSucc) -      ClearSet.insert(P.second); -   + +  auto I = OverDefinedCache.find(OldSucc); +  if (I == OverDefinedCache.end()) +    return; // Nothing to process here. +  SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end()); +    // Use a worklist to perform a depth-first search of OldSucc's successors.    // NOTE: We do not need a visited list since any blocks we have already    // visited will have had their overdefined markers cleared already, and we @@ -1065,32 +1128,31 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,    while (!worklist.empty()) {      BasicBlock *ToUpdate = worklist.back();      worklist.pop_back(); -     +      // Skip blocks only accessible through NewSucc.      if (ToUpdate == NewSucc) continue; -     +      bool changed = false; -    for (Value *V : ClearSet) { +    for (Value *V : ValsToClear) {        // If a value was marked overdefined in OldSucc, and is here too... -      DenseSet<OverDefinedPairTy>::iterator OI = -        OverDefinedCache.find(std::make_pair(ToUpdate, V)); -      if (OI == OverDefinedCache.end()) continue; - -      // Remove it from the caches. -      ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)]; -      ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); - -      assert(CI != Entry.end() && "Couldn't find entry to update?"); -      Entry.erase(CI); -      OverDefinedCache.erase(OI); - -      // If we removed anything, then we potentially need to update  +      auto OI = OverDefinedCache.find(ToUpdate); +      if (OI == OverDefinedCache.end()) +        continue; +      SmallPtrSetImpl<Value *> &ValueSet = OI->second; +      if (!ValueSet.count(V)) +        continue; + +      ValueSet.erase(V); +      if (ValueSet.empty()) +        OverDefinedCache.erase(OI); + +      // If we removed anything, then we potentially need to update        // blocks successors too.        changed = true;      }      if (!changed) continue; -     +      worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));    }  } @@ -1158,7 +1220,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,  }  /// Determine whether the specified value is known to be a -/// constant on the specified edge.  Return null if not. +/// constant on the specified edge. Return null if not.  Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,                                             BasicBlock *ToBB,                                             Instruction *CxtI) { @@ -1190,26 +1252,26 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,        return ResCI->isZero() ? 
LazyValueInfo::False : LazyValueInfo::True;      return LazyValueInfo::Unknown;    } -   +    if (Result.isConstantRange()) {      ConstantInt *CI = dyn_cast<ConstantInt>(C);      if (!CI) return LazyValueInfo::Unknown; -     +      ConstantRange CR = Result.getConstantRange();      if (Pred == ICmpInst::ICMP_EQ) {        if (!CR.contains(CI->getValue()))          return LazyValueInfo::False; -       +        if (CR.isSingleElement() && CR.contains(CI->getValue()))          return LazyValueInfo::True;      } else if (Pred == ICmpInst::ICMP_NE) {        if (!CR.contains(CI->getValue()))          return LazyValueInfo::True; -       +        if (CR.isSingleElement() && CR.contains(CI->getValue()))          return LazyValueInfo::False;      } -     +      // Handle more complex predicates.      ConstantRange TrueValues =          ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue()); @@ -1219,7 +1281,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,        return LazyValueInfo::False;      return LazyValueInfo::Unknown;    } -   +    if (Result.isNotConstant()) {      // If this is an equality comparison, we can try to fold it knowing that      // "V != C1". @@ -1240,7 +1302,7 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,      }      return LazyValueInfo::Unknown;    } -   +    return LazyValueInfo::Unknown;  } @@ -1266,20 +1328,69 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,    if (Ret != Unknown)      return Ret; -  // TODO: Move this logic inside getValueAt so that it can be cached rather -  // than re-queried on each call.  This would also allow us to merge the -  // underlying lattice values to get more information  +  // Note: The following bit of code is somewhat distinct from the rest of LVI; +  // LVI as a whole tries to compute a lattice value which is conservatively +  // correct at a given location.  In this case, we have a predicate which we +  // weren't able to prove about the merged result, and we're pushing that +  // predicate back along each incoming edge to see if we can prove it +  // separately for each input.  As a motivating example, consider: +  // bb1: +  //   %v1 = ... ; constantrange<1, 5> +  //   br label %merge +  // bb2: +  //   %v2 = ... ; constantrange<10, 20> +  //   br label %merge +  // merge: +  //   %phi = phi [%v1, %v2] ; constantrange<1,20> +  //   %pred = icmp eq i32 %phi, 8 +  // We can't tell from the lattice value for '%phi' that '%pred' is false +  // along each path, but by checking the predicate over each input separately, +  // we can. +  // We limit the search to one step backwards from the current BB and value. +  // We could consider extending this to search further backwards through the +  // CFG and/or value graph, but there are non-obvious compile time vs quality +  // tradeoffs.      if (CxtI) { -    // For a comparison where the V is outside this block, it's possible -    // that we've branched on it before.  Look to see if the value is known -    // on all incoming edges.      BasicBlock *BB = CxtI->getParent(); + +    // Function entry or an unreachable block.  Bail to avoid confusing +    // analysis below.      
pred_iterator PI = pred_begin(BB), PE = pred_end(BB); -    if (PI != PE && -        (!isa<Instruction>(V) || -         cast<Instruction>(V)->getParent() != BB)) { +    if (PI == PE) +      return Unknown; + +    // If V is a PHI node in the same block as the context, we need to ask +    // questions about the predicate as applied to the incoming value along +    // each edge. This is useful for eliminating cases where the predicate is +    // known along all incoming edges. +    if (auto *PHI = dyn_cast<PHINode>(V)) +      if (PHI->getParent() == BB) { +        Tristate Baseline = Unknown; +        for (unsigned i = 0, e = PHI->getNumIncomingValues(); i < e; i++) { +          Value *Incoming = PHI->getIncomingValue(i); +          BasicBlock *PredBB = PHI->getIncomingBlock(i); +          // Note that PredBB may be BB itself.         +          Tristate Result = getPredicateOnEdge(Pred, Incoming, C, PredBB, BB, +                                               CxtI); +           +          // Keep going as long as we've seen a consistent known result for +          // all inputs. +          Baseline = (i == 0) ? Result /* First iteration */ +            : (Baseline == Result ? Baseline : Unknown); /* All others */ +          if (Baseline == Unknown) +            break; +        } +        if (Baseline != Unknown) +          return Baseline; +      }     + +    // For a comparison where the V is outside this block, it's possible +    // that we've branched on it before. Look to see if the value is known +    // on all incoming edges. +    if (!isa<Instruction>(V) || +        cast<Instruction>(V)->getParent() != BB) {        // For predecessor edge, determine if the comparison is true or false -      // on that edge.  If they're all true or all false, we can conclude  +      // on that edge. If they're all true or all false, we can conclude        // the value of the comparison in this block.        Tristate Baseline = getPredicateOnEdge(Pred, V, C, *PI, BB, CxtI);        if (Baseline != Unknown) { diff --git a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp deleted file mode 100644 index 991a0e3e2752..000000000000 --- a/contrib/llvm/lib/Analysis/LibCallAliasAnalysis.cpp +++ /dev/null @@ -1,141 +0,0 @@ -//===- LibCallAliasAnalysis.cpp - Implement AliasAnalysis for libcalls ----===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the LibCallAliasAnalysis class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LibCallAliasAnalysis.h" -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/IR/Function.h" -#include "llvm/Pass.h" -using namespace llvm; -   -// Register this pass... 
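The new PHI handling in getPredicateAt, shown above, evaluates the predicate once per incoming edge and only reports a definite answer when every edge agrees, which resolves cases the merged lattice value cannot (the constantrange<1,5>/constantrange<10,20> example from the comment). A small self-contained sketch of that per-edge check, with plain intervals standing in for LVI's lattice values:

```cpp
#include <vector>
#include <cassert>

enum Tristate { False = 0, True = 1, Unknown = -1 };

struct Interval { int Lo, Hi; };     // inclusive range of one incoming value

static Tristate equalsConst(Interval R, int C) {
  if (C < R.Lo || C > R.Hi) return False;       // C cannot be in the range
  if (R.Lo == R.Hi && R.Lo == C) return True;   // the range is exactly {C}
  return Unknown;
}

static Tristate phiEqualsConst(const std::vector<Interval> &Incoming, int C) {
  Tristate Baseline = Unknown;
  for (size_t i = 0; i < Incoming.size(); ++i) {
    Tristate R = equalsConst(Incoming[i], C);
    // Keep going only while every edge gives the same definite answer.
    Baseline = (i == 0) ? R : (Baseline == R ? Baseline : Unknown);
    if (Baseline == Unknown)
      break;
  }
  return Baseline;
}

int main() {
  // %phi = phi [ %v1, %v2 ] with %v1 in [1,4] and %v2 in [10,19];
  // the merged range [1,19] cannot decide "icmp eq %phi, 8", per-edge it can.
  std::vector<Interval> In = {{1, 4}, {10, 19}};
  assert(phiEqualsConst(In, 8) == False);
  assert(phiEqualsConst(In, 3) == Unknown);  // first edge is inconclusive
  return 0;
}
```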
-char LibCallAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(LibCallAliasAnalysis, AliasAnalysis, "libcall-aa", -                   "LibCall Alias Analysis", false, true, false) - -FunctionPass *llvm::createLibCallAliasAnalysisPass(LibCallInfo *LCI) { -  return new LibCallAliasAnalysis(LCI); -} - -LibCallAliasAnalysis::~LibCallAliasAnalysis() { -  delete LCI; -} - -void LibCallAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { -  AliasAnalysis::getAnalysisUsage(AU); -  AU.setPreservesAll();                         // Does not transform code -} - -bool LibCallAliasAnalysis::runOnFunction(Function &F) { -  // set up super class -  InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); -  return false; -} - -/// AnalyzeLibCallDetails - Given a call to a function with the specified -/// LibCallFunctionInfo, see if we can improve the mod/ref footprint of the call -/// vs the specified pointer/size. -AliasAnalysis::ModRefResult -LibCallAliasAnalysis::AnalyzeLibCallDetails(const LibCallFunctionInfo *FI, -                                            ImmutableCallSite CS, -                                            const MemoryLocation &Loc) { -  // If we have a function, check to see what kind of mod/ref effects it -  // has.  Start by including any info globally known about the function. -  AliasAnalysis::ModRefResult MRInfo = FI->UniversalBehavior; -  if (MRInfo == NoModRef) return MRInfo; -   -  // If that didn't tell us that the function is 'readnone', check to see -  // if we have detailed info and if 'P' is any of the locations we know -  // about. -  const LibCallFunctionInfo::LocationMRInfo *Details = FI->LocationDetails; -  if (Details == nullptr) -    return MRInfo; -   -  // If the details array is of the 'DoesNot' kind, we only know something if -  // the pointer is a match for one of the locations in 'Details'.  If we find a -  // match, we can prove some interactions cannot happen. -  //  -  if (FI->DetailsType == LibCallFunctionInfo::DoesNot) { -    // Find out if the pointer refers to a known location. -    for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { -      const LibCallLocationInfo &LocInfo = -      LCI->getLocationInfo(Details[i].LocationID); -      LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); -      if (Res != LibCallLocationInfo::Yes) continue; -       -      // If we find a match against a location that we 'do not' interact with, -      // learn this info into MRInfo. -      return ModRefResult(MRInfo & ~Details[i].MRInfo); -    } -    return MRInfo; -  } -   -  // If the details are of the 'DoesOnly' sort, we know something if the pointer -  // is a match for one of the locations in 'Details'.  Also, if we can prove -  // that the pointers is *not* one of the locations in 'Details', we know that -  // the call is NoModRef. -  assert(FI->DetailsType == LibCallFunctionInfo::DoesOnly); -   -  // Find out if the pointer refers to a known location. -  bool NoneMatch = true; -  for (unsigned i = 0; Details[i].LocationID != ~0U; ++i) { -    const LibCallLocationInfo &LocInfo = -    LCI->getLocationInfo(Details[i].LocationID); -    LibCallLocationInfo::LocResult Res = LocInfo.isLocation(CS, Loc); -    if (Res == LibCallLocationInfo::No) continue; -     -    // If we don't know if this pointer points to the location, then we have to -    // assume it might alias in some case. 
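For reference, the mod/ref refinement in the removed LibCallAliasAnalysis::AnalyzeLibCallDetails starts from the conservative ModRef answer and clears bits as the location details rule accesses out. A toy version of the "DoesNot" rule, with a simple enum standing in for the old ModRefResult bits (the new interface uses the MRI_* values instead):

```cpp
#include <cassert>

enum ModRef { NoModRef = 0, Ref = 1, Mod = 2, ModRefBoth = 3 };

// "DoesNot" rule: if the queried pointer matches a listed location, the call
// is known not to perform the listed kinds of access on it.
static ModRef applyDoesNot(ModRef Current, bool PtrMatchesLocation,
                           ModRef RuledOut) {
  if (!PtrMatchesLocation)
    return Current;                      // no new information
  return ModRef(Current & ~RuledOut);    // clear the ruled-out bits
}

int main() {
  // A call known not to write the matched location: Mod is cleared, Ref remains.
  assert(applyDoesNot(ModRefBoth, /*PtrMatchesLocation=*/true, Mod) == Ref);
  // No match: stay conservative.
  assert(applyDoesNot(ModRefBoth, /*PtrMatchesLocation=*/false, Mod) == ModRefBoth);
  return 0;
}
```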
-    if (Res == LibCallLocationInfo::Unknown) { -      NoneMatch = false; -      continue; -    } -     -    // If we know that this pointer definitely is pointing into the location, -    // merge in this information. -    return ModRefResult(MRInfo & Details[i].MRInfo); -  } -   -  // If we found that the pointer is guaranteed to not match any of the -  // locations in our 'DoesOnly' rule, then we know that the pointer must point -  // to some other location.  Since the libcall doesn't mod/ref any other -  // locations, return NoModRef. -  if (NoneMatch) -    return NoModRef; -   -  // Otherwise, return any other info gained so far. -  return MRInfo; -} - -// getModRefInfo - Check to see if the specified callsite can clobber the -// specified memory object. -// -AliasAnalysis::ModRefResult -LibCallAliasAnalysis::getModRefInfo(ImmutableCallSite CS, -                                    const MemoryLocation &Loc) { -  ModRefResult MRInfo = ModRef; -   -  // If this is a direct call to a function that LCI knows about, get the -  // information about the runtime function. -  if (LCI) { -    if (const Function *F = CS.getCalledFunction()) { -      if (const LibCallFunctionInfo *FI = LCI->getFunctionInfo(F)) { -        MRInfo = ModRefResult(MRInfo & AnalyzeLibCallDetails(FI, CS, Loc)); -        if (MRInfo == NoModRef) return NoModRef; -      } -    } -  } -   -  // The AliasAnalysis base class has some smarts, lets use them. -  return (ModRefResult)(MRInfo | AliasAnalysis::getModRefInfo(CS, Loc)); -} diff --git a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp b/contrib/llvm/lib/Analysis/LibCallSemantics.cpp deleted file mode 100644 index 003c81e87b60..000000000000 --- a/contrib/llvm/lib/Analysis/LibCallSemantics.cpp +++ /dev/null @@ -1,89 +0,0 @@ -//===- LibCallSemantics.cpp - Describe library semantics ------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements interfaces that can be used to describe language -// specific runtime library interfaces (e.g. libc, libm, etc) to LLVM -// optimizers. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/Function.h" -using namespace llvm; - -/// This impl pointer in ~LibCallInfo is actually a StringMap.  This -/// helper does the cast. -static StringMap<const LibCallFunctionInfo*> *getMap(void *Ptr) { -  return static_cast<StringMap<const LibCallFunctionInfo*> *>(Ptr); -} - -LibCallInfo::~LibCallInfo() { -  delete getMap(Impl); -} - -const LibCallLocationInfo &LibCallInfo::getLocationInfo(unsigned LocID) const { -  // Get location info on the first call. -  if (NumLocations == 0) -    NumLocations = getLocationInfo(Locations); -   -  assert(LocID < NumLocations && "Invalid location ID!"); -  return Locations[LocID]; -} - - -/// Return the LibCallFunctionInfo object corresponding to -/// the specified function if we have it.  If not, return null. -const LibCallFunctionInfo * -LibCallInfo::getFunctionInfo(const Function *F) const { -  StringMap<const LibCallFunctionInfo*> *Map = getMap(Impl); -   -  /// If this is the first time we are querying for this info, lazily construct -  /// the StringMap to index it. 
-  if (!Map) { -    Impl = Map = new StringMap<const LibCallFunctionInfo*>(); -     -    const LibCallFunctionInfo *Array = getFunctionInfoArray(); -    if (!Array) return nullptr; -     -    // We now have the array of entries.  Populate the StringMap. -    for (unsigned i = 0; Array[i].Name; ++i) -      (*Map)[Array[i].Name] = Array+i; -  } -   -  // Look up this function in the string map. -  return Map->lookup(F->getName()); -} - -/// See if the given exception handling personality function is one that we -/// understand.  If so, return a description of it; otherwise return Unknown. -EHPersonality llvm::classifyEHPersonality(const Value *Pers) { -  const Function *F = dyn_cast<Function>(Pers->stripPointerCasts()); -  if (!F) -    return EHPersonality::Unknown; -  return StringSwitch<EHPersonality>(F->getName()) -    .Case("__gnat_eh_personality", EHPersonality::GNU_Ada) -    .Case("__gxx_personality_v0",  EHPersonality::GNU_CXX) -    .Case("__gcc_personality_v0",  EHPersonality::GNU_C) -    .Case("__objc_personality_v0", EHPersonality::GNU_ObjC) -    .Case("_except_handler3",      EHPersonality::MSVC_X86SEH) -    .Case("_except_handler4",      EHPersonality::MSVC_X86SEH) -    .Case("__C_specific_handler",  EHPersonality::MSVC_Win64SEH) -    .Case("__CxxFrameHandler3",    EHPersonality::MSVC_CXX) -    .Default(EHPersonality::Unknown); -} - -bool llvm::canSimplifyInvokeNoUnwind(const Function *F) { -  EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn()); -  // We can't simplify any invokes to nounwind functions if the personality -  // function wants to catch asynch exceptions.  The nounwind attribute only -  // implies that the function does not throw synchronous exceptions. -  return !isAsynchronousEHPersonality(Personality); -} diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 0b9308a573a0..2dfb09c95ad6 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -49,6 +49,7 @@  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h"  #include "llvm/IR/InstVisitor.h"  #include "llvm/IR/IntrinsicInst.h"  #include "llvm/IR/LegacyPassManager.h" @@ -98,12 +99,13 @@ namespace {      void visitInsertElementInst(InsertElementInst &I);      void visitUnreachableInst(UnreachableInst &I); -    Value *findValue(Value *V, const DataLayout &DL, bool OffsetOk) const; -    Value *findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, +    Value *findValue(Value *V, bool OffsetOk) const; +    Value *findValueImpl(Value *V, bool OffsetOk,                           SmallPtrSetImpl<Value *> &Visited) const;    public:      Module *Mod; +    const DataLayout *DL;      AliasAnalysis *AA;      AssumptionCache *AC;      DominatorTree *DT; @@ -121,7 +123,7 @@ namespace {      void getAnalysisUsage(AnalysisUsage &AU) const override {        AU.setPreservesAll(); -      AU.addRequired<AliasAnalysis>(); +      AU.addRequired<AAResultsWrapperPass>();        AU.addRequired<AssumptionCacheTracker>();        AU.addRequired<TargetLibraryInfoWrapperPass>();        AU.addRequired<DominatorTreeWrapperPass>(); @@ -165,7 +167,7 @@ INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)  INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)  
INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",                      false, true) @@ -178,7 +180,8 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",  //  bool Lint::runOnFunction(Function &F) {    Mod = F.getParent(); -  AA = &getAnalysis<AliasAnalysis>(); +  DL = &F.getParent()->getDataLayout(); +  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();    AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();    TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); @@ -200,12 +203,11 @@ void Lint::visitFunction(Function &F) {  void Lint::visitCallSite(CallSite CS) {    Instruction &I = *CS.getInstruction();    Value *Callee = CS.getCalledValue(); -  const DataLayout &DL = CS->getModule()->getDataLayout();    visitMemoryReference(I, Callee, MemoryLocation::UnknownSize, 0, nullptr,                         MemRef::Callee); -  if (Function *F = dyn_cast<Function>(findValue(Callee, DL, +  if (Function *F = dyn_cast<Function>(findValue(Callee,                                                   /*OffsetOk=*/false))) {      Assert(CS.getCallingConv() == F->getCallingConv(),             "Undefined behavior: Caller and callee calling convention differ", @@ -232,7 +234,7 @@ void Lint::visitCallSite(CallSite CS) {      for (; AI != AE; ++AI) {        Value *Actual = *AI;        if (PI != PE) { -        Argument *Formal = PI++; +        Argument *Formal = &*PI++;          Assert(Formal->getType() == Actual->getType(),                 "Undefined behavior: Call argument type mismatches "                 "callee parameter type", @@ -253,8 +255,8 @@ void Lint::visitCallSite(CallSite CS) {          if (Formal->hasStructRetAttr() && Actual->getType()->isPointerTy()) {            Type *Ty =              cast<PointerType>(Formal->getType())->getElementType(); -          visitMemoryReference(I, Actual, AA->getTypeStoreSize(Ty), -                               DL.getABITypeAlignment(Ty), Ty, +          visitMemoryReference(I, Actual, DL->getTypeStoreSize(Ty), +                               DL->getABITypeAlignment(Ty), Ty,                                 MemRef::Read | MemRef::Write);          }        } @@ -264,7 +266,7 @@ void Lint::visitCallSite(CallSite CS) {    if (CS.isCall() && cast<CallInst>(CS.getInstruction())->isTailCall())      for (CallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();           AI != AE; ++AI) { -      Value *Obj = findValue(*AI, DL, /*OffsetOk=*/true); +      Value *Obj = findValue(*AI, /*OffsetOk=*/true);        Assert(!isa<AllocaInst>(Obj),               "Undefined behavior: Call with \"tail\" keyword references "               "alloca", @@ -291,7 +293,7 @@ void Lint::visitCallSite(CallSite CS) {        // overlap is not distinguished from the case where nothing is known.        
uint64_t Size = 0;        if (const ConstantInt *Len = -              dyn_cast<ConstantInt>(findValue(MCI->getLength(), DL, +              dyn_cast<ConstantInt>(findValue(MCI->getLength(),                                                /*OffsetOk=*/false)))          if (Len->getValue().isIntN(32))            Size = Len->getValue().getZExtValue(); @@ -343,13 +345,6 @@ void Lint::visitCallSite(CallSite CS) {        visitMemoryReference(I, CS.getArgument(0), MemoryLocation::UnknownSize, 0,                             nullptr, MemRef::Read | MemRef::Write);        break; - -    case Intrinsic::eh_begincatch: -      visitEHBeginCatch(II); -      break; -    case Intrinsic::eh_endcatch: -      visitEHEndCatch(II); -      break;      }  } @@ -367,8 +362,7 @@ void Lint::visitReturnInst(ReturnInst &I) {           "Unusual: Return statement in function with noreturn attribute", &I);    if (Value *V = I.getReturnValue()) { -    Value *Obj = -        findValue(V, F->getParent()->getDataLayout(), /*OffsetOk=*/true); +    Value *Obj = findValue(V, /*OffsetOk=*/true);      Assert(!isa<AllocaInst>(Obj), "Unusual: Returning alloca value", &I);    }  } @@ -383,8 +377,7 @@ void Lint::visitMemoryReference(Instruction &I,    if (Size == 0)      return; -  Value *UnderlyingObject = -      findValue(Ptr, I.getModule()->getDataLayout(), /*OffsetOk=*/true); +  Value *UnderlyingObject = findValue(Ptr, /*OffsetOk=*/true);    Assert(!isa<ConstantPointerNull>(UnderlyingObject),           "Undefined behavior: Null pointer dereference", &I);    Assert(!isa<UndefValue>(UnderlyingObject), @@ -423,9 +416,8 @@ void Lint::visitMemoryReference(Instruction &I,    // Check for buffer overflows and misalignment.    // Only handles memory references that read/write something simple like an    // alloca instruction or a global variable. -  auto &DL = I.getModule()->getDataLayout();    int64_t Offset = 0; -  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, DL)) { +  if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *DL)) {      // OK, so the access is to a constant offset from Ptr.  Check that Ptr is      // something we can handle and if so extract the size of this base object      // along with its alignment. @@ -435,20 +427,20 @@ void Lint::visitMemoryReference(Instruction &I,      if (AllocaInst *AI = dyn_cast<AllocaInst>(Base)) {        Type *ATy = AI->getAllocatedType();        if (!AI->isArrayAllocation() && ATy->isSized()) -        BaseSize = DL.getTypeAllocSize(ATy); +        BaseSize = DL->getTypeAllocSize(ATy);        BaseAlign = AI->getAlignment();        if (BaseAlign == 0 && ATy->isSized()) -        BaseAlign = DL.getABITypeAlignment(ATy); +        BaseAlign = DL->getABITypeAlignment(ATy);      } else if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Base)) {        // If the global may be defined differently in another compilation unit        // then don't warn about funky memory accesses.        if (GV->hasDefinitiveInitializer()) {          Type *GTy = GV->getType()->getElementType();          if (GTy->isSized()) -          BaseSize = DL.getTypeAllocSize(GTy); +          BaseSize = DL->getTypeAllocSize(GTy);          BaseAlign = GV->getAlignment();          if (BaseAlign == 0 && GTy->isSized()) -          BaseAlign = DL.getABITypeAlignment(GTy); +          BaseAlign = DL->getABITypeAlignment(GTy);        }      } @@ -462,7 +454,7 @@ void Lint::visitMemoryReference(Instruction &I,      // Accesses that say that the memory is more aligned than it is are not      // defined.      
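The misalignment check in Lint::visitMemoryReference above only trusts the alignment that both the base object and the constant offset guarantee, i.e. MinAlign(BaseAlign, Offset). A worked example, using a local minAlign helper (the same lowest-set-bit idea as llvm::MinAlign; __builtin_ctzll is a GCC/Clang builtin):

```cpp
#include <cassert>
#include <cstdint>

// Greatest power of two that divides both A and B (A and B assumed non-zero).
static uint64_t minAlign(uint64_t A, uint64_t B) {
  return uint64_t(1) << __builtin_ctzll(A | B);
}

int main() {
  // 16-byte aligned base, access at offset 4: only 4-byte alignment is
  // guaranteed, so an access claiming align 8 would be flagged by Lint.
  uint64_t BaseAlign = 16, Offset = 4, ClaimedAlign = 8;
  assert(minAlign(BaseAlign, Offset) == 4);
  assert(!(ClaimedAlign <= minAlign(BaseAlign, Offset)));  // Lint would warn
  return 0;
}
```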
if (Align == 0 && Ty && Ty->isSized()) -      Align = DL.getABITypeAlignment(Ty); +      Align = DL->getABITypeAlignment(Ty);      Assert(!BaseAlign || Align <= MinAlign(BaseAlign, Offset),             "Undefined behavior: Memory reference address is misaligned", &I);    } @@ -470,13 +462,13 @@ void Lint::visitMemoryReference(Instruction &I,  void Lint::visitLoadInst(LoadInst &I) {    visitMemoryReference(I, I.getPointerOperand(), -                       AA->getTypeStoreSize(I.getType()), I.getAlignment(), +                       DL->getTypeStoreSize(I.getType()), I.getAlignment(),                         I.getType(), MemRef::Read);  }  void Lint::visitStoreInst(StoreInst &I) {    visitMemoryReference(I, I.getPointerOperand(), -                       AA->getTypeStoreSize(I.getOperand(0)->getType()), +                       DL->getTypeStoreSize(I.getOperand(0)->getType()),                         I.getAlignment(),                         I.getOperand(0)->getType(), MemRef::Write);  } @@ -492,208 +484,26 @@ void Lint::visitSub(BinaryOperator &I) {  }  void Lint::visitLShr(BinaryOperator &I) { -  if (ConstantInt *CI = dyn_cast<ConstantInt>( -          findValue(I.getOperand(1), I.getModule()->getDataLayout(), -                    /*OffsetOk=*/false))) +  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(1), +                                                        /*OffsetOk=*/false)))      Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),             "Undefined result: Shift count out of range", &I);  }  void Lint::visitAShr(BinaryOperator &I) { -  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( -          I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) +  if (ConstantInt *CI = +          dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))      Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),             "Undefined result: Shift count out of range", &I);  }  void Lint::visitShl(BinaryOperator &I) { -  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue( -          I.getOperand(1), I.getModule()->getDataLayout(), /*OffsetOk=*/false))) +  if (ConstantInt *CI = +          dyn_cast<ConstantInt>(findValue(I.getOperand(1), /*OffsetOk=*/false)))      Assert(CI->getValue().ult(cast<IntegerType>(I.getType())->getBitWidth()),             "Undefined result: Shift count out of range", &I);  } -static bool -allPredsCameFromLandingPad(BasicBlock *BB, -                           SmallSet<BasicBlock *, 4> &VisitedBlocks) { -  VisitedBlocks.insert(BB); -  if (BB->isLandingPad()) -    return true; -  // If we find a block with no predecessors, the search failed. -  if (pred_empty(BB)) -    return false; -  for (BasicBlock *Pred : predecessors(BB)) { -    if (VisitedBlocks.count(Pred)) -      continue; -    if (!allPredsCameFromLandingPad(Pred, VisitedBlocks)) -      return false; -  } -  return true; -} - -static bool -allSuccessorsReachEndCatch(BasicBlock *BB, BasicBlock::iterator InstBegin, -                           IntrinsicInst **SecondBeginCatch, -                           SmallSet<BasicBlock *, 4> &VisitedBlocks) { -  VisitedBlocks.insert(BB); -  for (BasicBlock::iterator I = InstBegin, E = BB->end(); I != E; ++I) { -    IntrinsicInst *IC = dyn_cast<IntrinsicInst>(I); -    if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) -      return true; -    // If we find another begincatch while looking for an endcatch, -    // that's also an error. 
-    if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) { -      *SecondBeginCatch = IC; -      return false; -    } -  } - -  // If we reach a block with no successors while searching, the -  // search has failed. -  if (succ_empty(BB)) -    return false; -  // Otherwise, search all of the successors. -  for (BasicBlock *Succ : successors(BB)) { -    if (VisitedBlocks.count(Succ)) -      continue; -    if (!allSuccessorsReachEndCatch(Succ, Succ->begin(), SecondBeginCatch, -                                    VisitedBlocks)) -      return false; -  } -  return true; -} - -void Lint::visitEHBeginCatch(IntrinsicInst *II) { -  // The checks in this function make a potentially dubious assumption about -  // the CFG, namely that any block involved in a catch is only used for the -  // catch.  This will very likely be true of IR generated by a front end, -  // but it may cease to be true, for example, if the IR is run through a -  // pass which combines similar blocks. -  // -  // In general, if we encounter a block the isn't dominated by the catch -  // block while we are searching the catch block's successors for a call -  // to end catch intrinsic, then it is possible that it will be legal for -  // a path through this block to never reach a call to llvm.eh.endcatch. -  // An analogous statement could be made about our search for a landing -  // pad among the catch block's predecessors. -  // -  // What is actually required is that no path is possible at runtime that -  // reaches a call to llvm.eh.begincatch without having previously visited -  // a landingpad instruction and that no path is possible at runtime that -  // calls llvm.eh.begincatch and does not subsequently call llvm.eh.endcatch -  // (mentally adjusting for the fact that in reality these calls will be -  // removed before code generation). -  // -  // Because this is a lint check, we take a pessimistic approach and warn if -  // the control flow is potentially incorrect. - -  SmallSet<BasicBlock *, 4> VisitedBlocks; -  BasicBlock *CatchBB = II->getParent(); - -  // The begin catch must occur in a landing pad block or all paths -  // to it must have come from a landing pad. -  Assert(allPredsCameFromLandingPad(CatchBB, VisitedBlocks), -         "llvm.eh.begincatch may be reachable without passing a landingpad", -         II); - -  // Reset the visited block list. -  VisitedBlocks.clear(); - -  IntrinsicInst *SecondBeginCatch = nullptr; - -  // This has to be called before it is asserted.  Otherwise, the first assert -  // below can never be hit. -  bool EndCatchFound = allSuccessorsReachEndCatch( -      CatchBB, std::next(static_cast<BasicBlock::iterator>(II)), -      &SecondBeginCatch, VisitedBlocks); -  Assert( -      SecondBeginCatch == nullptr, -      "llvm.eh.begincatch may be called a second time before llvm.eh.endcatch", -      II, SecondBeginCatch); -  Assert(EndCatchFound, -         "Some paths from llvm.eh.begincatch may not reach llvm.eh.endcatch", -         II); -} - -static bool allPredCameFromBeginCatch( -    BasicBlock *BB, BasicBlock::reverse_iterator InstRbegin, -    IntrinsicInst **SecondEndCatch, SmallSet<BasicBlock *, 4> &VisitedBlocks) { -  VisitedBlocks.insert(BB); -  // Look for a begincatch in this block. 
-  for (BasicBlock::reverse_iterator RI = InstRbegin, RE = BB->rend(); RI != RE; -       ++RI) { -    IntrinsicInst *IC = dyn_cast<IntrinsicInst>(&*RI); -    if (IC && IC->getIntrinsicID() == Intrinsic::eh_begincatch) -      return true; -    // If we find another end catch before we find a begin catch, that's -    // an error. -    if (IC && IC->getIntrinsicID() == Intrinsic::eh_endcatch) { -      *SecondEndCatch = IC; -      return false; -    } -    // If we encounter a landingpad instruction, the search failed. -    if (isa<LandingPadInst>(*RI)) -      return false; -  } -  // If while searching we find a block with no predeccesors, -  // the search failed. -  if (pred_empty(BB)) -    return false; -  // Search any predecessors we haven't seen before. -  for (BasicBlock *Pred : predecessors(BB)) { -    if (VisitedBlocks.count(Pred)) -      continue; -    if (!allPredCameFromBeginCatch(Pred, Pred->rbegin(), SecondEndCatch, -                                   VisitedBlocks)) -      return false; -  } -  return true; -} - -void Lint::visitEHEndCatch(IntrinsicInst *II) { -  // The check in this function makes a potentially dubious assumption about -  // the CFG, namely that any block involved in a catch is only used for the -  // catch.  This will very likely be true of IR generated by a front end, -  // but it may cease to be true, for example, if the IR is run through a -  // pass which combines similar blocks. -  // -  // In general, if we encounter a block the isn't post-dominated by the -  // end catch block while we are searching the end catch block's predecessors -  // for a call to the begin catch intrinsic, then it is possible that it will -  // be legal for a path to reach the end catch block without ever having -  // called llvm.eh.begincatch. -  // -  // What is actually required is that no path is possible at runtime that -  // reaches a call to llvm.eh.endcatch without having previously visited -  // a call to llvm.eh.begincatch (mentally adjusting for the fact that in -  // reality these calls will be removed before code generation). -  // -  // Because this is a lint check, we take a pessimistic approach and warn if -  // the control flow is potentially incorrect. - -  BasicBlock *EndCatchBB = II->getParent(); - -  // Alls paths to the end catch call must pass through a begin catch call. - -  // If llvm.eh.begincatch wasn't called in the current block, we'll use this -  // lambda to recursively look for it in predecessors. -  SmallSet<BasicBlock *, 4> VisitedBlocks; -  IntrinsicInst *SecondEndCatch = nullptr; - -  // This has to be called before it is asserted.  Otherwise, the first assert -  // below can never be hit. -  bool BeginCatchFound = -      allPredCameFromBeginCatch(EndCatchBB, BasicBlock::reverse_iterator(II), -                                &SecondEndCatch, VisitedBlocks); -  Assert( -      SecondEndCatch == nullptr, -      "llvm.eh.endcatch may be called a second time after llvm.eh.begincatch", -      II, SecondEndCatch); -  Assert(BeginCatchFound, -         "llvm.eh.endcatch may be reachable without passing llvm.eh.begincatch", -         II); -} -  static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT,                     AssumptionCache *AC) {    // Assume undef could be zero. 
@@ -777,25 +587,23 @@ void Lint::visitIndirectBrInst(IndirectBrInst &I) {  }  void Lint::visitExtractElementInst(ExtractElementInst &I) { -  if (ConstantInt *CI = dyn_cast<ConstantInt>( -          findValue(I.getIndexOperand(), I.getModule()->getDataLayout(), -                    /*OffsetOk=*/false))) +  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getIndexOperand(), +                                                        /*OffsetOk=*/false)))      Assert(CI->getValue().ult(I.getVectorOperandType()->getNumElements()),             "Undefined result: extractelement index out of range", &I);  }  void Lint::visitInsertElementInst(InsertElementInst &I) { -  if (ConstantInt *CI = dyn_cast<ConstantInt>( -          findValue(I.getOperand(2), I.getModule()->getDataLayout(), -                    /*OffsetOk=*/false))) +  if (ConstantInt *CI = dyn_cast<ConstantInt>(findValue(I.getOperand(2), +                                                        /*OffsetOk=*/false)))      Assert(CI->getValue().ult(I.getType()->getNumElements()),             "Undefined result: insertelement index out of range", &I);  }  void Lint::visitUnreachableInst(UnreachableInst &I) {    // This isn't undefined behavior, it's merely suspicious. -  Assert(&I == I.getParent()->begin() || -             std::prev(BasicBlock::iterator(&I))->mayHaveSideEffects(), +  Assert(&I == &I.getParent()->front() || +             std::prev(I.getIterator())->mayHaveSideEffects(),           "Unusual: unreachable immediately preceded by instruction without "           "side effects",           &I); @@ -808,13 +616,13 @@ void Lint::visitUnreachableInst(UnreachableInst &I) {  /// Most analysis passes don't require this logic, because instcombine  /// will simplify most of these kinds of things away. But it's a goal of  /// this Lint pass to be useful even on non-optimized IR. -Value *Lint::findValue(Value *V, const DataLayout &DL, bool OffsetOk) const { +Value *Lint::findValue(Value *V, bool OffsetOk) const {    SmallPtrSet<Value *, 4> Visited; -  return findValueImpl(V, DL, OffsetOk, Visited); +  return findValueImpl(V, OffsetOk, Visited);  }  /// findValueImpl - Implementation helper for findValue. -Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk, +Value *Lint::findValueImpl(Value *V, bool OffsetOk,                             SmallPtrSetImpl<Value *> &Visited) const {    // Detect self-referential values.    if (!Visited.insert(V).second) @@ -825,17 +633,18 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,    // TODO: Look through eliminable cast pairs.    // TODO: Look through calls with unique return values.    // TODO: Look through vector insert/extract/shuffle. -  V = OffsetOk ? GetUnderlyingObject(V, DL) : V->stripPointerCasts(); +  V = OffsetOk ? 
GetUnderlyingObject(V, *DL) : V->stripPointerCasts();    if (LoadInst *L = dyn_cast<LoadInst>(V)) { -    BasicBlock::iterator BBI = L; +    BasicBlock::iterator BBI = L->getIterator();      BasicBlock *BB = L->getParent();      SmallPtrSet<BasicBlock *, 4> VisitedBlocks;      for (;;) {        if (!VisitedBlocks.insert(BB).second)          break; -      if (Value *U = FindAvailableLoadedValue(L->getPointerOperand(), -                                              BB, BBI, 6, AA)) -        return findValueImpl(U, DL, OffsetOk, Visited); +      if (Value *U = +          FindAvailableLoadedValue(L->getPointerOperand(), +                                   BB, BBI, DefMaxInstsToScan, AA)) +        return findValueImpl(U, OffsetOk, Visited);        if (BBI != BB->begin()) break;        BB = BB->getUniquePredecessor();        if (!BB) break; @@ -844,38 +653,38 @@ Value *Lint::findValueImpl(Value *V, const DataLayout &DL, bool OffsetOk,    } else if (PHINode *PN = dyn_cast<PHINode>(V)) {      if (Value *W = PN->hasConstantValue())        if (W != V) -        return findValueImpl(W, DL, OffsetOk, Visited); +        return findValueImpl(W, OffsetOk, Visited);    } else if (CastInst *CI = dyn_cast<CastInst>(V)) { -    if (CI->isNoopCast(DL)) -      return findValueImpl(CI->getOperand(0), DL, OffsetOk, Visited); +    if (CI->isNoopCast(*DL)) +      return findValueImpl(CI->getOperand(0), OffsetOk, Visited);    } else if (ExtractValueInst *Ex = dyn_cast<ExtractValueInst>(V)) {      if (Value *W = FindInsertedValue(Ex->getAggregateOperand(),                                       Ex->getIndices()))        if (W != V) -        return findValueImpl(W, DL, OffsetOk, Visited); +        return findValueImpl(W, OffsetOk, Visited);    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {      // Same as above, but for ConstantExpr instead of Instruction.      if (Instruction::isCast(CE->getOpcode())) {        if (CastInst::isNoopCast(Instruction::CastOps(CE->getOpcode()),                                 CE->getOperand(0)->getType(), CE->getType(), -                               DL.getIntPtrType(V->getType()))) -        return findValueImpl(CE->getOperand(0), DL, OffsetOk, Visited); +                               DL->getIntPtrType(V->getType()))) +        return findValueImpl(CE->getOperand(0), OffsetOk, Visited);      } else if (CE->getOpcode() == Instruction::ExtractValue) {        ArrayRef<unsigned> Indices = CE->getIndices();        if (Value *W = FindInsertedValue(CE->getOperand(0), Indices))          if (W != V) -          return findValueImpl(W, DL, OffsetOk, Visited); +          return findValueImpl(W, OffsetOk, Visited);      }    }    // As a last resort, try SimplifyInstruction or constant folding.    
if (Instruction *Inst = dyn_cast<Instruction>(V)) { -    if (Value *W = SimplifyInstruction(Inst, DL, TLI, DT, AC)) -      return findValueImpl(W, DL, OffsetOk, Visited); +    if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC)) +      return findValueImpl(W, OffsetOk, Visited);    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { -    if (Value *W = ConstantFoldConstantExpression(CE, DL, TLI)) +    if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))        if (W != V) -        return findValueImpl(W, DL, OffsetOk, Visited); +        return findValueImpl(W, OffsetOk, Visited);    }    return V; diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp index 624c5a18d679..4b2fa3c6505a 100644 --- a/contrib/llvm/lib/Analysis/Loads.cpp +++ b/contrib/llvm/lib/Analysis/Loads.cpp @@ -118,7 +118,8 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,    // from/to.  If so, the previous load or store would have already trapped,    // so there is no harm doing an extra load (also, CSE will later eliminate    // the load entirely). -  BasicBlock::iterator BBI = ScanFrom, E = ScanFrom->getParent()->begin(); +  BasicBlock::iterator BBI = ScanFrom->getIterator(), +                       E = ScanFrom->getParent()->begin();    // We can at least always strip pointer casts even though we can't use the    // base here. @@ -161,6 +162,18 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom,    return false;  } +/// DefMaxInstsToScan - the default number of maximum instructions +/// to scan in the block, used by FindAvailableLoadedValue(). +/// FindAvailableLoadedValue() was introduced in r60148, to improve jump +/// threading in part by eliminating partially redundant loads. +/// At that point, the value of MaxInstsToScan was already set to '6' +/// without documented explanation. +cl::opt<unsigned> +llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden, +  cl::desc("Use this to specify the default maximum number of instructions " +           "to scan backward from a given instruction, when searching for " +           "available loaded value")); +  /// \brief Scan the ScanBB block backwards to see if we have the value at the  /// memory address *Ptr locally available within a small number of instructions.  /// @@ -199,7 +212,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,    while (ScanFrom != ScanBB->begin()) {      // We must ignore debug info directives when counting (otherwise they      // would affect codegen). -    Instruction *Inst = --ScanFrom; +    Instruction *Inst = &*--ScanFrom;      if (isa<DbgInfoIntrinsic>(Inst))        continue; @@ -246,9 +259,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,        // If we have alias analysis and it says the store won't modify the loaded        // value, ignore the store. -      if (AA && -          (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & -           AliasAnalysis::Mod) == 0) +      if (AA && (AA->getModRefInfo(SI, StrippedPtr, AccessSize) & MRI_Mod) == 0)          continue;        // Otherwise the store that may or may not alias the pointer, bail out. @@ -261,8 +272,7 @@ Value *llvm::FindAvailableLoadedValue(Value *Ptr, BasicBlock *ScanBB,        // If alias analysis claims that it really won't modify the load,        // ignore it.        
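The new DefMaxInstsToScan option above bounds how far FindAvailableLoadedValue walks backwards from a load looking for an already-available value. The sketch below shows the shape of that scan on a toy instruction list: it forwards a prior store to the same address and gives up at the scan limit. Unlike the real code, it simply bails on any other store instead of consulting alias analysis.

```cpp
#include <vector>
#include <string>
#include <optional>
#include <cassert>

struct Inst {
  enum Kind { Store, Load, Other } K;
  std::string Addr;   // which "address" it touches (empty for Other)
  int Value = 0;      // value stored, for Store
};

static std::optional<int>
findAvailableLoadedValue(const std::vector<Inst> &Block, size_t LoadIdx,
                         unsigned MaxInstsToScan = 6) {
  const std::string &Addr = Block[LoadIdx].Addr;
  unsigned Scanned = 0;
  for (size_t i = LoadIdx; i-- > 0;) {
    if (++Scanned > MaxInstsToScan)
      return std::nullopt;                 // scan limit reached, give up
    const Inst &I = Block[i];
    if (I.K == Inst::Store && I.Addr == Addr)
      return I.Value;                      // forward the stored value
    if (I.K == Inst::Store && I.Addr != Addr)
      return std::nullopt;                 // may clobber (no AA in this sketch)
  }
  return std::nullopt;                     // hit the start of the block
}

int main() {
  std::vector<Inst> BB = {
    {Inst::Store, "p", 7},
    {Inst::Other, "", 0},
    {Inst::Load, "p", 0},
  };
  assert(findAvailableLoadedValue(BB, 2) == 7);
  assert(findAvailableLoadedValue(BB, 2, /*MaxInstsToScan=*/1) == std::nullopt);
  return 0;
}
```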
if (AA && -          (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & -           AliasAnalysis::Mod) == 0) +          (AA->getModRefInfo(Inst, StrippedPtr, AccessSize) & MRI_Mod) == 0)          continue;        // May modify the pointer, bail out. diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp index becbae4c5b50..d7896ade3543 100644 --- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp @@ -58,12 +58,12 @@ static cl::opt<unsigned> MemoryCheckMergeThreshold(  /// Maximum SIMD width.  const unsigned VectorizerParams::MaxVectorWidth = 64; -/// \brief We collect interesting dependences up to this threshold. -static cl::opt<unsigned> MaxInterestingDependence( -    "max-interesting-dependences", cl::Hidden, -    cl::desc("Maximum number of interesting dependences collected by " -             "loop-access analysis (default = 100)"), -    cl::init(100)); +/// \brief We collect dependences up to this threshold. +static cl::opt<unsigned> +    MaxDependences("max-dependences", cl::Hidden, +                   cl::desc("Maximum number of dependences collected by " +                            "loop-access analysis (default = 100)"), +                   cl::init(100));  bool VectorizerParams::isInterleaveForced() {    return ::VectorizationInterleave.getNumOccurrences() > 0; @@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) {    return V;  } -const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,                                              const ValueToValueMap &PtrToStride,                                              Value *Ptr, Value *OrigPtr) { - -  const SCEV *OrigSCEV = SE->getSCEV(Ptr); +  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);    // If there is an entry in the map return the SCEV of the pointer with the    // symbolic stride replaced by one. @@ -108,36 +107,82 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE,      ValueToValueMap RewriteMap;      RewriteMap[StrideVal] = One; -    const SCEV *ByOne = -        SCEVParameterRewriter::rewrite(OrigSCEV, *SE, RewriteMap, true); -    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne +    ScalarEvolution *SE = PSE.getSE(); +    const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal)); +    const auto *CT = +        static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType())); + +    PSE.addPredicate(*SE->getEqualPredicate(U, CT)); +    auto *Expr = PSE.getSCEV(Ptr); + +    DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr                   << "\n"); -    return ByOne; +    return Expr;    }    // Otherwise, just return the SCEV of the original pointer. -  return SE->getSCEV(Ptr); +  return OrigSCEV;  }  void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,                                      unsigned DepSetId, unsigned ASId, -                                    const ValueToValueMap &Strides) { +                                    const ValueToValueMap &Strides, +                                    PredicatedScalarEvolution &PSE) {    // Get the stride replaced scev. 
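// --- Illustrative sketch (names and types are stand-ins, not the SCEV API):
// replaceSymbolicStrideSCEV now goes through PredicatedScalarEvolution, i.e.
// instead of rewriting the expression in place it records the assumption
// "symbolic stride == 1" as a predicate to be validated later and returns the
// simplified form. A rough model of that record-and-simplify idea:
#include <string>
#include <vector>

struct Predicate { std::string Symbol; long long MustEqual; };

struct PredicateSet {
  std::vector<Predicate> Assumptions;
  void assumeEquals(const std::string &Sym, long long V) {
    Assumptions.push_back({Sym, V});
  }
};

// Returns the stride to use for analysis, assuming the symbolic stride is 1
// and recording that assumption for a later check.
long long strideUnderAssumption(const std::string &SymbolicStride,
                                PredicateSet &PSE) {
  PSE.assumeEquals(SymbolicStride, 1);
  return 1;
}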
-  const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Ptr); +  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);    assert(AR && "Invalid addrec expression"); +  ScalarEvolution *SE = PSE.getSE();    const SCEV *Ex = SE->getBackedgeTakenCount(Lp); + +  const SCEV *ScStart = AR->getStart();    const SCEV *ScEnd = AR->evaluateAtIteration(Ex, *SE); -  Pointers.emplace_back(Ptr, AR->getStart(), ScEnd, WritePtr, DepSetId, ASId, -                        Sc); +  const SCEV *Step = AR->getStepRecurrence(*SE); + +  // For expressions with negative step, the upper bound is ScStart and the +  // lower bound is ScEnd. +  if (const SCEVConstant *CStep = dyn_cast<const SCEVConstant>(Step)) { +    if (CStep->getValue()->isNegative()) +      std::swap(ScStart, ScEnd); +  } else { +    // Fallback case: the step is not constant, but the we can still +    // get the upper and lower bounds of the interval by using min/max +    // expressions. +    ScStart = SE->getUMinExpr(ScStart, ScEnd); +    ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); +  } + +  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc); +} + +SmallVector<RuntimePointerChecking::PointerCheck, 4> +RuntimePointerChecking::generateChecks() const { +  SmallVector<PointerCheck, 4> Checks; + +  for (unsigned I = 0; I < CheckingGroups.size(); ++I) { +    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { +      const RuntimePointerChecking::CheckingPtrGroup &CGI = CheckingGroups[I]; +      const RuntimePointerChecking::CheckingPtrGroup &CGJ = CheckingGroups[J]; + +      if (needsChecking(CGI, CGJ)) +        Checks.push_back(std::make_pair(&CGI, &CGJ)); +    } +  } +  return Checks; +} + +void RuntimePointerChecking::generateChecks( +    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { +  assert(Checks.empty() && "Checks is not empty"); +  groupChecks(DepCands, UseDependencies); +  Checks = generateChecks();  } -bool RuntimePointerChecking::needsChecking( -    const CheckingPtrGroup &M, const CheckingPtrGroup &N, -    const SmallVectorImpl<int> *PtrPartition) const { +bool RuntimePointerChecking::needsChecking(const CheckingPtrGroup &M, +                                           const CheckingPtrGroup &N) const {    for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)      for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J) -      if (needsChecking(M.Members[I], N.Members[J], PtrPartition)) +      if (needsChecking(M.Members[I], N.Members[J]))          return true;    return false;  } @@ -204,8 +249,31 @@ void RuntimePointerChecking::groupChecks(    CheckingGroups.clear(); +  // If we need to check two pointers to the same underlying object +  // with a non-constant difference, we shouldn't perform any pointer +  // grouping with those pointers. This is because we can easily get +  // into cases where the resulting check would return false, even when +  // the accesses are safe. +  // +  // The following example shows this: +  // for (i = 0; i < 1000; ++i) +  //   a[5000 + i * m] = a[i] + a[i + 9000] +  // +  // Here grouping gives a check of (5000, 5000 + 1000 * m) against +  // (0, 10000) which is always false. However, if m is 1, there is no +  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows +  // us to perform an accurate check in this case. +  // +  // The above case requires that we have an UnknownDependence between +  // accesses to the same underlying object. 
This cannot happen unless +  // ShouldRetryWithRuntimeCheck is set, and therefore UseDependencies +  // is also false. In this case we will use the fallback path and create +  // separate checking groups for all pointers. +    // If we don't have the dependency partitions, construct a new -  // checking pointer group for each pointer. +  // checking pointer group for each pointer. This is also required +  // for correctness, because in this case we can have checking between +  // pointers to the same underlying object.    if (!UseDependencies) {      for (unsigned I = 0; I < Pointers.size(); ++I)        CheckingGroups.push_back(CheckingPtrGroup(I, *this)); @@ -222,7 +290,7 @@ void RuntimePointerChecking::groupChecks(    // don't process them twice.    SmallSet<unsigned, 2> Seen; -  // Go through all equivalence classes, get the the "pointer check groups" +  // Go through all equivalence classes, get the "pointer check groups"    // and add them to the overall solution. We use the order in which accesses    // appear in 'Pointers' to enforce determinism.    for (unsigned I = 0; I < Pointers.size(); ++I) { @@ -280,8 +348,14 @@ void RuntimePointerChecking::groupChecks(    }  } -bool RuntimePointerChecking::needsChecking( -    unsigned I, unsigned J, const SmallVectorImpl<int> *PtrPartition) const { +bool RuntimePointerChecking::arePointersInSamePartition( +    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1, +    unsigned PtrIdx2) { +  return (PtrToPartition[PtrIdx1] != -1 && +          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); +} + +bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {    const PointerInfo &PointerI = Pointers[I];    const PointerInfo &PointerJ = Pointers[J]; @@ -297,85 +371,45 @@ bool RuntimePointerChecking::needsChecking(    if (PointerI.AliasSetId != PointerJ.AliasSetId)      return false; -  // If PtrPartition is set omit checks between pointers of the same partition. -  // Partition number -1 means that the pointer is used in multiple partitions. -  // In this case we can't omit the check. 
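// --- Illustrative sketch of the [ScStart, ScEnd] computation in the new
// RuntimePointerChecking::insert above: with a negative constant step the
// accessed interval runs downwards, so the two bounds are swapped. Plain
// integers stand in for SCEV expressions; this assumes a constant step.
#include <cstdint>
#include <utility>

std::pair<int64_t, int64_t> accessRange(int64_t Start, int64_t Step,
                                        int64_t BackedgeTakenCount) {
  int64_t End = Start + Step * BackedgeTakenCount;
  if (Step < 0)
    std::swap(Start, End); // negative step: the old start is the upper bound
  return {Start, End};
}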
-  if (PtrPartition && (*PtrPartition)[I] != -1 && -      (*PtrPartition)[I] == (*PtrPartition)[J]) -    return false; -    return true;  } -void RuntimePointerChecking::print( -    raw_ostream &OS, unsigned Depth, -    const SmallVectorImpl<int> *PtrPartition) const { - -  OS.indent(Depth) << "Run-time memory checks:\n"; - +void RuntimePointerChecking::printChecks( +    raw_ostream &OS, const SmallVectorImpl<PointerCheck> &Checks, +    unsigned Depth) const {    unsigned N = 0; -  for (unsigned I = 0; I < CheckingGroups.size(); ++I) -    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) -      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) { -        OS.indent(Depth) << "Check " << N++ << ":\n"; -        OS.indent(Depth + 2) << "Comparing group " << I << ":\n"; - -        for (unsigned K = 0; K < CheckingGroups[I].Members.size(); ++K) { -          OS.indent(Depth + 2) -              << *Pointers[CheckingGroups[I].Members[K]].PointerValue << "\n"; -          if (PtrPartition) -            OS << " (Partition: " -               << (*PtrPartition)[CheckingGroups[I].Members[K]] << ")" -               << "\n"; -        } +  for (const auto &Check : Checks) { +    const auto &First = Check.first->Members, &Second = Check.second->Members; -        OS.indent(Depth + 2) << "Against group " << J << ":\n"; +    OS.indent(Depth) << "Check " << N++ << ":\n"; -        for (unsigned K = 0; K < CheckingGroups[J].Members.size(); ++K) { -          OS.indent(Depth + 2) -              << *Pointers[CheckingGroups[J].Members[K]].PointerValue << "\n"; -          if (PtrPartition) -            OS << " (Partition: " -               << (*PtrPartition)[CheckingGroups[J].Members[K]] << ")" -               << "\n"; -        } -      } +    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n"; +    for (unsigned K = 0; K < First.size(); ++K) +      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n"; -  OS.indent(Depth) << "Grouped accesses:\n"; -  for (unsigned I = 0; I < CheckingGroups.size(); ++I) { -    OS.indent(Depth + 2) << "Group " << I << ":\n"; -    OS.indent(Depth + 4) << "(Low: " << *CheckingGroups[I].Low -                         << " High: " << *CheckingGroups[I].High << ")\n"; -    for (unsigned J = 0; J < CheckingGroups[I].Members.size(); ++J) { -      OS.indent(Depth + 6) << "Member: " -                           << *Pointers[CheckingGroups[I].Members[J]].Expr -                           << "\n"; -    } +    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n"; +    for (unsigned K = 0; K < Second.size(); ++K) +      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";    }  } -unsigned RuntimePointerChecking::getNumberOfChecks( -    const SmallVectorImpl<int> *PtrPartition) const { - -  unsigned NumPartitions = CheckingGroups.size(); -  unsigned CheckCount = 0; +void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { -  for (unsigned I = 0; I < NumPartitions; ++I) -    for (unsigned J = I + 1; J < NumPartitions; ++J) -      if (needsChecking(CheckingGroups[I], CheckingGroups[J], PtrPartition)) -        CheckCount++; -  return CheckCount; -} +  OS.indent(Depth) << "Run-time memory checks:\n"; +  printChecks(OS, Checks, Depth); -bool RuntimePointerChecking::needsAnyChecking( -    const SmallVectorImpl<int> *PtrPartition) const { -  unsigned NumPointers = Pointers.size(); +  OS.indent(Depth) << "Grouped accesses:\n"; +  for (unsigned I = 0; I < CheckingGroups.size(); ++I) { +    const auto &CG = 
CheckingGroups[I]; -  for (unsigned I = 0; I < NumPointers; ++I) -    for (unsigned J = I + 1; J < NumPointers; ++J) -      if (needsChecking(I, J, PtrPartition)) -        return true; -  return false; +    OS.indent(Depth + 2) << "Group " << &CG << ":\n"; +    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High +                         << ")\n"; +    for (unsigned J = 0; J < CG.Members.size(); ++J) { +      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr +                           << "\n"; +    } +  }  }  namespace { @@ -390,9 +424,10 @@ public:    typedef SmallPtrSet<MemAccessInfo, 8> MemAccessInfoSet;    AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, -                 MemoryDepChecker::DepCandidates &DA) -      : DL(Dl), AST(*AA), LI(LI), DepCands(DA), -        IsRTCheckAnalysisNeeded(false) {} +                 MemoryDepChecker::DepCandidates &DA, +                 PredicatedScalarEvolution &PSE) +      : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), +        PSE(PSE) {}    /// \brief Register a load  and whether it is only read from.    void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -435,7 +470,7 @@ public:    /// We decided that no dependence analysis would be used.  Reset the state.    void resetDepChecks(MemoryDepChecker &DepChecker) {      CheckDeps.clear(); -    DepChecker.clearInterestingDependences(); +    DepChecker.clearDependences();    }    MemAccessInfoSet &getDependenciesToCheck() { return CheckDeps; } @@ -477,14 +512,18 @@ private:    /// (i.e. ShouldRetryWithRuntimeCheck), isDependencyCheckNeeded is cleared    /// while this remains set if we have potentially dependent accesses.    bool IsRTCheckAnalysisNeeded; + +  /// The SCEV predicate containing all the SCEV-related assumptions. +  PredicatedScalarEvolution &PSE;  };  } // end anonymous namespace  /// \brief Check whether a pointer can participate in a runtime bounds check. -static bool hasComputableBounds(ScalarEvolution *SE, -                                const ValueToValueMap &Strides, Value *Ptr) { -  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Ptr); +static bool hasComputableBounds(PredicatedScalarEvolution &PSE, +                                const ValueToValueMap &Strides, Value *Ptr, +                                Loop *L) { +  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);    if (!AR)      return false; @@ -527,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,        else          ++NumReadPtrChecks; -      if (hasComputableBounds(SE, StridesMap, Ptr) && +      if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) &&            // When we run after a failing dependency check we have to make sure            // we don't have wrapping pointers.            (!ShouldCheckStride || -           isStridedPtr(SE, Ptr, TheLoop, StridesMap) == 1)) { +           isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) {          // The id of the dependence set.          unsigned DepId; @@ -545,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,            // Each access has its own dependence set.            
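// --- Illustrative sketch of the filtering needsChecking applies when deciding
// which pointer pairs get a runtime bound comparison: pairs in the same
// dependence set are already covered by dependence analysis, and pairs in
// different alias sets cannot alias at all. PtrInfo is a simplified stand-in
// for the analysis' per-pointer bookkeeping.
#include <utility>
#include <vector>

struct PtrInfo { unsigned DependencySetId; unsigned AliasSetId; };

std::vector<std::pair<unsigned, unsigned>>
pairsNeedingChecks(const std::vector<PtrInfo> &Ptrs) {
  std::vector<std::pair<unsigned, unsigned>> Pairs;
  for (unsigned I = 0; I < Ptrs.size(); ++I)
    for (unsigned J = I + 1; J < Ptrs.size(); ++J) {
      if (Ptrs[I].DependencySetId == Ptrs[J].DependencySetId)
        continue;                  // same dependence set: no runtime check
      if (Ptrs[I].AliasSetId != Ptrs[J].AliasSetId)
        continue;                  // different alias sets: cannot alias
      Pairs.emplace_back(I, J);
    }
  return Pairs;
}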
DepId = RunningDepId++; -        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap); +        RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);          DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');        } else { @@ -599,9 +638,9 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,    }    if (NeedRTCheck && CanDoRT) -    RtCheck.groupChecks(DepCands, IsDepCheckNeeded); +    RtCheck.generateChecks(DepCands, IsDepCheckNeeded); -  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks(nullptr) +  DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()                 << " pointer comparisons.\n");    RtCheck.Need = NeedRTCheck; @@ -706,6 +745,11 @@ void AccessAnalysis::processMemAccesses() {            GetUnderlyingObjects(Ptr, TempObjects, DL, LI);            DEBUG(dbgs() << "Underlying objects for pointer " << *Ptr << "\n");            for (Value *UnderlyingObj : TempObjects) { +            // nullptr never alias, don't join sets for pointer that have "null" +            // in their UnderlyingObjects list. +            if (isa<ConstantPointerNull>(UnderlyingObj)) +              continue; +              UnderlyingObjToAccessMap::iterator Prev =                  ObjToLastAccess.find(UnderlyingObj);              if (Prev != ObjToLastAccess.end()) @@ -775,20 +819,20 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,  }  /// \brief Check whether the access through \p Ptr has a constant stride. -int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, -                       const ValueToValueMap &StridesMap) { -  const Type *Ty = Ptr->getType(); +int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, +                       const Loop *Lp, const ValueToValueMap &StridesMap) { +  Type *Ty = Ptr->getType();    assert(Ty->isPointerTy() && "Unexpected non-ptr");    // Make sure that the pointer does not point to aggregate types. -  const PointerType *PtrTy = cast<PointerType>(Ty); +  auto *PtrTy = cast<PointerType>(Ty);    if (PtrTy->getElementType()->isAggregateType()) {      DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"            << *Ptr << "\n");      return 0;    } -  const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Ptr); +  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);    if (!AR) { @@ -811,16 +855,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,    // to access the pointer value "0" which is undefined behavior in address    // space 0, therefore we can also vectorize this case.    bool IsInBoundsGEP = isInBoundsGep(Ptr); -  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp); +  bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp);    bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;    if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {      DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " -          << *Ptr << " SCEV: " << *PtrScev << "\n"); +                 << *Ptr << " SCEV: " << *PtrScev << "\n");      return 0;    }    // Check the step is constant. -  const SCEV *Step = AR->getStepRecurrence(*SE); +  const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());    // Calculate the pointer stride and check if it is constant.    
const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); @@ -832,7 +876,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp,    auto &DL = Lp->getHeader()->getModule()->getDataLayout();    int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType()); -  const APInt &APStepVal = C->getValue()->getValue(); +  const APInt &APStepVal = C->getAPInt();    // Huge step value - give up.    if (APStepVal.getBitWidth() > 64) @@ -872,15 +916,15 @@ bool MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {    llvm_unreachable("unexpected DepType!");  } -bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) { +bool MemoryDepChecker::Dependence::isBackward() const {    switch (Type) {    case NoDep:    case Forward: +  case ForwardButPreventsForwarding: +  case Unknown:      return false;    case BackwardVectorizable: -  case Unknown: -  case ForwardButPreventsForwarding:    case Backward:    case BackwardVectorizableButPreventsForwarding:      return true; @@ -889,17 +933,21 @@ bool MemoryDepChecker::Dependence::isInterestingDependence(DepType Type) {  }  bool MemoryDepChecker::Dependence::isPossiblyBackward() const { +  return isBackward() || Type == Unknown; +} + +bool MemoryDepChecker::Dependence::isForward() const {    switch (Type) { -  case NoDep:    case Forward:    case ForwardButPreventsForwarding: -    return false; +    return true; +  case NoDep:    case Unknown:    case BackwardVectorizable:    case Backward:    case BackwardVectorizableButPreventsForwarding: -    return true; +    return false;    }    llvm_unreachable("unexpected DepType!");  } @@ -999,11 +1047,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,        BPtr->getType()->getPointerAddressSpace())      return Dependence::Unknown; -  const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, APtr); -  const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, BPtr); +  const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); +  const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); -  int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides); -  int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides); +  int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides); +  int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides);    const SCEV *Src = AScev;    const SCEV *Sink = BScev; @@ -1020,12 +1068,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,      std::swap(StrideAPtr, StrideBPtr);    } -  const SCEV *Dist = SE->getMinusSCEV(Sink, Src); +  const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);    DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink -        << "(Induction step: " << StrideAPtr <<  ")\n"); +               << "(Induction step: " << StrideAPtr << ")\n");    DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " -        << *InstMap[BIdx] << ": " << *Dist << "\n"); +               << *InstMap[BIdx] << ": " << *Dist << "\n");    // Need accesses with constant stride. We don't want to vectorize    // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in @@ -1048,7 +1096,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,    unsigned TypeByteSize = DL.getTypeAllocSize(ATy);    // Negative distances are not plausible dependencies. 
-  const APInt &Val = C->getValue()->getValue(); +  const APInt &Val = C->getAPInt();    if (Val.isNegative()) {      bool IsTrueDataDependence = (AIsWrite && !BIsWrite);      if (IsTrueDataDependence && @@ -1064,7 +1112,7 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,    // Could be improved to assert type sizes are the same (i32 == float, etc).    if (Val == 0) {      if (ATy == BTy) -      return Dependence::NoDep; +      return Dependence::Forward;      DEBUG(dbgs() << "LAA: Zero dependence difference but different types\n");      return Dependence::Unknown;    } @@ -1203,22 +1251,21 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,                  isDependent(*A.first, A.second, *B.first, B.second, Strides);              SafeForVectorization &= Dependence::isSafeForVectorization(Type); -            // Gather dependences unless we accumulated MaxInterestingDependence +            // Gather dependences unless we accumulated MaxDependences              // dependences.  In that case return as soon as we find the first              // unsafe dependence.  This puts a limit on this quadratic              // algorithm. -            if (RecordInterestingDependences) { -              if (Dependence::isInterestingDependence(Type)) -                InterestingDependences.push_back( -                    Dependence(A.second, B.second, Type)); - -              if (InterestingDependences.size() >= MaxInterestingDependence) { -                RecordInterestingDependences = false; -                InterestingDependences.clear(); +            if (RecordDependences) { +              if (Type != Dependence::NoDep) +                Dependences.push_back(Dependence(A.second, B.second, Type)); + +              if (Dependences.size() >= MaxDependences) { +                RecordDependences = false; +                Dependences.clear();                  DEBUG(dbgs() << "Too many dependences, stopped recording\n");                }              } -            if (!RecordInterestingDependences && !SafeForVectorization) +            if (!RecordDependences && !SafeForVectorization)                return false;            }          ++OI; @@ -1227,8 +1274,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,      }    } -  DEBUG(dbgs() << "Total Interesting Dependences: " -               << InterestingDependences.size() << "\n"); +  DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");    return SafeForVectorization;  } @@ -1298,10 +1344,10 @@ bool LoopAccessInfo::canAnalyzeLoop() {    }    // ScalarEvolution needs to be able to find the exit count. 
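// --- Simplified sketch of how isDependent classifies a constant dependence
// distance after this change (and note that areDepsSafe now stops recording
// once MaxDependences, default 100, is reached): a zero distance between
// same-typed accesses is now a forward dependence rather than "no dependence",
// a negative distance cannot be a backward, vectorization-blocking dependence
// and is treated as forward, and positive distances need the detailed safety
// checks not reproduced here. The enum is a stand-in for
// MemoryDepChecker::Dependence::DepType.
enum class DepKind { Forward, Unknown, NeedsFurtherChecks };

DepKind classifyDistance(long long Distance, bool SameType) {
  if (Distance == 0)
    return SameType ? DepKind::Forward : DepKind::Unknown;
  if (Distance < 0)
    return DepKind::Forward;          // not a plausible backward dependence
  return DepKind::NeedsFurtherChecks; // positive: compare against safe widths
}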
-  const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); -  if (ExitCount == SE->getCouldNotCompute()) { -    emitAnalysis(LoopAccessReport() << -                 "could not determine number of loop iterations"); +  const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); +  if (ExitCount == PSE.getSE()->getCouldNotCompute()) { +    emitAnalysis(LoopAccessReport() +                 << "could not determine number of loop iterations");      DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");      return false;    } @@ -1370,7 +1416,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {        if (it->mayWriteToMemory()) {          StoreInst *St = dyn_cast<StoreInst>(it);          if (!St) { -          emitAnalysis(LoopAccessReport(it) << +          emitAnalysis(LoopAccessReport(&*it) <<                         "instruction cannot be vectorized");            CanVecMem = false;            return; @@ -1402,7 +1448,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {    MemoryDepChecker::DepCandidates DependentAccesses;    AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), -                          AA, LI, DependentAccesses); +                          AA, LI, DependentAccesses, PSE);    // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects    // multiple times on the same object. If the ptr is accessed twice, once @@ -1453,7 +1499,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {      // read a few words, modify, and write a few words, and some of the      // words may be written to the same address.      bool IsReadOnlyPtr = false; -    if (Seen.insert(Ptr).second || !isStridedPtr(SE, Ptr, TheLoop, Strides)) { +    if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) {        ++NumReads;        IsReadOnlyPtr = true;      } @@ -1483,7 +1529,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {    // Find pointers with computable bounds. We are going to use this information    // to place a runtime bound check.    bool CanDoRTIfNeeded = -      Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); +      Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides);    if (!CanDoRTIfNeeded) {      emitAnalysis(LoopAccessReport() << "cannot identify array bounds");      DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1510,6 +1556,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) {        PtrRtChecking.reset();        PtrRtChecking.Need = true; +      auto *SE = PSE.getSE();        CanDoRTIfNeeded =            Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); @@ -1552,7 +1599,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {  }  bool LoopAccessInfo::isUniform(Value *V) const { -  return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); +  return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop));  }  // FIXME: this function is currently a duplicate of the one in @@ -1566,86 +1613,115 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,    return nullptr;  } -std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck( -    Instruction *Loc, const SmallVectorImpl<int> *PtrPartition) const { -  if (!PtrRtChecking.Need) -    return std::make_pair(nullptr, nullptr); +namespace { +/// \brief IR Values for the lower and upper bounds of a pointer evolution.  
We +/// need to use value-handles because SCEV expansion can invalidate previously +/// expanded values.  Thus expansion of a pointer can invalidate the bounds for +/// a previous one. +struct PointerBounds { +  TrackingVH<Value> Start; +  TrackingVH<Value> End; +}; +} // end anonymous namespace -  SmallVector<TrackingVH<Value>, 2> Starts; -  SmallVector<TrackingVH<Value>, 2> Ends; +/// \brief Expand code for the lower and upper bound of the pointer group \p CG +/// in \p TheLoop.  \return the values for the bounds. +static PointerBounds +expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop, +             Instruction *Loc, SCEVExpander &Exp, ScalarEvolution *SE, +             const RuntimePointerChecking &PtrRtChecking) { +  Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue; +  const SCEV *Sc = SE->getSCEV(Ptr); + +  if (SE->isLoopInvariant(Sc, TheLoop)) { +    DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr +                 << "\n"); +    return {Ptr, Ptr}; +  } else { +    unsigned AS = Ptr->getType()->getPointerAddressSpace(); +    LLVMContext &Ctx = Loc->getContext(); + +    // Use this type for pointer arithmetic. +    Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); +    Value *Start = nullptr, *End = nullptr; + +    DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); +    Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc); +    End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc); +    DEBUG(dbgs() << "Start: " << *CG->Low << " End: " << *CG->High << "\n"); +    return {Start, End}; +  } +} -  LLVMContext &Ctx = Loc->getContext(); -  SCEVExpander Exp(*SE, DL, "induction"); -  Instruction *FirstInst = nullptr; +/// \brief Turns a collection of checks into a collection of expanded upper and +/// lower bounds for both pointers in the check. +static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds( +    const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks, +    Loop *L, Instruction *Loc, ScalarEvolution *SE, SCEVExpander &Exp, +    const RuntimePointerChecking &PtrRtChecking) { +  SmallVector<std::pair<PointerBounds, PointerBounds>, 4> ChecksWithBounds; + +  // Here we're relying on the SCEV Expander's cache to only emit code for the +  // same bounds once. +  std::transform( +      PointerChecks.begin(), PointerChecks.end(), +      std::back_inserter(ChecksWithBounds), +      [&](const RuntimePointerChecking::PointerCheck &Check) { +        PointerBounds +          First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking), +          Second = expandBounds(Check.second, L, Loc, Exp, SE, PtrRtChecking); +        return std::make_pair(First, Second); +      }); + +  return ChecksWithBounds; +} -  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { -    const RuntimePointerChecking::CheckingPtrGroup &CG = -        PtrRtChecking.CheckingGroups[i]; -    Value *Ptr = PtrRtChecking.Pointers[CG.Members[0]].PointerValue; -    const SCEV *Sc = SE->getSCEV(Ptr); - -    if (SE->isLoopInvariant(Sc, TheLoop)) { -      DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr -                   << "\n"); -      Starts.push_back(Ptr); -      Ends.push_back(Ptr); -    } else { -      unsigned AS = Ptr->getType()->getPointerAddressSpace(); - -      // Use this type for pointer arithmetic. 
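// --- Standalone sketch of the conflict test the runtime checks below emit for
// a pair of expanded pointer ranges: two ranges [Start, End] conflict iff
// start(A) <= end(B) && start(B) <= end(A), compared as unsigned addresses.
// PointerBoundsSketch is an illustrative stand-in for the expanded bounds.
#include <cstdint>

struct PointerBoundsSketch { uint64_t Start, End; };

bool mayConflict(const PointerBoundsSketch &A, const PointerBoundsSketch &B) {
  return A.Start <= B.End && B.Start <= A.End; // the two intervals overlap
}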
-      Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS); -      Value *Start = nullptr, *End = nullptr; - -      DEBUG(dbgs() << "LAA: Adding RT check for range:\n"); -      Start = Exp.expandCodeFor(CG.Low, PtrArithTy, Loc); -      End = Exp.expandCodeFor(CG.High, PtrArithTy, Loc); -      DEBUG(dbgs() << "Start: " << *CG.Low << " End: " << *CG.High << "\n"); -      Starts.push_back(Start); -      Ends.push_back(End); -    } -  } +std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeChecks( +    Instruction *Loc, +    const SmallVectorImpl<RuntimePointerChecking::PointerCheck> &PointerChecks) +    const { +  auto *SE = PSE.getSE(); +  SCEVExpander Exp(*SE, DL, "induction"); +  auto ExpandedChecks = +      expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); +  LLVMContext &Ctx = Loc->getContext(); +  Instruction *FirstInst = nullptr;    IRBuilder<> ChkBuilder(Loc);    // Our instructions might fold to a constant.    Value *MemoryRuntimeCheck = nullptr; -  for (unsigned i = 0; i < PtrRtChecking.CheckingGroups.size(); ++i) { -    for (unsigned j = i + 1; j < PtrRtChecking.CheckingGroups.size(); ++j) { -      const RuntimePointerChecking::CheckingPtrGroup &CGI = -          PtrRtChecking.CheckingGroups[i]; -      const RuntimePointerChecking::CheckingPtrGroup &CGJ = -          PtrRtChecking.CheckingGroups[j]; - -      if (!PtrRtChecking.needsChecking(CGI, CGJ, PtrPartition)) -        continue; -      unsigned AS0 = Starts[i]->getType()->getPointerAddressSpace(); -      unsigned AS1 = Starts[j]->getType()->getPointerAddressSpace(); - -      assert((AS0 == Ends[j]->getType()->getPointerAddressSpace()) && -             (AS1 == Ends[i]->getType()->getPointerAddressSpace()) && -             "Trying to bounds check pointers with different address spaces"); - -      Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); -      Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); - -      Value *Start0 = ChkBuilder.CreateBitCast(Starts[i], PtrArithTy0, "bc"); -      Value *Start1 = ChkBuilder.CreateBitCast(Starts[j], PtrArithTy1, "bc"); -      Value *End0 =   ChkBuilder.CreateBitCast(Ends[i],   PtrArithTy1, "bc"); -      Value *End1 =   ChkBuilder.CreateBitCast(Ends[j],   PtrArithTy0, "bc"); - -      Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); -      FirstInst = getFirstInst(FirstInst, Cmp0, Loc); -      Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); -      FirstInst = getFirstInst(FirstInst, Cmp1, Loc); -      Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); +  for (const auto &Check : ExpandedChecks) { +    const PointerBounds &A = Check.first, &B = Check.second; +    // Check if two pointers (A and B) conflict where conflict is computed as: +    // start(A) <= end(B) && start(B) <= end(A) +    unsigned AS0 = A.Start->getType()->getPointerAddressSpace(); +    unsigned AS1 = B.Start->getType()->getPointerAddressSpace(); + +    assert((AS0 == B.End->getType()->getPointerAddressSpace()) && +           (AS1 == A.End->getType()->getPointerAddressSpace()) && +           "Trying to bounds check pointers with different address spaces"); + +    Type *PtrArithTy0 = Type::getInt8PtrTy(Ctx, AS0); +    Type *PtrArithTy1 = Type::getInt8PtrTy(Ctx, AS1); + +    Value *Start0 = ChkBuilder.CreateBitCast(A.Start, PtrArithTy0, "bc"); +    Value *Start1 = ChkBuilder.CreateBitCast(B.Start, PtrArithTy1, "bc"); +    Value *End0 =   ChkBuilder.CreateBitCast(A.End,   PtrArithTy1, "bc"); +    Value *End1 =   ChkBuilder.CreateBitCast(B.End,   
PtrArithTy0, "bc"); + +    Value *Cmp0 = ChkBuilder.CreateICmpULE(Start0, End1, "bound0"); +    FirstInst = getFirstInst(FirstInst, Cmp0, Loc); +    Value *Cmp1 = ChkBuilder.CreateICmpULE(Start1, End0, "bound1"); +    FirstInst = getFirstInst(FirstInst, Cmp1, Loc); +    Value *IsConflict = ChkBuilder.CreateAnd(Cmp0, Cmp1, "found.conflict"); +    FirstInst = getFirstInst(FirstInst, IsConflict, Loc); +    if (MemoryRuntimeCheck) { +      IsConflict = +          ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, "conflict.rdx");        FirstInst = getFirstInst(FirstInst, IsConflict, Loc); -      if (MemoryRuntimeCheck) { -        IsConflict = ChkBuilder.CreateOr(MemoryRuntimeCheck, IsConflict, -                                         "conflict.rdx"); -        FirstInst = getFirstInst(FirstInst, IsConflict, Loc); -      } -      MemoryRuntimeCheck = IsConflict;      } +    MemoryRuntimeCheck = IsConflict;    }    if (!MemoryRuntimeCheck) @@ -1661,12 +1737,20 @@ std::pair<Instruction *, Instruction *> LoopAccessInfo::addRuntimeCheck(    return std::make_pair(FirstInst, Check);  } +std::pair<Instruction *, Instruction *> +LoopAccessInfo::addRuntimeChecks(Instruction *Loc) const { +  if (!PtrRtChecking.Need) +    return std::make_pair(nullptr, nullptr); + +  return addRuntimeChecks(Loc, PtrRtChecking.getChecks()); +} +  LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,                                 const DataLayout &DL,                                 const TargetLibraryInfo *TLI, AliasAnalysis *AA,                                 DominatorTree *DT, LoopInfo *LI,                                 const ValueToValueMap &Strides) -    : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), +    : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL),        TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0),        MaxSafeDepDistBytes(-1U), CanVecMem(false),        StoreToLoopInvariantAddress(false) { @@ -1685,14 +1769,14 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {    if (Report)      OS.indent(Depth) << "Report: " << Report->str() << "\n"; -  if (auto *InterestingDependences = DepChecker.getInterestingDependences()) { -    OS.indent(Depth) << "Interesting Dependences:\n"; -    for (auto &Dep : *InterestingDependences) { +  if (auto *Dependences = DepChecker.getDependences()) { +    OS.indent(Depth) << "Dependences:\n"; +    for (auto &Dep : *Dependences) {        Dep.print(OS, Depth + 2, DepChecker.getMemoryInstructions());        OS << "\n";      }    } else -    OS.indent(Depth) << "Too many interesting dependences, not recorded\n"; +    OS.indent(Depth) << "Too many dependences, not recorded\n";    // List the pair of accesses need run-time checks to prove independence.    PtrRtChecking.print(OS, Depth); @@ -1701,6 +1785,9 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {    OS.indent(Depth) << "Store to invariant address was "                     << (StoreToLoopInvariantAddress ? 
"" : "not ")                     << "found in loop.\n"; + +  OS.indent(Depth) << "SCEV assumptions:\n"; +  PSE.getUnionPredicate().print(OS, Depth);  }  const LoopAccessInfo & @@ -1714,8 +1801,8 @@ LoopAccessAnalysis::getInfo(Loop *L, const ValueToValueMap &Strides) {    if (!LAI) {      const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); -    LAI = llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, -                                            Strides); +    LAI = +        llvm::make_unique<LoopAccessInfo>(L, SE, DL, TLI, AA, DT, LI, Strides);  #ifndef NDEBUG      LAI->NumSymbolicStrides = Strides.size();  #endif @@ -1737,10 +1824,10 @@ void LoopAccessAnalysis::print(raw_ostream &OS, const Module *M) const {  }  bool LoopAccessAnalysis::runOnFunction(Function &F) { -  SE = &getAnalysis<ScalarEvolution>(); +  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();    auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();    TLI = TLIP ? &TLIP->getTLI() : nullptr; -  AA = &getAnalysis<AliasAnalysis>(); +  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();    DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();    LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); @@ -1748,8 +1835,8 @@ bool LoopAccessAnalysis::runOnFunction(Function &F) {  }  void LoopAccessAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { -    AU.addRequired<ScalarEvolution>(); -    AU.addRequired<AliasAnalysis>(); +    AU.addRequired<ScalarEvolutionWrapperPass>(); +    AU.addRequired<AAResultsWrapperPass>();      AU.addRequired<DominatorTreeWrapperPass>();      AU.addRequired<LoopInfoWrapperPass>(); @@ -1761,8 +1848,8 @@ static const char laa_name[] = "Loop Access Analysis";  #define LAA_NAME "loop-accesses"  INITIALIZE_PASS_BEGIN(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)  INITIALIZE_PASS_END(LoopAccessAnalysis, LAA_NAME, laa_name, false, true) diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index 6b6faf8a66c3..9ab9eead584f 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -102,8 +102,8 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,      return false;    if (I->mayReadFromMemory())      return false; -  // The landingpad instruction is immobile. -  if (isa<LandingPadInst>(I)) +  // EH block instructions are immobile. +  if (I->isEHPad())      return false;    // Determine the insertion point, unless one was given.    if (!InsertPt) { @@ -120,6 +120,13 @@ bool Loop::makeLoopInvariant(Instruction *I, bool &Changed,    // Hoist.    I->moveBefore(InsertPt); + +  // There is possibility of hoisting this instruction above some arbitrary +  // condition. Any metadata defined on it can be control dependent on this +  // condition. Conservatively strip it here so that we don't give any wrong +  // information to the optimizer. 
+  I->dropUnknownNonDebugMetadata(); +    Changed = true;    return true;  } @@ -172,7 +179,13 @@ PHINode *Loop::getCanonicalInductionVariable() const {  bool Loop::isLCSSAForm(DominatorTree &DT) const {    for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {      BasicBlock *BB = *BI; -    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) +    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) { +      // Tokens can't be used in PHI nodes and live-out tokens prevent loop +      // optimizations, so for the purposes of considered LCSSA form, we +      // can ignore them. +      if (I->getType()->isTokenTy()) +        continue; +        for (Use &U : I->uses()) {          Instruction *UI = cast<Instruction>(U.getUser());          BasicBlock *UserBB = UI->getParent(); @@ -188,11 +201,21 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const {              DT.isReachableFromEntry(UserBB))            return false;        } +    }    }    return true;  } +bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const { +  if (!isLCSSAForm(DT)) +    return false; + +  return std::all_of(begin(), end(), [&](const Loop *L) { +    return L->isRecursivelyLCSSAForm(DT); +  }); +} +  /// isLoopSimplifyForm - Return true if the Loop is in the form that  /// the LoopSimplify form transforms loops to, which is sometimes called  /// normal form. @@ -211,15 +234,23 @@ bool Loop::isSafeToClone() const {      if (isa<IndirectBrInst>((*I)->getTerminator()))        return false; -    if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) +    if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {        if (II->cannotDuplicate())          return false; +      // Return false if any loop blocks contain invokes to EH-pads other than +      // landingpads;  we don't know how to split those edges yet. +      auto *FirstNonPHI = II->getUnwindDest()->getFirstNonPHI(); +      if (FirstNonPHI->isEHPad() && !isa<LandingPadInst>(FirstNonPHI)) +        return false; +    }      for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {        if (const CallInst *CI = dyn_cast<CallInst>(BI)) {          if (CI->cannotDuplicate())            return false;        } +      if (BI->getType()->isTokenTy() && BI->isUsedOutsideOfBlock(*I)) +        return false;      }    }    return true; @@ -602,14 +633,12 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {    return NearLoop;  } -/// updateUnloop - The last backedge has been removed from a loop--now the -/// "unloop". Find a new parent for the blocks contained within unloop and -/// update the loop tree. We don't necessarily have valid dominators at this -/// point, but LoopInfo is still valid except for the removal of this loop. -/// -/// Note that Unloop may now be an empty loop. Calling Loop::getHeader without -/// checking first is illegal. +LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) { +  analyze(DomTree); +} +  void LoopInfo::updateUnloop(Loop *Unloop) { +  Unloop->markUnlooped();    // First handle the special case of no parent loop to simplify the algorithm.    if (!Unloop->getParentLoop()) { @@ -675,7 +704,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> *AM) {    // objects. I don't want to add that kind of complexity until the scope of    // the problem is better understood.    
LoopInfo LI; -  LI.Analyze(AM->getResult<DominatorTreeAnalysis>(F)); +  LI.analyze(AM->getResult<DominatorTreeAnalysis>(F));    return LI;  } @@ -685,6 +714,20 @@ PreservedAnalyses LoopPrinterPass::run(Function &F,    return PreservedAnalyses::all();  } +PrintLoopPass::PrintLoopPass() : OS(dbgs()) {} +PrintLoopPass::PrintLoopPass(raw_ostream &OS, const std::string &Banner) +    : OS(OS), Banner(Banner) {} + +PreservedAnalyses PrintLoopPass::run(Loop &L) { +  OS << Banner; +  for (auto *Block : L.blocks()) +    if (Block) +      Block->print(OS); +    else +      OS << "Printing <null> block"; +  return PreservedAnalyses::all(); +} +  //===----------------------------------------------------------------------===//  // LoopInfo implementation  // @@ -698,7 +741,7 @@ INITIALIZE_PASS_END(LoopInfoWrapperPass, "loops", "Natural Loop Information",  bool LoopInfoWrapperPass::runOnFunction(Function &) {    releaseMemory(); -  LI.Analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree()); +  LI.analyze(getAnalysis<DominatorTreeWrapperPass>().getDomTree());    return false;  } diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index e9fcf02118b9..dc424734dd56 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -16,6 +16,7 @@  #include "llvm/Analysis/LoopPass.h"  #include "llvm/IR/IRPrintingPasses.h"  #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/Timer.h"  #include "llvm/Support/raw_ostream.h" @@ -27,35 +28,26 @@ namespace {  /// PrintLoopPass - Print a Function corresponding to a Loop.  /// -class PrintLoopPass : public LoopPass { -private: -  std::string Banner; -  raw_ostream &Out;       // raw_ostream to print on. +class PrintLoopPassWrapper : public LoopPass { +  PrintLoopPass P;  public:    static char ID; -  PrintLoopPass(const std::string &B, raw_ostream &o) -      : LoopPass(ID), Banner(B), Out(o) {} +  PrintLoopPassWrapper() : LoopPass(ID) {} +  PrintLoopPassWrapper(raw_ostream &OS, const std::string &Banner) +      : LoopPass(ID), P(OS, Banner) {}    void getAnalysisUsage(AnalysisUsage &AU) const override {      AU.setPreservesAll();    }    bool runOnLoop(Loop *L, LPPassManager &) override { -    Out << Banner; -    for (Loop::block_iterator b = L->block_begin(), be = L->block_end(); -         b != be; -         ++b) { -      if (*b) -        (*b)->print(Out); -      else -        Out << "Printing <null> block"; -    } +    P.run(*L);      return false;    }  }; -char PrintLoopPass::ID = 0; +char PrintLoopPassWrapper::ID = 0;  }  //===----------------------------------------------------------------------===// @@ -66,81 +58,34 @@ char LPPassManager::ID = 0;  LPPassManager::LPPassManager()    : FunctionPass(ID), PMDataManager() { -  skipThisLoop = false; -  redoThisLoop = false;    LI = nullptr;    CurrentLoop = nullptr;  } -/// Delete loop from the loop queue and loop hierarchy (LoopInfo). -void LPPassManager::deleteLoopFromQueue(Loop *L) { - -  LI->updateUnloop(L); - -  // Notify passes that the loop is being deleted. -  deleteSimpleAnalysisLoop(L); - -  // If L is current loop then skip rest of the passes and let -  // runOnFunction remove L from LQ. Otherwise, remove L from LQ now -  // and continue applying other passes on CurrentLoop. 
-  if (CurrentLoop == L) -    skipThisLoop = true; - -  delete L; - -  if (skipThisLoop) -    return; - -  for (std::deque<Loop *>::iterator I = LQ.begin(), -         E = LQ.end(); I != E; ++I) { -    if (*I == L) { -      LQ.erase(I); -      break; -    } -  } -} -  // Inset loop into loop nest (LoopInfo) and loop queue (LQ). -void LPPassManager::insertLoop(Loop *L, Loop *ParentLoop) { - -  assert (CurrentLoop != L && "Cannot insert CurrentLoop"); +Loop &LPPassManager::addLoop(Loop *ParentLoop) { +  // Create a new loop. LI will take ownership. +  Loop *L = new Loop(); -  // Insert into loop nest -  if (ParentLoop) -    ParentLoop->addChildLoop(L); -  else +  // Insert into the loop nest and the loop queue. +  if (!ParentLoop) { +    // This is the top level loop.      LI->addTopLevelLoop(L); - -  insertLoopIntoQueue(L); -} - -void LPPassManager::insertLoopIntoQueue(Loop *L) { -  // Insert L into loop queue -  if (L == CurrentLoop) -    redoLoop(L); -  else if (!L->getParentLoop()) -    // This is top level loop.      LQ.push_front(L); -  else { -    // Insert L after the parent loop. -    for (std::deque<Loop *>::iterator I = LQ.begin(), -           E = LQ.end(); I != E; ++I) { -      if (*I == L->getParentLoop()) { -        // deque does not support insert after. -        ++I; -        LQ.insert(I, 1, L); -        break; -      } -    } +    return *L;    } -} -// Reoptimize this loop. LPPassManager will re-insert this loop into the -// queue. This allows LoopPass to change loop nest for the loop. This -// utility may send LPPassManager into infinite loops so use caution. -void LPPassManager::redoLoop(Loop *L) { -  assert (CurrentLoop == L && "Can redo only CurrentLoop"); -  redoThisLoop = true; +  ParentLoop->addChildLoop(L); +  // Insert L into the loop queue after the parent loop. +  for (auto I = LQ.begin(), E = LQ.end(); I != E; ++I) { +    if (*I == L->getParentLoop()) { +      // deque does not support insert after. +      ++I; +      LQ.insert(I, 1, L); +      break; +    } +  } +  return *L;  }  /// cloneBasicBlockSimpleAnalysis - Invoke cloneBasicBlockAnalysis hook for @@ -230,10 +175,7 @@ bool LPPassManager::runOnFunction(Function &F) {    // Walk Loops    while (!LQ.empty()) { -    CurrentLoop  = LQ.back(); -    skipThisLoop = false; -    redoThisLoop = false; - +    CurrentLoop = LQ.back();      // Run all passes on the current Loop.      for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {        LoopPass *P = getContainedPass(Index); @@ -253,11 +195,15 @@ bool LPPassManager::runOnFunction(Function &F) {        if (Changed)          dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG, -                     skipThisLoop ? "<deleted>" : -                                    CurrentLoop->getHeader()->getName()); +                     CurrentLoop->isUnloop() +                         ? "<deleted>" +                         : CurrentLoop->getHeader()->getName());        dumpPreservedSet(P); -      if (!skipThisLoop) { +      if (CurrentLoop->isUnloop()) { +        // Notify passes that the loop is being deleted. +        deleteSimpleAnalysisLoop(CurrentLoop); +      } else {          // Manually check that this loop is still healthy. 
This is done          // instead of relying on LoopInfo::verifyLoop since LoopInfo          // is a function pass and it's really expensive to verify every @@ -276,12 +222,12 @@ bool LPPassManager::runOnFunction(Function &F) {        removeNotPreservedAnalysis(P);        recordAvailableAnalysis(P); -      removeDeadPasses(P, -                       skipThisLoop ? "<deleted>" : -                                      CurrentLoop->getHeader()->getName(), +      removeDeadPasses(P, CurrentLoop->isUnloop() +                              ? "<deleted>" +                              : CurrentLoop->getHeader()->getName(),                         ON_LOOP_MSG); -      if (skipThisLoop) +      if (CurrentLoop->isUnloop())          // Do not run other passes on this loop.          break;      } @@ -289,17 +235,16 @@ bool LPPassManager::runOnFunction(Function &F) {      // If the loop was deleted, release all the loop passes. This frees up      // some memory, and avoids trouble with the pass manager trying to call      // verifyAnalysis on them. -    if (skipThisLoop) +    if (CurrentLoop->isUnloop()) {        for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {          Pass *P = getContainedPass(Index);          freePass(P, "<deleted>", ON_LOOP_MSG);        } +      delete CurrentLoop; +    }      // Pop the loop from queue after running all passes.      LQ.pop_back(); - -    if (redoThisLoop) -      LQ.push_back(CurrentLoop);    }    // Finalization @@ -327,7 +272,7 @@ void LPPassManager::dumpPassStructure(unsigned Offset) {  Pass *LoopPass::createPrinterPass(raw_ostream &O,                                    const std::string &Banner) const { -  return new PrintLoopPass(Banner, O); +  return new PrintLoopPassWrapper(O, Banner);  }  // Check if this pass is suitable for the current LPPassManager, if diff --git a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp index da3b829b6d31..078cefe51807 100644 --- a/contrib/llvm/lib/Analysis/MemDepPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDepPrinter.cpp @@ -49,7 +49,7 @@ namespace {      void print(raw_ostream &OS, const Module * = nullptr) const override;      void getAnalysisUsage(AnalysisUsage &AU) const override { -      AU.addRequiredTransitive<AliasAnalysis>(); +      AU.addRequiredTransitive<AAResultsWrapperPass>();        AU.addRequiredTransitive<MemoryDependenceAnalysis>();        AU.setPreservesAll();      } @@ -96,7 +96,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {    // All this code uses non-const interfaces because MemDep is not    // const-friendly, though nothing is actually modified. 
-  for (auto &I : inst_range(F)) { +  for (auto &I : instructions(F)) {      Instruction *Inst = &I;      if (!Inst->mayReadFromMemory() && !Inst->mayWriteToMemory()) @@ -135,7 +135,7 @@ bool MemDepPrinter::runOnFunction(Function &F) {  }  void MemDepPrinter::print(raw_ostream &OS, const Module *M) const { -  for (const auto &I : inst_range(*F)) { +  for (const auto &I : instructions(*F)) {      const Instruction *Inst = &I;      DepSetMap::const_iterator DI = Deps.find(Inst); diff --git a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp index fa292a28ec87..36f1424c8cf9 100644 --- a/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp +++ b/contrib/llvm/lib/Analysis/MemDerefPrinter.cpp @@ -22,7 +22,8 @@ using namespace llvm;  namespace {    struct MemDerefPrinter : public FunctionPass { -    SmallVector<Value *, 4> Vec; +    SmallVector<Value *, 4> Deref; +    SmallPtrSet<Value *, 4> DerefAndAligned;      static char ID; // Pass identification, replacement for typeid      MemDerefPrinter() : FunctionPass(ID) { @@ -34,7 +35,8 @@ namespace {      bool runOnFunction(Function &F) override;      void print(raw_ostream &OS, const Module * = nullptr) const override;      void releaseMemory() override { -      Vec.clear(); +      Deref.clear(); +      DerefAndAligned.clear();      }    };  } @@ -51,11 +53,13 @@ FunctionPass *llvm::createMemDerefPrinter() {  bool MemDerefPrinter::runOnFunction(Function &F) {    const DataLayout &DL = F.getParent()->getDataLayout(); -  for (auto &I: inst_range(F)) { +  for (auto &I: instructions(F)) {      if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {        Value *PO = LI->getPointerOperand();        if (isDereferenceablePointer(PO, DL)) -        Vec.push_back(PO); +        Deref.push_back(PO); +      if (isDereferenceableAndAlignedPointer(PO, LI->getAlignment(), DL)) +        DerefAndAligned.insert(PO);      }    }    return false; @@ -63,8 +67,12 @@ bool MemDerefPrinter::runOnFunction(Function &F) {  void MemDerefPrinter::print(raw_ostream &OS, const Module *M) const {    OS << "The following are dereferenceable:\n"; -  for (auto &V: Vec) { +  for (Value *V: Deref) {      V->print(OS); +    if (DerefAndAligned.count(V)) +      OS << "\t(aligned)"; +    else +      OS << "\t(unaligned)";      OS << "\n\n";    }  } diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 8ddac8ffb971..b19ecadd3161 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -31,7 +31,7 @@ using namespace llvm;  #define DEBUG_TYPE "memory-builtins" -enum AllocType { +enum AllocType : uint8_t {    OpNewLike          = 1<<0, // allocates; never returns null    MallocLike         = 1<<1 | OpNewLike, // allocates; may return null    CallocLike         = 1<<2, // allocates + bzero @@ -62,6 +62,14 @@ static const AllocFnsTy AllocationFnData[] = {    {LibFunc::ZnajRKSt9nothrow_t,  MallocLike,  2, 0,  -1}, // new[](unsigned int, nothrow)    {LibFunc::Znam,                OpNewLike,   1, 0,  -1}, // new[](unsigned long)    {LibFunc::ZnamRKSt9nothrow_t,  MallocLike,  2, 0,  -1}, // new[](unsigned long, nothrow) +  {LibFunc::msvc_new_int,         OpNewLike,   1, 0,  -1}, // new(unsigned int) +  {LibFunc::msvc_new_int_nothrow, MallocLike,  2, 0,  -1}, // new(unsigned int, nothrow) +  {LibFunc::msvc_new_longlong,         OpNewLike,   1, 0,  -1}, // new(unsigned long long) +  {LibFunc::msvc_new_longlong_nothrow, MallocLike,  2, 0,  -1}, // new(unsigned long 
long, nothrow) +  {LibFunc::msvc_new_array_int,         OpNewLike,   1, 0,  -1}, // new[](unsigned int) +  {LibFunc::msvc_new_array_int_nothrow, MallocLike,  2, 0,  -1}, // new[](unsigned int, nothrow) +  {LibFunc::msvc_new_array_longlong,         OpNewLike,   1, 0,  -1}, // new[](unsigned long long) +  {LibFunc::msvc_new_array_longlong_nothrow, MallocLike,  2, 0,  -1}, // new[](unsigned long long, nothrow)    {LibFunc::calloc,              CallocLike,  2, 0,   1},    {LibFunc::realloc,             ReallocLike, 2, 1,  -1},    {LibFunc::reallocf,            ReallocLike, 2, 1,  -1}, @@ -107,18 +115,13 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy,    if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))      return nullptr; -  unsigned i = 0; -  bool found = false; -  for ( ; i < array_lengthof(AllocationFnData); ++i) { -    if (AllocationFnData[i].Func == TLIFn) { -      found = true; -      break; -    } -  } -  if (!found) +  const AllocFnsTy *FnData = +      std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData), +                   [TLIFn](const AllocFnsTy &Fn) { return Fn.Func == TLIFn; }); + +  if (FnData == std::end(AllocationFnData))      return nullptr; -  const AllocFnsTy *FnData = &AllocationFnData[i];    if ((FnData->AllocTy & AllocTy) != FnData->AllocTy)      return nullptr; @@ -185,13 +188,6 @@ bool llvm::isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,  }  /// \brief Tests if a value is a call or invoke to a library function that -/// reallocates memory (such as realloc). -bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, -                           bool LookThroughBitCast) { -  return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); -} - -/// \brief Tests if a value is a call or invoke to a library function that  /// allocates memory and never returns null (such as operator new).  
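
The getAllocationData() hunk above drops the hand-rolled index-and-flag loop over AllocationFnData in favor of std::find_if compared against std::end(). A minimal standalone sketch of the same lookup idiom, using an invented table and key type rather than the real LibFunc/AllocFnsTy definitions:

    #include <algorithm>
    #include <cstdio>
    #include <iterator>

    // Invented stand-ins for the real table entry and key types.
    enum class Func { Malloc, Calloc, Realloc };
    struct FnInfo { Func Key; int NumParams; };

    static const FnInfo Table[] = {
        {Func::Malloc, 1}, {Func::Calloc, 2}, {Func::Realloc, 2},
    };

    // Return the matching entry, or nullptr when the key is absent --
    // the same find_if-then-compare-with-end shape as the hunk above.
    static const FnInfo *lookup(Func F) {
      const FnInfo *E = std::find_if(std::begin(Table), std::end(Table),
                                     [F](const FnInfo &I) { return I.Key == F; });
      return E == std::end(Table) ? nullptr : E;
    }

    int main() {
      if (const FnInfo *E = lookup(Func::Calloc))
        std::printf("entry takes %d params\n", E->NumParams);
    }
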
bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI,                                 bool LookThroughBitCast) { @@ -313,14 +309,26 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) {    unsigned ExpectedNumParams;    if (TLIFn == LibFunc::free ||        TLIFn == LibFunc::ZdlPv || // operator delete(void*) -      TLIFn == LibFunc::ZdaPv)   // operator delete[](void*) +      TLIFn == LibFunc::ZdaPv || // operator delete[](void*) +      TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*) +      TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*) +      TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*) +      TLIFn == LibFunc::msvc_delete_array_ptr64)   // operator delete[](void*)      ExpectedNumParams = 1;    else if (TLIFn == LibFunc::ZdlPvj ||              // delete(void*, uint)             TLIFn == LibFunc::ZdlPvm ||              // delete(void*, ulong)             TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow)             TLIFn == LibFunc::ZdaPvj ||              // delete[](void*, uint)             TLIFn == LibFunc::ZdaPvm ||              // delete[](void*, ulong) -           TLIFn == LibFunc::ZdaPvRKSt9nothrow_t)   // delete[](void*, nothrow) +           TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) +           TLIFn == LibFunc::msvc_delete_ptr32_int ||      // delete(void*, uint) +           TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong) +           TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow) +           TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow) +           TLIFn == LibFunc::msvc_delete_array_ptr32_int ||      // delete[](void*, uint) +           TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) +           TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) +           TLIFn == LibFunc::msvc_delete_array_ptr64_nothrow)   // delete[](void*, nothrow)      ExpectedNumParams = 2;    else      return nullptr; @@ -621,7 +629,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {    // always generate code immediately before the instruction being    // processed, so that the generated code dominates the same BBs -  Instruction *PrevInsertPoint = Builder.GetInsertPoint(); +  BuilderTy::InsertPointGuard Guard(Builder);    if (Instruction *I = dyn_cast<Instruction>(V))      Builder.SetInsertPoint(I); @@ -650,9 +658,6 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) {      Result = unknown();    } -  if (PrevInsertPoint) -    Builder.SetInsertPoint(PrevInsertPoint); -    // Don't reuse CacheIt since it may be invalid at this point.    
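
The ObjectSizeOffsetEvaluator hunk above replaces the manually saved-and-restored insert point with a BuilderTy::InsertPointGuard, so the position is put back on every exit path. A small self-contained model of that RAII shape, with an invented Builder type rather than the real IRBuilder API:

    #include <cstdio>

    // Invented stand-in for a builder that tracks a current position.
    struct Builder {
      int InsertPos = 0;
    };

    // Saves the position on construction and restores it on destruction,
    // so every return path (including early ones) puts it back.
    class InsertPointGuard {
      Builder &B;
      int Saved;
    public:
      explicit InsertPointGuard(Builder &B) : B(B), Saved(B.InsertPos) {}
      ~InsertPointGuard() { B.InsertPos = Saved; }
    };

    static void emitAt(Builder &B, int Pos, bool Bail) {
      InsertPointGuard Guard(B);
      B.InsertPos = Pos;
      if (Bail)
        return; // position is still restored by the guard
      // ... emit at Pos ...
    }

    int main() {
      Builder B;
      emitAt(B, 42, /*Bail=*/true);
      std::printf("position after call: %d\n", B.InsertPos); // prints 0
    }
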
CacheMap[V] = Result;    return Result; @@ -742,7 +747,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitPHINode(PHINode &PHI) {    // compute offset/size for each PHI incoming pointer    for (unsigned i = 0, e = PHI.getNumIncomingValues(); i != e; ++i) { -    Builder.SetInsertPoint(PHI.getIncomingBlock(i)->getFirstInsertionPt()); +    Builder.SetInsertPoint(&*PHI.getIncomingBlock(i)->getFirstInsertionPt());      SizeOffsetEvalType EdgeData = compute_(PHI.getIncomingValue(i));      if (!bothKnown(EdgeData)) { diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index 782a67bf72d5..3e80bfe1fdfb 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -22,7 +22,9 @@  #include "llvm/Analysis/InstructionSimplify.h"  #include "llvm/Analysis/MemoryBuiltins.h"  #include "llvm/Analysis/PHITransAddr.h" +#include "llvm/Analysis/OrderedBasicBlock.h"  #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/IR/DataLayout.h"  #include "llvm/IR/Dominators.h"  #include "llvm/IR/Function.h" @@ -49,7 +51,11 @@ STATISTIC(NumCacheCompleteNonLocalPtr,            "Number of block queries that were completely cached");  // Limit for the number of instructions to scan in a block. -static const unsigned int BlockScanLimit = 100; + +static cl::opt<unsigned> BlockScanLimit( +    "memdep-block-scan-limit", cl::Hidden, cl::init(100), +    cl::desc("The number of instructions to scan in a block in memory " +             "dependency analysis (default = 100)"));  // Limit on the number of memdep results to process.  static const unsigned int NumResultsLimit = 100; @@ -60,7 +66,8 @@ char MemoryDependenceAnalysis::ID = 0;  INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep",                  "Memory Dependence Analysis", false, true)  INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)  INITIALIZE_PASS_END(MemoryDependenceAnalysis, "memdep",                        "Memory Dependence Analysis", false, true) @@ -87,15 +94,17 @@ void MemoryDependenceAnalysis::releaseMemory() {  void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {    AU.setPreservesAll();    AU.addRequired<AssumptionCacheTracker>(); -  AU.addRequiredTransitive<AliasAnalysis>(); +  AU.addRequiredTransitive<AAResultsWrapperPass>(); +  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();  }  bool MemoryDependenceAnalysis::runOnFunction(Function &F) { -  AA = &getAnalysis<AliasAnalysis>(); +  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();    AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);    DominatorTreeWrapperPass *DTWP =        getAnalysisIfAvailable<DominatorTreeWrapperPass>();    DT = DTWP ? &DTWP->getDomTree() : nullptr; +  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();    return false;  } @@ -118,43 +127,43 @@ static void RemoveFromReverseMap(DenseMap<Instruction*,  /// location, fill in Loc with the details, otherwise set Loc.Ptr to null.  /// Return a ModRefInfo value describing the general behavior of the  /// instruction. 
-static AliasAnalysis::ModRefResult -GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) { +static ModRefInfo GetLocation(const Instruction *Inst, MemoryLocation &Loc, +                              const TargetLibraryInfo &TLI) {    if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {      if (LI->isUnordered()) {        Loc = MemoryLocation::get(LI); -      return AliasAnalysis::Ref; +      return MRI_Ref;      }      if (LI->getOrdering() == Monotonic) {        Loc = MemoryLocation::get(LI); -      return AliasAnalysis::ModRef; +      return MRI_ModRef;      }      Loc = MemoryLocation(); -    return AliasAnalysis::ModRef; +    return MRI_ModRef;    }    if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {      if (SI->isUnordered()) {        Loc = MemoryLocation::get(SI); -      return AliasAnalysis::Mod; +      return MRI_Mod;      }      if (SI->getOrdering() == Monotonic) {        Loc = MemoryLocation::get(SI); -      return AliasAnalysis::ModRef; +      return MRI_ModRef;      }      Loc = MemoryLocation(); -    return AliasAnalysis::ModRef; +    return MRI_ModRef;    }    if (const VAArgInst *V = dyn_cast<VAArgInst>(Inst)) {      Loc = MemoryLocation::get(V); -    return AliasAnalysis::ModRef; +    return MRI_ModRef;    } -  if (const CallInst *CI = isFreeCall(Inst, AA->getTargetLibraryInfo())) { +  if (const CallInst *CI = isFreeCall(Inst, &TLI)) {      // calls to free() deallocate the entire structure      Loc = MemoryLocation(CI->getArgOperand(0)); -    return AliasAnalysis::Mod; +    return MRI_Mod;    }    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -170,7 +179,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {            cast<ConstantInt>(II->getArgOperand(0))->getZExtValue(), AAInfo);        // These intrinsics don't really modify the memory, but returning Mod        // will allow them to be handled conservatively. -      return AliasAnalysis::Mod; +      return MRI_Mod;      case Intrinsic::invariant_end:        II->getAAMetadata(AAInfo);        Loc = MemoryLocation( @@ -178,7 +187,7 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {            cast<ConstantInt>(II->getArgOperand(1))->getZExtValue(), AAInfo);        // These intrinsics don't really modify the memory, but returning Mod        // will allow them to be handled conservatively. -      return AliasAnalysis::Mod; +      return MRI_Mod;      default:        break;      } @@ -186,10 +195,10 @@ GetLocation(const Instruction *Inst, MemoryLocation &Loc, AliasAnalysis *AA) {    // Otherwise, just do the coarse-grained thing that always works.    if (Inst->mayWriteToMemory()) -    return AliasAnalysis::ModRef; +    return MRI_ModRef;    if (Inst->mayReadFromMemory()) -    return AliasAnalysis::Ref; -  return AliasAnalysis::NoModRef; +    return MRI_Ref; +  return MRI_NoModRef;  }  /// getCallSiteDependencyFrom - Private helper for finding the local @@ -207,14 +216,14 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,      if (!Limit)        return MemDepResult::getUnknown(); -    Instruction *Inst = --ScanIt; +    Instruction *Inst = &*--ScanIt;      // If this inst is a memory op, get the pointer it accessed      MemoryLocation Loc; -    AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); +    ModRefInfo MR = GetLocation(Inst, Loc, *TLI);      if (Loc.Ptr) {        // A simple instruction. 
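
The GetLocation() rewrite above returns the free-standing MRI_NoModRef/MRI_Ref/MRI_Mod/MRI_ModRef values instead of the old AliasAnalysis::ModRefResult enumerators, and later hunks test individual bits of the result (for example !(MR & MRI_Mod)). A toy mirror of that two-bit encoding; the exact numeric values are not shown in this diff, so the sketch assumes the conventional layout where ModRef is the union of Ref and Mod:

    #include <cassert>

    enum ModRef {
      MR_NoModRef = 0,
      MR_Ref      = 1, // may read the queried location
      MR_Mod      = 2, // may write the queried location
      MR_ModRef   = MR_Ref | MR_Mod,
    };

    int main() {
      // A result without the Mod bit cannot clobber, so a scan that only
      // cares about writes can treat the instruction like a plain load.
      ModRef MR = MR_Ref;
      assert(!(MR & MR_Mod));
      assert((MR_Ref | MR_Mod) == MR_ModRef);
      return 0;
    }
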
-      if (AA->getModRefInfo(CS, Loc) != AliasAnalysis::NoModRef) +      if (AA->getModRefInfo(CS, Loc) != MRI_NoModRef)          return MemDepResult::getClobber(Inst);        continue;      } @@ -224,10 +233,10 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,        if (isa<DbgInfoIntrinsic>(Inst)) continue;        // If these two calls do not interfere, look past it.        switch (AA->getModRefInfo(CS, InstCS)) { -      case AliasAnalysis::NoModRef: +      case MRI_NoModRef:          // If the two calls are the same, return InstCS as a Def, so that          // CS can be found redundant and eliminated. -        if (isReadOnlyCall && !(MR & AliasAnalysis::Mod) && +        if (isReadOnlyCall && !(MR & MRI_Mod) &&              CS.getInstruction()->isIdenticalToWhenDefined(Inst))            return MemDepResult::getDef(Inst); @@ -241,7 +250,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall,      // If we could not obtain a pointer for the instruction and the instruction      // touches memory then assume that this is a dependency. -    if (MR != AliasAnalysis::NoModRef) +    if (MR != MRI_NoModRef)        return MemDepResult::getClobber(Inst);    } @@ -371,6 +380,75 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(      const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,      BasicBlock *BB, Instruction *QueryInst) { +  if (QueryInst != nullptr) { +    if (auto *LI = dyn_cast<LoadInst>(QueryInst)) { +      MemDepResult invariantGroupDependency = +          getInvariantGroupPointerDependency(LI, BB); + +      if (invariantGroupDependency.isDef()) +        return invariantGroupDependency; +    } +  } +  return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst); +} + +MemDepResult +MemoryDependenceAnalysis::getInvariantGroupPointerDependency(LoadInst *LI, +                                                             BasicBlock *BB) { +  Value *LoadOperand = LI->getPointerOperand(); +  // It's is not safe to walk the use list of global value, because function +  // passes aren't allowed to look outside their functions. +  if (isa<GlobalValue>(LoadOperand)) +    return MemDepResult::getUnknown(); + +  auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group); +  if (!InvariantGroupMD) +    return MemDepResult::getUnknown(); + +  MemDepResult Result = MemDepResult::getUnknown(); +  llvm::SmallSet<Value *, 14> Seen; +  // Queue to process all pointers that are equivalent to load operand. +  llvm::SmallVector<Value *, 8> LoadOperandsQueue; +  LoadOperandsQueue.push_back(LoadOperand); +  while (!LoadOperandsQueue.empty()) { +    Value *Ptr = LoadOperandsQueue.pop_back_val(); +    if (isa<GlobalValue>(Ptr)) +      continue; + +    if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) { +      if (!Seen.count(BCI->getOperand(0))) { +        LoadOperandsQueue.push_back(BCI->getOperand(0)); +        Seen.insert(BCI->getOperand(0)); +      } +    } + +    for (Use &Us : Ptr->uses()) { +      auto *U = dyn_cast<Instruction>(Us.getUser()); +      if (!U || U == LI || !DT->dominates(U, LI)) +        continue; + +      if (auto *BCI = dyn_cast<BitCastInst>(U)) { +        if (!Seen.count(BCI)) { +          LoadOperandsQueue.push_back(BCI); +          Seen.insert(BCI); +        } +        continue; +      } +      // If we hit load/store with the same invariant.group metadata (and the +      // same pointer operand) we can assume that value pointed by pointer +      // operand didn't change. 
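
getInvariantGroupPointerDependency() above walks a worklist of values that are bitcast-equivalent to the load's pointer operand, using a Seen set so nothing is queued twice, and inspects the users of each one. A stripped-down standalone sketch of that worklist-plus-visited-set traversal over an invented alias graph (not the real Value/Use classes):

    #include <cstdio>
    #include <set>
    #include <string>
    #include <vector>

    // Invented model: each value lists other values it trivially aliases
    // (standing in for bitcasts of the same pointer).
    struct Val {
      std::string Name;
      std::vector<const Val *> Aliases;
    };

    // Visit every value reachable through alias edges exactly once.
    static void visitEquivalent(const Val &Root) {
      std::set<const Val *> Seen{&Root};
      std::vector<const Val *> Worklist{&Root};
      while (!Worklist.empty()) {
        const Val *V = Worklist.back();
        Worklist.pop_back();
        std::printf("visiting %s\n", V->Name.c_str());
        for (const Val *A : V->Aliases)
          if (Seen.insert(A).second) // queue each value at most once
            Worklist.push_back(A);
      }
    }

    int main() {
      Val C{"c", {}};
      Val B{"b", {&C}};
      Val A{"a", {&B, &C}};
      C.Aliases.push_back(&A); // a cycle is handled by the Seen set
      visitEquivalent(A);
    }
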
+      if ((isa<LoadInst>(U) || isa<StoreInst>(U)) && U->getParent() == BB && +          U->getMetadata(LLVMContext::MD_invariant_group) == InvariantGroupMD) +        return MemDepResult::getDef(U); +    } +  } +  return Result; +} + +MemDepResult MemoryDependenceAnalysis::getSimplePointerDependencyFrom( +    const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, +    BasicBlock *BB, Instruction *QueryInst) { +    const Value *MemLocBase = nullptr;    int64_t MemLocOffset = 0;    unsigned Limit = BlockScanLimit; @@ -416,9 +494,15 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(    const DataLayout &DL = BB->getModule()->getDataLayout(); +  // Create a numbered basic block to lazily compute and cache instruction +  // positions inside a BB. This is used to provide fast queries for relative +  // position between two instructions in a BB and can be used by +  // AliasAnalysis::callCapturesBefore. +  OrderedBasicBlock OBB(BB); +    // Walk backwards through the basic block, looking for dependencies.    while (ScanIt != BB->begin()) { -    Instruction *Inst = --ScanIt; +    Instruction *Inst = &*--ScanIt;      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))        // Debug intrinsics don't (and can't) cause dependencies. @@ -567,7 +651,7 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(        // If alias analysis can tell that this store is guaranteed to not modify        // the query pointer, ignore it.  Use getModRefInfo to handle cases where        // the query pointer points to constant memory etc. -      if (AA->getModRefInfo(SI, MemLoc) == AliasAnalysis::NoModRef) +      if (AA->getModRefInfo(SI, MemLoc) == MRI_NoModRef)          continue;        // Ok, this store might clobber the query pointer.  Check to see if it is @@ -594,7 +678,6 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(      // a subsequent bitcast of the malloc call result.  There can be stores to      // the malloced memory between the malloc call and its bitcast uses, and we      // need to continue scanning until the malloc call. -    const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo();      if (isa<AllocaInst>(Inst) || isNoAliasFn(Inst, TLI)) {        const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, DL); @@ -616,17 +699,17 @@ MemDepResult MemoryDependenceAnalysis::getPointerDependencyFrom(         continue;      // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. -    AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); +    ModRefInfo MR = AA->getModRefInfo(Inst, MemLoc);      // If necessary, perform additional analysis. -    if (MR == AliasAnalysis::ModRef) -      MR = AA->callCapturesBefore(Inst, MemLoc, DT); +    if (MR == MRI_ModRef) +      MR = AA->callCapturesBefore(Inst, MemLoc, DT, &OBB);      switch (MR) { -    case AliasAnalysis::NoModRef: +    case MRI_NoModRef:        // If the call has no effect on the queried pointer, just ignore it.        continue; -    case AliasAnalysis::Mod: +    case MRI_Mod:        return MemDepResult::getClobber(Inst); -    case AliasAnalysis::Ref: +    case MRI_Ref:        // If the call is known to never store to the pointer, and if this is a        // load query, we can safely ignore it (scan past it).        
if (isLoad) @@ -677,20 +760,20 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {        LocalCache = MemDepResult::getNonFuncLocal();    } else {      MemoryLocation MemLoc; -    AliasAnalysis::ModRefResult MR = GetLocation(QueryInst, MemLoc, AA); +    ModRefInfo MR = GetLocation(QueryInst, MemLoc, *TLI);      if (MemLoc.Ptr) {        // If we can do a pointer scan, make it happen. -      bool isLoad = !(MR & AliasAnalysis::Mod); +      bool isLoad = !(MR & MRI_Mod);        if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))          isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; -      LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, -                                            QueryParent, QueryInst); +      LocalCache = getPointerDependencyFrom( +          MemLoc, isLoad, ScanPos->getIterator(), QueryParent, QueryInst);      } else if (isa<CallInst>(QueryInst) || isa<InvokeInst>(QueryInst)) {        CallSite QueryCS(QueryInst);        bool isReadOnly = AA->onlyReadsMemory(QueryCS); -      LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos, -                                             QueryParent); +      LocalCache = getCallSiteDependencyFrom( +          QueryCS, isReadOnly, ScanPos->getIterator(), QueryParent);      } else        // Non-memory instruction.        LocalCache = MemDepResult::getUnknown(); @@ -813,7 +896,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {      BasicBlock::iterator ScanPos = DirtyBB->end();      if (ExistingResult) {        if (Instruction *Inst = ExistingResult->getResult().getInst()) { -        ScanPos = Inst; +        ScanPos = Inst->getIterator();          // We're removing QueryInst's use of Inst.          RemoveFromReverseMap(ReverseNonLocalDeps, Inst,                               QueryCS.getInstruction()); @@ -952,11 +1035,11 @@ MemDepResult MemoryDependenceAnalysis::GetNonLocalInfoForBlock(      assert(ExistingResult->getResult().getInst()->getParent() == BB &&             "Instruction invalidated?");      ++NumCacheDirtyNonLocalPtr; -    ScanPos = ExistingResult->getResult().getInst(); +    ScanPos = ExistingResult->getResult().getInst()->getIterator();      // Eliminating the dirty entry from 'Cache', so update the reverse info.      ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); -    RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); +    RemoveFromReverseMap(ReverseNonLocalPtrDeps, &*ScanPos, CacheKey);    } else {      ++NumUncacheNonLocalPtr;    } @@ -1507,7 +1590,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {    // the entire block to get to this point.    
MemDepResult NewDirtyVal;    if (!RemInst->isTerminator()) -    NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); +    NewDirtyVal = MemDepResult::getDirty(&*++RemInst->getIterator());    ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst);    if (ReverseDepIt != ReverseLocalDeps.end()) { @@ -1614,7 +1697,6 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {    assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); -  AA->deleteValue(RemInst);    DEBUG(verifyRemoved(RemInst));  }  /// verifyRemoved - Verify that the specified instruction does not occur diff --git a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp deleted file mode 100644 index 322a9a80de4c..000000000000 --- a/contrib/llvm/lib/Analysis/NoAliasAnalysis.cpp +++ /dev/null @@ -1,95 +0,0 @@ -//===- NoAliasAnalysis.cpp - Minimal Alias Analysis Impl ------------------===// -// -//                     The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the default implementation of the Alias Analysis interface -// that simply returns "I don't know" for all queries. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -using namespace llvm; - -namespace { -  /// NoAA - This class implements the -no-aa pass, which always returns "I -  /// don't know" for alias queries.  NoAA is unlike other alias analysis -  /// implementations, in that it does not chain to a previous analysis.  As -  /// such it doesn't follow many of the rules that other alias analyses must. -  /// -  struct NoAA : public ImmutablePass, public AliasAnalysis { -    static char ID; // Class identification, replacement for typeinfo -    NoAA() : ImmutablePass(ID) { -      initializeNoAAPass(*PassRegistry::getPassRegistry()); -    } - -    void getAnalysisUsage(AnalysisUsage &AU) const override {} - -    bool doInitialization(Module &M) override { -      // Note: NoAA does not call InitializeAliasAnalysis because it's -      // special and does not support chaining. 
-      DL = &M.getDataLayout(); -      return true; -    } - -    AliasResult alias(const MemoryLocation &LocA, -                      const MemoryLocation &LocB) override { -      return MayAlias; -    } - -    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override { -      return UnknownModRefBehavior; -    } -    ModRefBehavior getModRefBehavior(const Function *F) override { -      return UnknownModRefBehavior; -    } - -    bool pointsToConstantMemory(const MemoryLocation &Loc, -                                bool OrLocal) override { -      return false; -    } -    ModRefResult getArgModRefInfo(ImmutableCallSite CS, -                                  unsigned ArgIdx) override { -      return ModRef; -    } - -    ModRefResult getModRefInfo(ImmutableCallSite CS, -                               const MemoryLocation &Loc) override { -      return ModRef; -    } -    ModRefResult getModRefInfo(ImmutableCallSite CS1, -                               ImmutableCallSite CS2) override { -      return ModRef; -    } - -    void deleteValue(Value *V) override {} -    void addEscapingUse(Use &U) override {} - -    /// getAdjustedAnalysisPointer - This method is used when a pass implements -    /// an analysis interface through multiple inheritance.  If needed, it -    /// should override this to adjust the this pointer as needed for the -    /// specified pass info. -    void *getAdjustedAnalysisPointer(const void *ID) override { -      if (ID == &AliasAnalysis::ID) -        return (AliasAnalysis*)this; -      return this; -    } -  }; -}  // End of anonymous namespace - -// Register this pass... -char NoAA::ID = 0; -INITIALIZE_AG_PASS(NoAA, AliasAnalysis, "no-aa", -                   "No Alias Analysis (always returns 'may' alias)", -                   true, true, true) - -ImmutablePass *llvm::createNoAAPass() { return new NoAA(); } diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp new file mode 100644 index 000000000000..25f660ffe221 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp @@ -0,0 +1,170 @@ +//===- ObjCARCAliasAnalysis.cpp - ObjC ARC Optimization -------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines a simple ARC-aware AliasAnalysis using special knowledge +/// of Objective C to enhance other optimization passes which rely on the Alias +/// Analysis infrastructure. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +/// TODO: Theoretically we could check for dependencies between objc_* calls +/// and FMRB_OnlyAccessesArgumentPointees calls or other well-behaved calls. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ObjCARCAliasAnalysis.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Value.h" +#include "llvm/InitializePasses.h" +#include "llvm/PassAnalysisSupport.h" +#include "llvm/PassSupport.h" + +#define DEBUG_TYPE "objc-arc-aa" + +using namespace llvm; +using namespace llvm::objcarc; + +AliasResult ObjCARCAAResult::alias(const MemoryLocation &LocA, +                                   const MemoryLocation &LocB) { +  if (!EnableARCOpts) +    return AAResultBase::alias(LocA, LocB); + +  // First, strip off no-ops, including ObjC-specific no-ops, and try making a +  // precise alias query. +  const Value *SA = GetRCIdentityRoot(LocA.Ptr); +  const Value *SB = GetRCIdentityRoot(LocB.Ptr); +  AliasResult Result = +      AAResultBase::alias(MemoryLocation(SA, LocA.Size, LocA.AATags), +                          MemoryLocation(SB, LocB.Size, LocB.AATags)); +  if (Result != MayAlias) +    return Result; + +  // If that failed, climb to the underlying object, including climbing through +  // ObjC-specific no-ops, and try making an imprecise alias query. +  const Value *UA = GetUnderlyingObjCPtr(SA, DL); +  const Value *UB = GetUnderlyingObjCPtr(SB, DL); +  if (UA != SA || UB != SB) { +    Result = AAResultBase::alias(MemoryLocation(UA), MemoryLocation(UB)); +    // We can't use MustAlias or PartialAlias results here because +    // GetUnderlyingObjCPtr may return an offsetted pointer value. +    if (Result == NoAlias) +      return NoAlias; +  } + +  // If that failed, fail. We don't need to chain here, since that's covered +  // by the earlier precise query. +  return MayAlias; +} + +bool ObjCARCAAResult::pointsToConstantMemory(const MemoryLocation &Loc, +                                             bool OrLocal) { +  if (!EnableARCOpts) +    return AAResultBase::pointsToConstantMemory(Loc, OrLocal); + +  // First, strip off no-ops, including ObjC-specific no-ops, and try making +  // a precise alias query. +  const Value *S = GetRCIdentityRoot(Loc.Ptr); +  if (AAResultBase::pointsToConstantMemory( +          MemoryLocation(S, Loc.Size, Loc.AATags), OrLocal)) +    return true; + +  // If that failed, climb to the underlying object, including climbing through +  // ObjC-specific no-ops, and try making an imprecise alias query. +  const Value *U = GetUnderlyingObjCPtr(S, DL); +  if (U != S) +    return AAResultBase::pointsToConstantMemory(MemoryLocation(U), OrLocal); + +  // If that failed, fail. We don't need to chain here, since that's covered +  // by the earlier precise query. 
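
ObjCARCAAResult::alias() above makes a precise query on the RC-identity-stripped pointers first and, when that is inconclusive, only accepts a NoAlias answer from the follow-up query on the underlying objects, because GetUnderlyingObjCPtr may hand back a pointer at a different offset. A compact standalone model of that "trust only NoAlias from the coarse query" decision, with invented query functions standing in for the real AA calls:

    #include <cassert>

    enum AliasResult { NoAlias, MayAlias, PartialAlias, MustAlias };

    // Invented stand-ins for the precise and underlying-object queries.
    static AliasResult preciseQuery() { return MayAlias; }
    static AliasResult coarseQuery() { return MustAlias; }

    static AliasResult combinedAlias() {
      AliasResult R = preciseQuery();
      if (R != MayAlias)
        return R; // a precise answer stands on its own
      // The coarse query looked through offsets, so MustAlias/PartialAlias
      // from it cannot be forwarded; only NoAlias is safe to report.
      if (coarseQuery() == NoAlias)
        return NoAlias;
      return MayAlias;
    }

    int main() {
      assert(combinedAlias() == MayAlias);
      return 0;
    }
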
+  return false; +} + +FunctionModRefBehavior ObjCARCAAResult::getModRefBehavior(const Function *F) { +  if (!EnableARCOpts) +    return AAResultBase::getModRefBehavior(F); + +  switch (GetFunctionClass(F)) { +  case ARCInstKind::NoopCast: +    return FMRB_DoesNotAccessMemory; +  default: +    break; +  } + +  return AAResultBase::getModRefBehavior(F); +} + +ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS, +                                          const MemoryLocation &Loc) { +  if (!EnableARCOpts) +    return AAResultBase::getModRefInfo(CS, Loc); + +  switch (GetBasicARCInstKind(CS.getInstruction())) { +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::NoopCast: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +    // These functions don't access any memory visible to the compiler. +    // Note that this doesn't include objc_retainBlock, because it updates +    // pointers when it copies block data. +    return MRI_NoModRef; +  default: +    break; +  } + +  return AAResultBase::getModRefInfo(CS, Loc); +} + +ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> *AM) { +  return ObjCARCAAResult(F.getParent()->getDataLayout(), +                         AM->getResult<TargetLibraryAnalysis>(F)); +} + +char ObjCARCAA::PassID; + +char ObjCARCAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(ObjCARCAAWrapperPass, "objc-arc-aa", +                      "ObjC-ARC-Based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ObjCARCAAWrapperPass, "objc-arc-aa", +                    "ObjC-ARC-Based Alias Analysis", false, true) + +ImmutablePass *llvm::createObjCARCAAWrapperPass() { +  return new ObjCARCAAWrapperPass(); +} + +ObjCARCAAWrapperPass::ObjCARCAAWrapperPass() : ImmutablePass(ID) { +  initializeObjCARCAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool ObjCARCAAWrapperPass::doInitialization(Module &M) { +  Result.reset(new ObjCARCAAResult( +      M.getDataLayout(), getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); +  return false; +} + +bool ObjCARCAAWrapperPass::doFinalization(Module &M) { +  Result.reset(); +  return false; +} + +void ObjCARCAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<TargetLibraryInfoWrapperPass>(); +} diff --git a/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp new file mode 100644 index 000000000000..e3e74aa249da --- /dev/null +++ b/contrib/llvm/lib/Analysis/ObjCARCAnalysisUtils.cpp @@ -0,0 +1,28 @@ +//===- ObjCARCAnalysisUtils.cpp -------------------------------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements common infrastructure for libLLVMObjCARCOpts.a, which +// implements several scalar transformations over the LLVM intermediate +// representation, including the C bindings for that library. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/Support/CommandLine.h" + +using namespace llvm; +using namespace llvm::objcarc; + +/// \brief A handy option to enable/disable all ARC Optimizations. +bool llvm::objcarc::EnableARCOpts; +static cl::opt<bool, true> +EnableARCOptimizations("enable-objc-arc-opts", +                       cl::desc("enable/disable all ARC Optimizations"), +                       cl::location(EnableARCOpts), +                       cl::init(true)); diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp new file mode 100644 index 000000000000..133b63513c87 --- /dev/null +++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp @@ -0,0 +1,675 @@ +//===- ARCInstKind.cpp - ObjC ARC Optimization ----------------------------===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file defines several utility functions used by various ARC +/// optimizations which are IMHO too big to be in a header file. +/// +/// WARNING: This file knows about certain library functions. It recognizes them +/// by name, and hardwires knowledge of their semantics. +/// +/// WARNING: This file knows about how certain Objective-C library functions are +/// used. Naive LLVM IR transformations which would otherwise be +/// behavior-preserving may break these assumptions. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/ObjCARCInstKind.h" +#include "llvm/Analysis/ObjCARCAnalysisUtils.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/Intrinsics.h" + +using namespace llvm; +using namespace llvm::objcarc; + +raw_ostream &llvm::objcarc::operator<<(raw_ostream &OS, +                                       const ARCInstKind Class) { +  switch (Class) { +  case ARCInstKind::Retain: +    return OS << "ARCInstKind::Retain"; +  case ARCInstKind::RetainRV: +    return OS << "ARCInstKind::RetainRV"; +  case ARCInstKind::RetainBlock: +    return OS << "ARCInstKind::RetainBlock"; +  case ARCInstKind::Release: +    return OS << "ARCInstKind::Release"; +  case ARCInstKind::Autorelease: +    return OS << "ARCInstKind::Autorelease"; +  case ARCInstKind::AutoreleaseRV: +    return OS << "ARCInstKind::AutoreleaseRV"; +  case ARCInstKind::AutoreleasepoolPush: +    return OS << "ARCInstKind::AutoreleasepoolPush"; +  case ARCInstKind::AutoreleasepoolPop: +    return OS << "ARCInstKind::AutoreleasepoolPop"; +  case ARCInstKind::NoopCast: +    return OS << "ARCInstKind::NoopCast"; +  case ARCInstKind::FusedRetainAutorelease: +    return OS << "ARCInstKind::FusedRetainAutorelease"; +  case ARCInstKind::FusedRetainAutoreleaseRV: +    return OS << "ARCInstKind::FusedRetainAutoreleaseRV"; +  case ARCInstKind::LoadWeakRetained: +    return OS << "ARCInstKind::LoadWeakRetained"; +  case ARCInstKind::StoreWeak: +    return OS << "ARCInstKind::StoreWeak"; +  case ARCInstKind::InitWeak: +    return OS << "ARCInstKind::InitWeak"; +  case ARCInstKind::LoadWeak: +    return OS << "ARCInstKind::LoadWeak"; +  case ARCInstKind::MoveWeak: +    return OS << "ARCInstKind::MoveWeak"; +  case ARCInstKind::CopyWeak: +    return OS << "ARCInstKind::CopyWeak"; +  case 
ARCInstKind::DestroyWeak: +    return OS << "ARCInstKind::DestroyWeak"; +  case ARCInstKind::StoreStrong: +    return OS << "ARCInstKind::StoreStrong"; +  case ARCInstKind::CallOrUser: +    return OS << "ARCInstKind::CallOrUser"; +  case ARCInstKind::Call: +    return OS << "ARCInstKind::Call"; +  case ARCInstKind::User: +    return OS << "ARCInstKind::User"; +  case ARCInstKind::IntrinsicUser: +    return OS << "ARCInstKind::IntrinsicUser"; +  case ARCInstKind::None: +    return OS << "ARCInstKind::None"; +  } +  llvm_unreachable("Unknown instruction class!"); +} + +ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) { +  Function::const_arg_iterator AI = F->arg_begin(), AE = F->arg_end(); + +  // No (mandatory) arguments. +  if (AI == AE) +    return StringSwitch<ARCInstKind>(F->getName()) +        .Case("objc_autoreleasePoolPush", ARCInstKind::AutoreleasepoolPush) +        .Case("clang.arc.use", ARCInstKind::IntrinsicUser) +        .Default(ARCInstKind::CallOrUser); + +  // One argument. +  const Argument *A0 = &*AI++; +  if (AI == AE) +    // Argument is a pointer. +    if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) { +      Type *ETy = PTy->getElementType(); +      // Argument is i8*. +      if (ETy->isIntegerTy(8)) +        return StringSwitch<ARCInstKind>(F->getName()) +            .Case("objc_retain", ARCInstKind::Retain) +            .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV) +            .Case("objc_retainBlock", ARCInstKind::RetainBlock) +            .Case("objc_release", ARCInstKind::Release) +            .Case("objc_autorelease", ARCInstKind::Autorelease) +            .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV) +            .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop) +            .Case("objc_retainedObject", ARCInstKind::NoopCast) +            .Case("objc_unretainedObject", ARCInstKind::NoopCast) +            .Case("objc_unretainedPointer", ARCInstKind::NoopCast) +            .Case("objc_retain_autorelease", +                  ARCInstKind::FusedRetainAutorelease) +            .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease) +            .Case("objc_retainAutoreleaseReturnValue", +                  ARCInstKind::FusedRetainAutoreleaseRV) +            .Case("objc_sync_enter", ARCInstKind::User) +            .Case("objc_sync_exit", ARCInstKind::User) +            .Default(ARCInstKind::CallOrUser); + +      // Argument is i8** +      if (PointerType *Pte = dyn_cast<PointerType>(ETy)) +        if (Pte->getElementType()->isIntegerTy(8)) +          return StringSwitch<ARCInstKind>(F->getName()) +              .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained) +              .Case("objc_loadWeak", ARCInstKind::LoadWeak) +              .Case("objc_destroyWeak", ARCInstKind::DestroyWeak) +              .Default(ARCInstKind::CallOrUser); +    } + +  // Two arguments, first is i8**. 
+  const Argument *A1 = &*AI++; +  if (AI == AE) +    if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) +      if (PointerType *Pte = dyn_cast<PointerType>(PTy->getElementType())) +        if (Pte->getElementType()->isIntegerTy(8)) +          if (PointerType *PTy1 = dyn_cast<PointerType>(A1->getType())) { +            Type *ETy1 = PTy1->getElementType(); +            // Second argument is i8* +            if (ETy1->isIntegerTy(8)) +              return StringSwitch<ARCInstKind>(F->getName()) +                  .Case("objc_storeWeak", ARCInstKind::StoreWeak) +                  .Case("objc_initWeak", ARCInstKind::InitWeak) +                  .Case("objc_storeStrong", ARCInstKind::StoreStrong) +                  .Default(ARCInstKind::CallOrUser); +            // Second argument is i8**. +            if (PointerType *Pte1 = dyn_cast<PointerType>(ETy1)) +              if (Pte1->getElementType()->isIntegerTy(8)) +                return StringSwitch<ARCInstKind>(F->getName()) +                    .Case("objc_moveWeak", ARCInstKind::MoveWeak) +                    .Case("objc_copyWeak", ARCInstKind::CopyWeak) +                    // Ignore annotation calls. This is important to stop the +                    // optimizer from treating annotations as uses which would +                    // make the state of the pointers they are attempting to +                    // elucidate to be incorrect. +                    .Case("llvm.arc.annotation.topdown.bbstart", +                          ARCInstKind::None) +                    .Case("llvm.arc.annotation.topdown.bbend", +                          ARCInstKind::None) +                    .Case("llvm.arc.annotation.bottomup.bbstart", +                          ARCInstKind::None) +                    .Case("llvm.arc.annotation.bottomup.bbend", +                          ARCInstKind::None) +                    .Default(ARCInstKind::CallOrUser); +          } + +  // Anything else. +  return ARCInstKind::CallOrUser; +} + +// A whitelist of intrinsics that we know do not use objc pointers or decrement +// ref counts. +static bool isInertIntrinsic(unsigned ID) { +  // TODO: Make this into a covered switch. +  switch (ID) { +  case Intrinsic::returnaddress: +  case Intrinsic::frameaddress: +  case Intrinsic::stacksave: +  case Intrinsic::stackrestore: +  case Intrinsic::vastart: +  case Intrinsic::vacopy: +  case Intrinsic::vaend: +  case Intrinsic::objectsize: +  case Intrinsic::prefetch: +  case Intrinsic::stackprotector: +  case Intrinsic::eh_return_i32: +  case Intrinsic::eh_return_i64: +  case Intrinsic::eh_typeid_for: +  case Intrinsic::eh_dwarf_cfa: +  case Intrinsic::eh_sjlj_lsda: +  case Intrinsic::eh_sjlj_functioncontext: +  case Intrinsic::init_trampoline: +  case Intrinsic::adjust_trampoline: +  case Intrinsic::lifetime_start: +  case Intrinsic::lifetime_end: +  case Intrinsic::invariant_start: +  case Intrinsic::invariant_end: +  // Don't let dbg info affect our results. +  case Intrinsic::dbg_declare: +  case Intrinsic::dbg_value: +    // Short cut: Some intrinsics obviously don't use ObjC pointers. +    return true; +  default: +    return false; +  } +} + +// A whitelist of intrinsics that we know do not use objc pointers or decrement +// ref counts. +static bool isUseOnlyIntrinsic(unsigned ID) { +  // We are conservative and even though intrinsics are unlikely to touch +  // reference counts, we white list them for safety. +  // +  // TODO: Expand this into a covered switch. There is a lot more here. 
+  switch (ID) { +  case Intrinsic::memcpy: +  case Intrinsic::memmove: +  case Intrinsic::memset: +    return true; +  default: +    return false; +  } +} + +/// \brief Determine what kind of construct V is. +ARCInstKind llvm::objcarc::GetARCInstKind(const Value *V) { +  if (const Instruction *I = dyn_cast<Instruction>(V)) { +    // Any instruction other than bitcast and gep with a pointer operand have a +    // use of an objc pointer. Bitcasts, GEPs, Selects, PHIs transfer a pointer +    // to a subsequent use, rather than using it themselves, in this sense. +    // As a short cut, several other opcodes are known to have no pointer +    // operands of interest. And ret is never followed by a release, so it's +    // not interesting to examine. +    switch (I->getOpcode()) { +    case Instruction::Call: { +      const CallInst *CI = cast<CallInst>(I); +      // See if we have a function that we know something about. +      if (const Function *F = CI->getCalledFunction()) { +        ARCInstKind Class = GetFunctionClass(F); +        if (Class != ARCInstKind::CallOrUser) +          return Class; +        Intrinsic::ID ID = F->getIntrinsicID(); +        if (isInertIntrinsic(ID)) +          return ARCInstKind::None; +        if (isUseOnlyIntrinsic(ID)) +          return ARCInstKind::User; +      } + +      // Otherwise, be conservative. +      return GetCallSiteClass(CI); +    } +    case Instruction::Invoke: +      // Otherwise, be conservative. +      return GetCallSiteClass(cast<InvokeInst>(I)); +    case Instruction::BitCast: +    case Instruction::GetElementPtr: +    case Instruction::Select: +    case Instruction::PHI: +    case Instruction::Ret: +    case Instruction::Br: +    case Instruction::Switch: +    case Instruction::IndirectBr: +    case Instruction::Alloca: +    case Instruction::VAArg: +    case Instruction::Add: +    case Instruction::FAdd: +    case Instruction::Sub: +    case Instruction::FSub: +    case Instruction::Mul: +    case Instruction::FMul: +    case Instruction::SDiv: +    case Instruction::UDiv: +    case Instruction::FDiv: +    case Instruction::SRem: +    case Instruction::URem: +    case Instruction::FRem: +    case Instruction::Shl: +    case Instruction::LShr: +    case Instruction::AShr: +    case Instruction::And: +    case Instruction::Or: +    case Instruction::Xor: +    case Instruction::SExt: +    case Instruction::ZExt: +    case Instruction::Trunc: +    case Instruction::IntToPtr: +    case Instruction::FCmp: +    case Instruction::FPTrunc: +    case Instruction::FPExt: +    case Instruction::FPToUI: +    case Instruction::FPToSI: +    case Instruction::UIToFP: +    case Instruction::SIToFP: +    case Instruction::InsertElement: +    case Instruction::ExtractElement: +    case Instruction::ShuffleVector: +    case Instruction::ExtractValue: +      break; +    case Instruction::ICmp: +      // Comparing a pointer with null, or any other constant, isn't an +      // interesting use, because we don't care what the pointer points to, or +      // about the values of any other dynamic reference-counted pointers. +      if (IsPotentialRetainableObjPtr(I->getOperand(1))) +        return ARCInstKind::User; +      break; +    default: +      // For anything else, check all the operands. 
+      // Note that this includes both operands of a Store: while the first +      // operand isn't actually being dereferenced, it is being stored to +      // memory where we can no longer track who might read it and dereference +      // it, so we have to consider it potentially used. +      for (User::const_op_iterator OI = I->op_begin(), OE = I->op_end(); +           OI != OE; ++OI) +        if (IsPotentialRetainableObjPtr(*OI)) +          return ARCInstKind::User; +    } +  } + +  // Otherwise, it's totally inert for ARC purposes. +  return ARCInstKind::None; +} + +/// \brief Test if the given class is a kind of user. +bool llvm::objcarc::IsUser(ARCInstKind Class) { +  switch (Class) { +  case ARCInstKind::User: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::IntrinsicUser: +    return true; +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::RetainBlock: +  case ARCInstKind::Release: +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::NoopCast: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::Call: +  case ARCInstKind::None: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class is objc_retain or equivalent. +bool llvm::objcarc::IsRetain(ARCInstKind Class) { +  switch (Class) { +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +    return true; +  // I believe we treat retain block as not a retain since it can copy its +  // block. +  case ARCInstKind::RetainBlock: +  case ARCInstKind::Release: +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::NoopCast: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +  case ARCInstKind::User: +  case ARCInstKind::None: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class is objc_autorelease or equivalent. 
+bool llvm::objcarc::IsAutorelease(ARCInstKind Class) { +  switch (Class) { +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +    return true; +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::RetainBlock: +  case ARCInstKind::Release: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::NoopCast: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +  case ARCInstKind::User: +  case ARCInstKind::None: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which return their +/// argument verbatim. +bool llvm::objcarc::IsForwarding(ARCInstKind Class) { +  switch (Class) { +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::NoopCast: +    return true; +  case ARCInstKind::RetainBlock: +  case ARCInstKind::Release: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +  case ARCInstKind::User: +  case ARCInstKind::None: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which do nothing if +/// passed a null pointer. +bool llvm::objcarc::IsNoopOnNull(ARCInstKind Class) { +  switch (Class) { +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::Release: +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::RetainBlock: +    return true; +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +  case ARCInstKind::User: +  case ARCInstKind::None: +  case ARCInstKind::NoopCast: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the "tail" keyword. +bool llvm::objcarc::IsAlwaysTail(ARCInstKind Class) { +  // ARCInstKind::RetainBlock may be given a stack argument. 
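
The ARCInstKind predicates in this file (IsUser, IsRetain, IsAutorelease, IsAlwaysTail, and the rest) all use the covered-switch pattern: every enumerator is listed, there is no default case, and the trailing llvm_unreachable only documents the impossible fall-through, so adding a new ARCInstKind makes the compiler point at every switch that needs updating. A tiny standalone illustration of the idiom with an invented three-value enum; with -Wswitch (part of -Wall) a missing case is diagnosed:

    #include <cstdio>
    #include <cstdlib>

    enum class Kind { Retain, Release, User };

    // Covered switch: no default, every enumerator handled explicitly.
    static bool isRetainLike(Kind K) {
      switch (K) {
      case Kind::Retain:
        return true;
      case Kind::Release:
      case Kind::User:
        return false;
      }
      // Unreachable for valid enumerators; stands in for llvm_unreachable.
      std::abort();
    }

    int main() {
      std::printf("%d\n", isRetainLike(Kind::Retain)); // prints 1
    }
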
+  switch (Class) { +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::AutoreleaseRV: +    return true; +  case ARCInstKind::Release: +  case ARCInstKind::Autorelease: +  case ARCInstKind::RetainBlock: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +  case ARCInstKind::User: +  case ARCInstKind::None: +  case ARCInstKind::NoopCast: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which are never safe +/// to mark with the "tail" keyword. +bool llvm::objcarc::IsNeverTail(ARCInstKind Class) { +  /// It is never safe to tail call objc_autorelease since by tail calling +  /// objc_autorelease: fast autoreleasing causing our object to be potentially +  /// reclaimed from the autorelease pool which violates the semantics of +  /// __autoreleasing types in ARC. +  switch (Class) { +  case ARCInstKind::Autorelease: +    return true; +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::Release: +  case ARCInstKind::RetainBlock: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +  case ARCInstKind::User: +  case ARCInstKind::None: +  case ARCInstKind::NoopCast: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +/// \brief Test if the given class represents instructions which are always safe +/// to mark with the nounwind attribute. +bool llvm::objcarc::IsNoThrow(ARCInstKind Class) { +  // objc_retainBlock is not nounwind because it calls user copy constructors +  // which could theoretically throw. 
+  switch (Class) { +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::Release: +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +    return true; +  case ARCInstKind::RetainBlock: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +  case ARCInstKind::User: +  case ARCInstKind::None: +  case ARCInstKind::NoopCast: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +/// Test whether the given instruction can autorelease any pointer or cause an +/// autoreleasepool pop. +/// +/// This means that it *could* interrupt the RV optimization. +bool llvm::objcarc::CanInterruptRV(ARCInstKind Class) { +  switch (Class) { +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +    return true; +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::Release: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::RetainBlock: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::User: +  case ARCInstKind::None: +  case ARCInstKind::NoopCast: +    return false; +  } +  llvm_unreachable("covered switch isn't covered?"); +} + +bool llvm::objcarc::CanDecrementRefCount(ARCInstKind Kind) { +  switch (Kind) { +  case ARCInstKind::Retain: +  case ARCInstKind::RetainRV: +  case ARCInstKind::Autorelease: +  case ARCInstKind::AutoreleaseRV: +  case ARCInstKind::NoopCast: +  case ARCInstKind::FusedRetainAutorelease: +  case ARCInstKind::FusedRetainAutoreleaseRV: +  case ARCInstKind::IntrinsicUser: +  case ARCInstKind::User: +  case ARCInstKind::None: +    return false; + +  // The cases below are conservative. + +  // RetainBlock can result in user defined copy constructors being called +  // implying releases may occur. 
+  case ARCInstKind::RetainBlock: +  case ARCInstKind::Release: +  case ARCInstKind::AutoreleasepoolPush: +  case ARCInstKind::AutoreleasepoolPop: +  case ARCInstKind::LoadWeakRetained: +  case ARCInstKind::StoreWeak: +  case ARCInstKind::InitWeak: +  case ARCInstKind::LoadWeak: +  case ARCInstKind::MoveWeak: +  case ARCInstKind::CopyWeak: +  case ARCInstKind::DestroyWeak: +  case ARCInstKind::StoreStrong: +  case ARCInstKind::CallOrUser: +  case ARCInstKind::Call: +    return true; +  } + +  llvm_unreachable("covered switch isn't covered?"); +} diff --git a/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp new file mode 100644 index 000000000000..0f0016f22cc0 --- /dev/null +++ b/contrib/llvm/lib/Analysis/OrderedBasicBlock.cpp @@ -0,0 +1,85 @@ +//===- OrderedBasicBlock.cpp --------------------------------- -*- C++ -*-===// +// +//                     The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the OrderedBasicBlock class. OrderedBasicBlock +// maintains an interface where clients can query if one instruction comes +// before another in a BasicBlock. Since BasicBlock currently lacks a reliable +// way to query relative position between instructions one can use +// OrderedBasicBlock to do such queries. OrderedBasicBlock is lazily built on a +// source BasicBlock and maintains an internal Instruction -> Position map. A +// OrderedBasicBlock instance should be discarded whenever the source +// BasicBlock changes. +// +// It's currently used by the CaptureTracker in order to find relative +// positions of a pair of instructions inside a BasicBlock. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/OrderedBasicBlock.h" +#include "llvm/IR/Instruction.h" +using namespace llvm; + +OrderedBasicBlock::OrderedBasicBlock(const BasicBlock *BasicB) +    : NextInstPos(0), BB(BasicB) { +  LastInstFound = BB->end(); +} + +/// \brief Given no cached results, find if \p A comes before \p B in \p BB. +/// Cache and number out instruction while walking \p BB. +bool OrderedBasicBlock::comesBefore(const Instruction *A, +                                    const Instruction *B) { +  const Instruction *Inst = nullptr; +  assert(!(LastInstFound == BB->end() && NextInstPos != 0) && +         "Instruction supposed to be in NumberedInsts"); + +  // Start the search with the instruction found in the last lookup round. +  auto II = BB->begin(); +  auto IE = BB->end(); +  if (LastInstFound != IE) +    II = std::next(LastInstFound); + +  // Number all instructions up to the point where we find 'A' or 'B'. +  for (; II != IE; ++II) { +    Inst = cast<Instruction>(II); +    NumberedInsts[Inst] = NextInstPos++; +    if (Inst == A || Inst == B) +      break; +  } + +  assert(II != IE && "Instruction not found?"); +  assert((Inst == A || Inst == B) && "Should find A or B"); +  LastInstFound = II; +  return Inst == A; +} + +/// \brief Find out whether \p A dominates \p B, meaning whether \p A +/// comes before \p B in \p BB. This is a simplification that considers +/// cached instruction positions and ignores other basic blocks, being +/// only relevant to compare relative instructions positions inside \p BB. 
+bool OrderedBasicBlock::dominates(const Instruction *A, const Instruction *B) { +  assert(A->getParent() == B->getParent() && +         "Instructions must be in the same basic block!"); + +  // First we lookup the instructions. If they don't exist, lookup will give us +  // back ::end(). If they both exist, we compare the numbers. Otherwise, if NA +  // exists and NB doesn't, it means NA must come before NB because we would +  // have numbered NB as well if it didn't. The same is true for NB. If it +  // exists, but NA does not, NA must come after it. If neither exist, we need +  // to number the block and cache the results (by calling comesBefore). +  auto NAI = NumberedInsts.find(A); +  auto NBI = NumberedInsts.find(B); +  if (NAI != NumberedInsts.end() && NBI != NumberedInsts.end()) +    return NAI->second < NBI->second; +  if (NAI != NumberedInsts.end()) +    return true; +  if (NBI != NumberedInsts.end()) +    return false; + +  return comesBefore(A, B); +} diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index 8cd85348fdcc..f59d26730327 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -21,6 +21,9 @@  #include <algorithm>  #include <iterator>  #include <set> +#ifndef NDEBUG +#include "llvm/Analysis/RegionPrinter.h" +#endif  using namespace llvm; @@ -103,6 +106,12 @@ void RegionInfo::recalculate(Function &F, DominatorTree *DT_,    calculate(F);  } +#ifndef NDEBUG +void RegionInfo::view() { viewRegion(this); } + +void RegionInfo::viewOnly() { viewRegionOnly(this); } +#endif +  //===----------------------------------------------------------------------===//  // RegionInfoPass implementation  // diff --git a/contrib/llvm/lib/Analysis/RegionPrinter.cpp b/contrib/llvm/lib/Analysis/RegionPrinter.cpp index d7f510984881..acb218d5fea0 100644 --- a/contrib/llvm/lib/Analysis/RegionPrinter.cpp +++ b/contrib/llvm/lib/Analysis/RegionPrinter.cpp @@ -20,6 +20,9 @@  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/raw_ostream.h" +#ifndef NDEBUG +#include "llvm/IR/LegacyPassManager.h" +#endif  using namespace llvm; @@ -55,25 +58,22 @@ struct DOTGraphTraits<RegionNode*> : public DefaultDOTGraphTraits {    }  }; -template<> -struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> { +template <> +struct DOTGraphTraits<RegionInfo *> : public DOTGraphTraits<RegionNode *> {    DOTGraphTraits (bool isSimple = false)      : DOTGraphTraits<RegionNode*>(isSimple) {} -  static std::string getGraphName(RegionInfoPass *DT) { -    return "Region Graph"; -  } +  static std::string getGraphName(const RegionInfo *) { return "Region Graph"; } -  std::string getNodeLabel(RegionNode *Node, RegionInfoPass *G) { -    RegionInfo &RI = G->getRegionInfo(); -    return DOTGraphTraits<RegionNode*>::getNodeLabel(Node, -                                                     reinterpret_cast<RegionNode*>(RI.getTopLevelRegion())); +  std::string getNodeLabel(RegionNode *Node, RegionInfo *G) { +    return DOTGraphTraits<RegionNode *>::getNodeLabel( +        Node, reinterpret_cast<RegionNode *>(G->getTopLevelRegion()));    }    std::string getEdgeAttributes(RegionNode *srcNode, -    GraphTraits<RegionInfo*>::ChildIteratorType CI, RegionInfoPass *G) { -    RegionInfo &RI = G->getRegionInfo(); +                                GraphTraits<RegionInfo *>::ChildIteratorType CI, +                                RegionInfo *G) {      RegionNode *destNode = *CI;      if 
(srcNode->isSubRegion() || destNode->isSubRegion()) @@ -83,7 +83,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {      BasicBlock *srcBB = srcNode->getNodeAs<BasicBlock>();      BasicBlock *destBB = destNode->getNodeAs<BasicBlock>(); -    Region *R = RI.getRegionFor(destBB); +    Region *R = G->getRegionFor(destBB);      while (R && R->getParent())        if (R->getParent()->getEntry() == destBB) @@ -91,7 +91,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {        else          break; -    if (R->getEntry() == destBB && R->contains(srcBB)) +    if (R && R->getEntry() == destBB && R->contains(srcBB))        return "constraint=false";      return ""; @@ -99,8 +99,7 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {    // Print the cluster of the subregions. This groups the single basic blocks    // and adds a different background color for each group. -  static void printRegionCluster(const Region &R, -                                 GraphWriter<RegionInfoPass*> &GW, +  static void printRegionCluster(const Region &R, GraphWriter<RegionInfo *> &GW,                                   unsigned depth = 0) {      raw_ostream &O = GW.getOStream();      O.indent(2 * depth) << "subgraph cluster_" << static_cast<const void*>(&R) @@ -132,50 +131,81 @@ struct DOTGraphTraits<RegionInfoPass*> : public DOTGraphTraits<RegionNode*> {      O.indent(2 * depth) << "}\n";    } -  static void addCustomGraphFeatures(const RegionInfoPass* RIP, -                                     GraphWriter<RegionInfoPass*> &GW) { -    const RegionInfo &RI = RIP->getRegionInfo(); +  static void addCustomGraphFeatures(const RegionInfo *G, +                                     GraphWriter<RegionInfo *> &GW) {      raw_ostream &O = GW.getOStream();      O << "\tcolorscheme = \"paired12\"\n"; -    printRegionCluster(*RI.getTopLevelRegion(), GW, 4); +    printRegionCluster(*G->getTopLevelRegion(), GW, 4);    }  };  } //end namespace llvm  namespace { +struct RegionInfoPassGraphTraits { +  static RegionInfo *getGraph(RegionInfoPass *RIP) { +    return &RIP->getRegionInfo(); +  } +}; + +struct RegionPrinter +    : public DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *, +                                   RegionInfoPassGraphTraits> { +  static char ID; +  RegionPrinter() +      : DOTGraphTraitsPrinter<RegionInfoPass, false, RegionInfo *, +                              RegionInfoPassGraphTraits>("reg", ID) { +    initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); +  } +}; +char RegionPrinter::ID = 0; + +struct RegionOnlyPrinter +    : public DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *, +                                   RegionInfoPassGraphTraits> { +  static char ID; +  RegionOnlyPrinter() +      : DOTGraphTraitsPrinter<RegionInfoPass, true, RegionInfo *, +                              RegionInfoPassGraphTraits>("reg", ID) { +    initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); +  } +}; +char RegionOnlyPrinter::ID = 0; +  struct RegionViewer -  : public DOTGraphTraitsViewer<RegionInfoPass, false> { +    : public DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *, +                                  RegionInfoPassGraphTraits> {    static char ID; -  RegionViewer() : DOTGraphTraitsViewer<RegionInfoPass, false>("reg", ID){ +  RegionViewer() +      : DOTGraphTraitsViewer<RegionInfoPass, false, RegionInfo *, +                             RegionInfoPassGraphTraits>("reg", ID) {      
initializeRegionViewerPass(*PassRegistry::getPassRegistry());    }  };  char RegionViewer::ID = 0;  struct RegionOnlyViewer -  : public DOTGraphTraitsViewer<RegionInfoPass, true> { +    : public DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *, +                                  RegionInfoPassGraphTraits> {    static char ID; -  RegionOnlyViewer() : DOTGraphTraitsViewer<RegionInfoPass, true>("regonly", ID) { +  RegionOnlyViewer() +      : DOTGraphTraitsViewer<RegionInfoPass, true, RegionInfo *, +                             RegionInfoPassGraphTraits>("regonly", ID) {      initializeRegionOnlyViewerPass(*PassRegistry::getPassRegistry());    }  };  char RegionOnlyViewer::ID = 0; -struct RegionPrinter -  : public DOTGraphTraitsPrinter<RegionInfoPass, false> { -  static char ID; -  RegionPrinter() : -    DOTGraphTraitsPrinter<RegionInfoPass, false>("reg", ID) { -      initializeRegionPrinterPass(*PassRegistry::getPassRegistry()); -    } -}; -char RegionPrinter::ID = 0;  } //end anonymous namespace  INITIALIZE_PASS(RegionPrinter, "dot-regions",                  "Print regions of function to 'dot' file", true, true) +INITIALIZE_PASS( +    RegionOnlyPrinter, "dot-regions-only", +    "Print regions of function to 'dot' file (with no function bodies)", true, +    true) +  INITIALIZE_PASS(RegionViewer, "view-regions", "View regions of function",                  true, true) @@ -183,25 +213,12 @@ INITIALIZE_PASS(RegionOnlyViewer, "view-regions-only",                  "View regions of function (with no function bodies)",                  true, true) -namespace { - -struct RegionOnlyPrinter -  : public DOTGraphTraitsPrinter<RegionInfoPass, true> { -  static char ID; -  RegionOnlyPrinter() : -    DOTGraphTraitsPrinter<RegionInfoPass, true>("reg", ID) { -      initializeRegionOnlyPrinterPass(*PassRegistry::getPassRegistry()); -    } -}; +FunctionPass *llvm::createRegionPrinterPass() { return new RegionPrinter(); } +FunctionPass *llvm::createRegionOnlyPrinterPass() { +  return new RegionOnlyPrinter();  } -char RegionOnlyPrinter::ID = 0; -INITIALIZE_PASS(RegionOnlyPrinter, "dot-regions-only", -                "Print regions of function to 'dot' file " -                "(with no function bodies)", -                true, true) -  FunctionPass* llvm::createRegionViewerPass() {    return new RegionViewer();  } @@ -210,11 +227,41 @@ FunctionPass* llvm::createRegionOnlyViewerPass() {    return new RegionOnlyViewer();  } -FunctionPass* llvm::createRegionPrinterPass() { -  return new RegionPrinter(); +#ifndef NDEBUG +static void viewRegionInfo(RegionInfo *RI, bool ShortNames) { +  assert(RI && "Argument must be non-null"); + +  llvm::Function *F = RI->getTopLevelRegion()->getEntry()->getParent(); +  std::string GraphName = DOTGraphTraits<RegionInfo *>::getGraphName(RI); + +  llvm::ViewGraph(RI, "reg", ShortNames, +                  Twine(GraphName) + " for '" + F->getName() + "' function");  } -FunctionPass* llvm::createRegionOnlyPrinterPass() { -  return new RegionOnlyPrinter(); +static void invokeFunctionPass(const Function *F, FunctionPass *ViewerPass) { +  assert(F && "Argument must be non-null"); +  assert(!F->isDeclaration() && "Function must have an implementation"); + +  // The viewer and analysis passes do not modify anything, so we can safely +  // remove the const qualifier +  auto NonConstF = const_cast<Function *>(F); + +  llvm::legacy::FunctionPassManager FPM(NonConstF->getParent()); +  FPM.add(ViewerPass); +  FPM.doInitialization(); +  FPM.run(*NonConstF); +  FPM.doFinalization();  } +void 
llvm::viewRegion(RegionInfo *RI) { viewRegionInfo(RI, false); } + +void llvm::viewRegion(const Function *F) { +  invokeFunctionPass(F, createRegionViewerPass()); +} + +void llvm::viewRegionOnly(RegionInfo *RI) { viewRegionInfo(RI, true); } + +void llvm::viewRegionOnly(const Function *F) { +  invokeFunctionPass(F, createRegionOnlyViewerPass()); +} +#endif diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index 9c7c1754e387..34074efd1ceb 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -83,11 +83,13 @@  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Metadata.h"  #include "llvm/IR/Operator.h" +#include "llvm/IR/PatternMatch.h"  #include "llvm/Support/CommandLine.h"  #include "llvm/Support/Debug.h"  #include "llvm/Support/ErrorHandling.h"  #include "llvm/Support/MathExtras.h"  #include "llvm/Support/raw_ostream.h" +#include "llvm/Support/SaveAndRestore.h"  #include <algorithm>  using namespace llvm; @@ -114,16 +116,6 @@ static cl::opt<bool>  VerifySCEV("verify-scev",             cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); -INITIALIZE_PASS_BEGIN(ScalarEvolution, "scalar-evolution", -                "Scalar Evolution Analysis", false, true) -INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) -INITIALIZE_PASS_END(ScalarEvolution, "scalar-evolution", -                "Scalar Evolution Analysis", false, true) -char ScalarEvolution::ID = 0; -  //===----------------------------------------------------------------------===//  //                           SCEV class definitions  //===----------------------------------------------------------------------===// @@ -132,12 +124,11 @@ char ScalarEvolution::ID = 0;  // Implementation of the SCEV class.  // -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD  void SCEV::dump() const {    print(dbgs());    dbgs() << '\n';  } -#endif  void SCEV::print(raw_ostream &OS) const {    switch (static_cast<SCEVTypes>(getSCEVType())) { @@ -303,7 +294,7 @@ bool SCEV::isNonConstantNegative() const {    if (!SC) return false;    // Return true if the value is negative, this matches things like (-42 * V). -  return SC->getValue()->getValue().isNegative(); +  return SC->getAPInt().isNegative();  }  SCEVCouldNotCompute::SCEVCouldNotCompute() : @@ -455,179 +446,179 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {  //===----------------------------------------------------------------------===//  namespace { -  /// SCEVComplexityCompare - Return true if the complexity of the LHS is less -  /// than the complexity of the RHS.  This comparator is used to canonicalize -  /// expressions. -  class SCEVComplexityCompare { -    const LoopInfo *const LI; -  public: -    explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - -    // Return true or false if LHS is less than, or at least RHS, respectively. -    bool operator()(const SCEV *LHS, const SCEV *RHS) const { -      return compare(LHS, RHS) < 0; -    } - -    // Return negative, zero, or positive, if LHS is less than, equal to, or -    // greater than RHS, respectively. A three-way result allows recursive -    // comparisons to be more efficient. 
-    int compare(const SCEV *LHS, const SCEV *RHS) const { -      // Fast-path: SCEVs are uniqued so we can do a quick equality check. -      if (LHS == RHS) -        return 0; - -      // Primarily, sort the SCEVs by their getSCEVType(). -      unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); -      if (LType != RType) -        return (int)LType - (int)RType; - -      // Aside from the getSCEVType() ordering, the particular ordering -      // isn't very important except that it's beneficial to be consistent, -      // so that (a + b) and (b + a) don't end up as different expressions. -      switch (static_cast<SCEVTypes>(LType)) { -      case scUnknown: { -        const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); -        const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); - -        // Sort SCEVUnknown values with some loose heuristics. TODO: This is -        // not as complete as it could be. -        const Value *LV = LU->getValue(), *RV = RU->getValue(); - -        // Order pointer values after integer values. This helps SCEVExpander -        // form GEPs. -        bool LIsPointer = LV->getType()->isPointerTy(), -             RIsPointer = RV->getType()->isPointerTy(); -        if (LIsPointer != RIsPointer) -          return (int)LIsPointer - (int)RIsPointer; - -        // Compare getValueID values. -        unsigned LID = LV->getValueID(), -                 RID = RV->getValueID(); -        if (LID != RID) -          return (int)LID - (int)RID; - -        // Sort arguments by their position. -        if (const Argument *LA = dyn_cast<Argument>(LV)) { -          const Argument *RA = cast<Argument>(RV); -          unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); -          return (int)LArgNo - (int)RArgNo; -        } - -        // For instructions, compare their loop depth, and their operand -        // count.  This is pretty loose. -        if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { -          const Instruction *RInst = cast<Instruction>(RV); - -          // Compare loop depths. -          const BasicBlock *LParent = LInst->getParent(), -                           *RParent = RInst->getParent(); -          if (LParent != RParent) { -            unsigned LDepth = LI->getLoopDepth(LParent), -                     RDepth = LI->getLoopDepth(RParent); -            if (LDepth != RDepth) -              return (int)LDepth - (int)RDepth; -          } - -          // Compare the number of operands. -          unsigned LNumOps = LInst->getNumOperands(), -                   RNumOps = RInst->getNumOperands(); -          return (int)LNumOps - (int)RNumOps; -        } +/// SCEVComplexityCompare - Return true if the complexity of the LHS is less +/// than the complexity of the RHS.  This comparator is used to canonicalize +/// expressions. +class SCEVComplexityCompare { +  const LoopInfo *const LI; +public: +  explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} -        return 0; -      } +  // Return true or false if LHS is less than, or at least RHS, respectively. +  bool operator()(const SCEV *LHS, const SCEV *RHS) const { +    return compare(LHS, RHS) < 0; +  } -      case scConstant: { -        const SCEVConstant *LC = cast<SCEVConstant>(LHS); -        const SCEVConstant *RC = cast<SCEVConstant>(RHS); - -        // Compare constant values. 
-        const APInt &LA = LC->getValue()->getValue(); -        const APInt &RA = RC->getValue()->getValue(); -        unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); -        if (LBitWidth != RBitWidth) -          return (int)LBitWidth - (int)RBitWidth; -        return LA.ult(RA) ? -1 : 1; +  // Return negative, zero, or positive, if LHS is less than, equal to, or +  // greater than RHS, respectively. A three-way result allows recursive +  // comparisons to be more efficient. +  int compare(const SCEV *LHS, const SCEV *RHS) const { +    // Fast-path: SCEVs are uniqued so we can do a quick equality check. +    if (LHS == RHS) +      return 0; + +    // Primarily, sort the SCEVs by their getSCEVType(). +    unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); +    if (LType != RType) +      return (int)LType - (int)RType; + +    // Aside from the getSCEVType() ordering, the particular ordering +    // isn't very important except that it's beneficial to be consistent, +    // so that (a + b) and (b + a) don't end up as different expressions. +    switch (static_cast<SCEVTypes>(LType)) { +    case scUnknown: { +      const SCEVUnknown *LU = cast<SCEVUnknown>(LHS); +      const SCEVUnknown *RU = cast<SCEVUnknown>(RHS); + +      // Sort SCEVUnknown values with some loose heuristics. TODO: This is +      // not as complete as it could be. +      const Value *LV = LU->getValue(), *RV = RU->getValue(); + +      // Order pointer values after integer values. This helps SCEVExpander +      // form GEPs. +      bool LIsPointer = LV->getType()->isPointerTy(), +        RIsPointer = RV->getType()->isPointerTy(); +      if (LIsPointer != RIsPointer) +        return (int)LIsPointer - (int)RIsPointer; + +      // Compare getValueID values. +      unsigned LID = LV->getValueID(), +        RID = RV->getValueID(); +      if (LID != RID) +        return (int)LID - (int)RID; + +      // Sort arguments by their position. +      if (const Argument *LA = dyn_cast<Argument>(LV)) { +        const Argument *RA = cast<Argument>(RV); +        unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); +        return (int)LArgNo - (int)RArgNo;        } -      case scAddRecExpr: { -        const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); -        const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); - -        // Compare addrec loop depths. -        const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); -        if (LLoop != RLoop) { -          unsigned LDepth = LLoop->getLoopDepth(), -                   RDepth = RLoop->getLoopDepth(); +      // For instructions, compare their loop depth, and their operand +      // count.  This is pretty loose. +      if (const Instruction *LInst = dyn_cast<Instruction>(LV)) { +        const Instruction *RInst = cast<Instruction>(RV); + +        // Compare loop depths. +        const BasicBlock *LParent = LInst->getParent(), +          *RParent = RInst->getParent(); +        if (LParent != RParent) { +          unsigned LDepth = LI->getLoopDepth(LParent), +            RDepth = LI->getLoopDepth(RParent);            if (LDepth != RDepth)              return (int)LDepth - (int)RDepth;          } -        // Addrec complexity grows with operand count. -        unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); -        if (LNumOps != RNumOps) -          return (int)LNumOps - (int)RNumOps; +        // Compare the number of operands. 
+        unsigned LNumOps = LInst->getNumOperands(), +          RNumOps = RInst->getNumOperands(); +        return (int)LNumOps - (int)RNumOps; +      } -        // Lexicographically compare. -        for (unsigned i = 0; i != LNumOps; ++i) { -          long X = compare(LA->getOperand(i), RA->getOperand(i)); -          if (X != 0) -            return X; -        } +      return 0; +    } -        return 0; +    case scConstant: { +      const SCEVConstant *LC = cast<SCEVConstant>(LHS); +      const SCEVConstant *RC = cast<SCEVConstant>(RHS); + +      // Compare constant values. +      const APInt &LA = LC->getAPInt(); +      const APInt &RA = RC->getAPInt(); +      unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); +      if (LBitWidth != RBitWidth) +        return (int)LBitWidth - (int)RBitWidth; +      return LA.ult(RA) ? -1 : 1; +    } + +    case scAddRecExpr: { +      const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS); +      const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS); + +      // Compare addrec loop depths. +      const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); +      if (LLoop != RLoop) { +        unsigned LDepth = LLoop->getLoopDepth(), +          RDepth = RLoop->getLoopDepth(); +        if (LDepth != RDepth) +          return (int)LDepth - (int)RDepth;        } -      case scAddExpr: -      case scMulExpr: -      case scSMaxExpr: -      case scUMaxExpr: { -        const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); -        const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); - -        // Lexicographically compare n-ary expressions. -        unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); -        if (LNumOps != RNumOps) -          return (int)LNumOps - (int)RNumOps; - -        for (unsigned i = 0; i != LNumOps; ++i) { -          if (i >= RNumOps) -            return 1; -          long X = compare(LC->getOperand(i), RC->getOperand(i)); -          if (X != 0) -            return X; -        } +      // Addrec complexity grows with operand count. +      unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); +      if (LNumOps != RNumOps)          return (int)LNumOps - (int)RNumOps; + +      // Lexicographically compare. +      for (unsigned i = 0; i != LNumOps; ++i) { +        long X = compare(LA->getOperand(i), RA->getOperand(i)); +        if (X != 0) +          return X;        } -      case scUDivExpr: { -        const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); -        const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); +      return 0; +    } + +    case scAddExpr: +    case scMulExpr: +    case scSMaxExpr: +    case scUMaxExpr: { +      const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS); +      const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS); -        // Lexicographically compare udiv expressions. -        long X = compare(LC->getLHS(), RC->getLHS()); +      // Lexicographically compare n-ary expressions. 
+      unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); +      if (LNumOps != RNumOps) +        return (int)LNumOps - (int)RNumOps; + +      for (unsigned i = 0; i != LNumOps; ++i) { +        if (i >= RNumOps) +          return 1; +        long X = compare(LC->getOperand(i), RC->getOperand(i));          if (X != 0)            return X; -        return compare(LC->getRHS(), RC->getRHS());        } +      return (int)LNumOps - (int)RNumOps; +    } -      case scTruncate: -      case scZeroExtend: -      case scSignExtend: { -        const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); -        const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); +    case scUDivExpr: { +      const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS); +      const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS); -        // Compare cast expressions by operand. -        return compare(LC->getOperand(), RC->getOperand()); -      } +      // Lexicographically compare udiv expressions. +      long X = compare(LC->getLHS(), RC->getLHS()); +      if (X != 0) +        return X; +      return compare(LC->getRHS(), RC->getRHS()); +    } -      case scCouldNotCompute: -        llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); -      } -      llvm_unreachable("Unknown SCEV kind!"); +    case scTruncate: +    case scZeroExtend: +    case scSignExtend: { +      const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS); +      const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS); + +      // Compare cast expressions by operand. +      return compare(LC->getOperand(), RC->getOperand());      } -  }; -} + +    case scCouldNotCompute: +      llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); +    } +    llvm_unreachable("Unknown SCEV kind!"); +  } +}; +}  // end anonymous namespace  /// GroupByComplexity - Given a list of SCEV objects, order them by their  /// complexity, and group objects of the same complexity together by value. @@ -675,24 +666,22 @@ static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,    }  } -namespace { -struct FindSCEVSize { -  int Size; -  FindSCEVSize() : Size(0) {} - -  bool follow(const SCEV *S) { -    ++Size; -    // Keep looking at all operands of S. -    return true; -  } -  bool isDone() const { -    return false; -  } -}; -} -  // Returns the size of the SCEV S.  static inline int sizeOfSCEV(const SCEV *S) { +  struct FindSCEVSize { +    int Size; +    FindSCEVSize() : Size(0) {} + +    bool follow(const SCEV *S) { +      ++Size; +      // Keep looking at all operands of S. 
+      return true; +    } +    bool isDone() const { +      return false; +    } +  }; +    FindSCEVSize F;    SCEVTraversal<FindSCEVSize> ST(F);    ST.visitAll(S); @@ -771,8 +760,8 @@ public:    void visitConstant(const SCEVConstant *Numerator) {      if (const SCEVConstant *D = dyn_cast<SCEVConstant>(Denominator)) { -      APInt NumeratorVal = Numerator->getValue()->getValue(); -      APInt DenominatorVal = D->getValue()->getValue(); +      APInt NumeratorVal = Numerator->getAPInt(); +      APInt DenominatorVal = D->getAPInt();        uint32_t NumeratorBW = NumeratorVal.getBitWidth();        uint32_t DenominatorBW = DenominatorVal.getBitWidth(); @@ -792,17 +781,15 @@ public:    void visitAddRecExpr(const SCEVAddRecExpr *Numerator) {      const SCEV *StartQ, *StartR, *StepQ, *StepR; -    assert(Numerator->isAffine() && "Numerator should be affine"); +    if (!Numerator->isAffine()) +      return cannotDivide(Numerator);      divide(SE, Numerator->getStart(), Denominator, &StartQ, &StartR);      divide(SE, Numerator->getStepRecurrence(SE), Denominator, &StepQ, &StepR);      // Bail out if the types do not match.      Type *Ty = Denominator->getType();      if (Ty != StartQ->getType() || Ty != StartR->getType() || -        Ty != StepQ->getType() || Ty != StepR->getType()) { -      Quotient = Zero; -      Remainder = Numerator; -      return; -    } +        Ty != StepQ->getType() || Ty != StepR->getType()) +      return cannotDivide(Numerator);      Quotient = SE.getAddRecExpr(StartQ, StepQ, Numerator->getLoop(),                                  Numerator->getNoWrapFlags());      Remainder = SE.getAddRecExpr(StartR, StepR, Numerator->getLoop(), @@ -818,11 +805,8 @@ public:        divide(SE, Op, Denominator, &Q, &R);        // Bail out if types do not match. -      if (Ty != Q->getType() || Ty != R->getType()) { -        Quotient = Zero; -        Remainder = Numerator; -        return; -      } +      if (Ty != Q->getType() || Ty != R->getType()) +        return cannotDivide(Numerator);        Qs.push_back(Q);        Rs.push_back(R); @@ -845,11 +829,8 @@ public:      bool FoundDenominatorTerm = false;      for (const SCEV *Op : Numerator->operands()) {        // Bail out if types do not match. -      if (Ty != Op->getType()) { -        Quotient = Zero; -        Remainder = Numerator; -        return; -      } +      if (Ty != Op->getType()) +        return cannotDivide(Numerator);        if (FoundDenominatorTerm) {          Qs.push_back(Op); @@ -865,11 +846,8 @@ public:        }        // Bail out if types do not match. -      if (Ty != Q->getType()) { -        Quotient = Zero; -        Remainder = Numerator; -        return; -      } +      if (Ty != Q->getType()) +        return cannotDivide(Numerator);        FoundDenominatorTerm = true;        Qs.push_back(Q); @@ -884,11 +862,8 @@ public:        return;      } -    if (!isa<SCEVUnknown>(Denominator)) { -      Quotient = Zero; -      Remainder = Numerator; -      return; -    } +    if (!isa<SCEVUnknown>(Denominator)) +      return cannotDivide(Numerator);      // The Remainder is obtained by replacing Denominator by 0 in Numerator.      ValueToValueMap RewriteMap; @@ -908,15 +883,12 @@ public:      // Quotient is (Numerator - Remainder) divided by Denominator.      const SCEV *Q, *R;      const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder); -    if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) { -      // This SCEV does not seem to simplify: fail the division here. 
-      Quotient = Zero; -      Remainder = Numerator; -      return; -    } +    // This SCEV does not seem to simplify: fail the division here. +    if (sizeOfSCEV(Diff) > sizeOfSCEV(Numerator)) +      return cannotDivide(Numerator);      divide(SE, Diff, Denominator, &Q, &R); -    assert(R == Zero && -           "(Numerator - Remainder) should evenly divide Denominator"); +    if (R != Zero) +      return cannotDivide(Numerator);      Quotient = Q;    } @@ -924,11 +896,18 @@ private:    SCEVDivision(ScalarEvolution &S, const SCEV *Numerator,                 const SCEV *Denominator)        : SE(S), Denominator(Denominator) { -    Zero = SE.getConstant(Denominator->getType(), 0); -    One = SE.getConstant(Denominator->getType(), 1); +    Zero = SE.getZero(Denominator->getType()); +    One = SE.getOne(Denominator->getType()); + +    // We generally do not know how to divide Expr by Denominator. We +    // initialize the division to a "cannot divide" state to simplify the rest +    // of the code. +    cannotDivide(Numerator); +  } -    // By default, we don't know how to divide Expr by Denominator. -    // Providing the default here simplifies the rest of the code. +  // Convenience function for giving up on the division. We set the quotient to +  // be equal to zero and the remainder to be equal to the numerator. +  void cannotDivide(const SCEV *Numerator) {      Quotient = Zero;      Remainder = Numerator;    } @@ -1151,8 +1130,8 @@ const SCEV *ScalarEvolution::getTruncateExpr(const SCEV *Op,    // If the input value is a chrec scev, truncate the chrec's operands.    if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) {      SmallVector<const SCEV *, 4> Operands; -    for (unsigned i = 0, e = AddRec->getNumOperands(); i != e; ++i) -      Operands.push_back(getTruncateExpr(AddRec->getOperand(i), Ty)); +    for (const SCEV *Op : AddRec->operands()) +      Operands.push_back(getTruncateExpr(Op, Ty));      return getAddRecExpr(Operands, AddRec->getLoop(), SCEV::FlagAnyWrap);    } @@ -1287,7 +1266,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,    // `Step`:    // 1. NSW/NUW flags on the step increment. 
-  const SCEV *PreStart = SE->getAddExpr(DiffOps, SA->getNoWrapFlags()); +  auto PreStartFlags = +    ScalarEvolution::maskFlags(SA->getNoWrapFlags(), SCEV::FlagNUW); +  const SCEV *PreStart = SE->getAddExpr(DiffOps, PreStartFlags);    const SCEVAddRecExpr *PreAR = dyn_cast<SCEVAddRecExpr>(        SE->getAddRecExpr(PreStart, Step, L, SCEV::FlagAnyWrap)); @@ -1322,9 +1303,9 @@ static const SCEV *getPreStartForExtend(const SCEVAddRecExpr *AR, Type *Ty,        ExtendOpTraits<ExtendOpTy>::getOverflowLimitForStep(Step, &Pred, SE);    if (OverflowLimit && -      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit)) { +      SE->isLoopEntryGuardedByCond(L, Pred, PreStart, OverflowLimit))      return PreStart; -  } +    return nullptr;  } @@ -1390,24 +1371,22 @@ bool ScalarEvolution::proveNoWrapByVaryingStart(const SCEV *Start,    if (!StartC)      return false; -  APInt StartAI = StartC->getValue()->getValue(); +  APInt StartAI = StartC->getAPInt();    for (unsigned Delta : {-2, -1, 1, 2}) {      const SCEV *PreStart = getConstant(StartAI - Delta); +    FoldingSetNodeID ID; +    ID.AddInteger(scAddRecExpr); +    ID.AddPointer(PreStart); +    ID.AddPointer(Step); +    ID.AddPointer(L); +    void *IP = nullptr; +    const auto *PreAR = +      static_cast<SCEVAddRecExpr *>(UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); +      // Give up if we don't already have the add recurrence we need because      // actually constructing an add recurrence is relatively expensive. -    const SCEVAddRecExpr *PreAR = [&]() { -      FoldingSetNodeID ID; -      ID.AddInteger(scAddRecExpr); -      ID.AddPointer(PreStart); -      ID.AddPointer(Step); -      ID.AddPointer(L); -      void *IP = nullptr; -      return static_cast<SCEVAddRecExpr *>( -          this->UniqueSCEVs.FindNodeOrInsertPos(ID, IP)); -    }(); -      if (PreAR && PreAR->getNoWrapFlags(WrapType)) {  // proves (2)        const SCEV *DeltaS = getConstant(StartC->getType(), Delta);        ICmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; @@ -1578,6 +1557,18 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,        }      } +  if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) { +    // zext((A + B + ...)<nuw>) --> (zext(A) + zext(B) + ...)<nuw> +    if (SA->getNoWrapFlags(SCEV::FlagNUW)) { +      // If the addition does not unsign overflow then we can, by definition, +      // commute the zero extension with the addition operation. +      SmallVector<const SCEV *, 4> Ops; +      for (const auto *Op : SA->operands()) +        Ops.push_back(getZeroExtendExpr(Op, Ty)); +      return getAddExpr(Ops, SCEV::FlagNUW); +    } +  } +    // The cast wasn't folded; create an explicit cast node.    // Recompute the insert position, as it may have been invalidated.    
if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; @@ -1635,14 +1626,14 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,    }    // sext(C1 + (C2 * x)) --> C1 + sext(C2 * x) if C1 < C2 -  if (auto SA = dyn_cast<SCEVAddExpr>(Op)) { +  if (auto *SA = dyn_cast<SCEVAddExpr>(Op)) {      if (SA->getNumOperands() == 2) { -      auto SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0)); -      auto SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1)); +      auto *SC1 = dyn_cast<SCEVConstant>(SA->getOperand(0)); +      auto *SMul = dyn_cast<SCEVMulExpr>(SA->getOperand(1));        if (SMul && SC1) { -        if (auto SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) { -          const APInt &C1 = SC1->getValue()->getValue(); -          const APInt &C2 = SC2->getValue()->getValue(); +        if (auto *SC2 = dyn_cast<SCEVConstant>(SMul->getOperand(0))) { +          const APInt &C1 = SC1->getAPInt(); +          const APInt &C2 = SC2->getAPInt();            if (C1.isStrictlyPositive() && C2.isStrictlyPositive() &&                C2.ugt(C1) && C2.isPowerOf2())              return getAddExpr(getSignExtendExpr(SC1, Ty), @@ -1650,6 +1641,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,          }        }      } + +    // sext((A + B + ...)<nsw>) --> (sext(A) + sext(B) + ...)<nsw> +    if (SA->getNoWrapFlags(SCEV::FlagNSW)) { +      // If the addition does not sign overflow then we can, by definition, +      // commute the sign extension with the addition operation. +      SmallVector<const SCEV *, 4> Ops; +      for (const auto *Op : SA->operands()) +        Ops.push_back(getSignExtendExpr(Op, Ty)); +      return getAddExpr(Ops, SCEV::FlagNSW); +    }    }    // If the input value is a chrec scev, and we can prove that the value    // did not overflow the old, smaller, value, we can sign extend all of the @@ -1754,16 +1755,16 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,        // If Start and Step are constants, check if we can apply this        // transformation:        // sext{C1,+,C2} --> C1 + sext{0,+,C2} if C1 < C2 -      auto SC1 = dyn_cast<SCEVConstant>(Start); -      auto SC2 = dyn_cast<SCEVConstant>(Step); +      auto *SC1 = dyn_cast<SCEVConstant>(Start); +      auto *SC2 = dyn_cast<SCEVConstant>(Step);        if (SC1 && SC2) { -        const APInt &C1 = SC1->getValue()->getValue(); -        const APInt &C2 = SC2->getValue()->getValue(); +        const APInt &C1 = SC1->getAPInt(); +        const APInt &C2 = SC2->getAPInt();          if (C1.isStrictlyPositive() && C2.isStrictlyPositive() && C2.ugt(C1) &&              C2.isPowerOf2()) {            Start = getSignExtendExpr(Start, Ty); -          const SCEV *NewAR = getAddRecExpr(getConstant(AR->getType(), 0), Step, -                                            L, AR->getNoWrapFlags()); +          const SCEV *NewAR = getAddRecExpr(getZero(AR->getType()), Step, L, +                                            AR->getNoWrapFlags());            return getAddExpr(Start, getSignExtendExpr(NewAR, Ty));          }        } @@ -1798,7 +1799,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op,    // Sign-extend negative constants.    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Op)) -    if (SC->getValue()->getValue().isNegative()) +    if (SC->getAPInt().isNegative())        return getSignExtendExpr(Op, Ty);    // Peel off a truncate cast. 
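Editorial note, not part of the patch: the getZeroExtendExpr/getSignExtendExpr hunks above add folds that distribute an extension over an addition that is known not to wrap. A minimal sketch of the effect, assuming a ScalarEvolution &SE, SCEVs A and B, and a wider integer type WiderTy (these names are illustrative, not taken from the patch):

  // Build A + B carrying the no-signed-wrap flag, then sign-extend the sum.
  const SCEV *Sum = SE.getAddExpr(A, B, SCEV::FlagNSW);
  const SCEV *Ext = SE.getSignExtendExpr(Sum, WiderTy);
  // With the new fold, when the add carries <nsw> the extension is formed as
  // sext(A) + sext(B) (still <nsw>) instead of an opaque sext(A + B); the
  // zero-extend hunk does the analogous rewrite for <nuw> adds.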
@@ -1876,7 +1877,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,      // Pull a buried constant out to the outside.      if (Scale != 1 || AccumulatedConstant != 0 || C->getValue()->isZero())        Interesting = true; -    AccumulatedConstant += Scale * C->getValue()->getValue(); +    AccumulatedConstant += Scale * C->getAPInt();    }    // Next comes everything else. We're especially interested in multiplies @@ -1885,7 +1886,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,      const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[i]);      if (Mul && isa<SCEVConstant>(Mul->getOperand(0))) {        APInt NewScale = -        Scale * cast<SCEVConstant>(Mul->getOperand(0))->getValue()->getValue(); +          Scale * cast<SCEVConstant>(Mul->getOperand(0))->getAPInt();        if (Mul->getNumOperands() == 2 && isa<SCEVAddExpr>(Mul->getOperand(1))) {          // A multiplication of a constant with another add; recurse.          const SCEVAddExpr *Add = cast<SCEVAddExpr>(Mul->getOperand(1)); @@ -1898,8 +1899,7 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,          // the map.          SmallVector<const SCEV *, 4> MulOps(Mul->op_begin()+1, Mul->op_end());          const SCEV *Key = SE.getMulExpr(MulOps); -        std::pair<DenseMap<const SCEV *, APInt>::iterator, bool> Pair = -          M.insert(std::make_pair(Key, NewScale)); +        auto Pair = M.insert(std::make_pair(Key, NewScale));          if (Pair.second) {            NewOps.push_back(Pair.first->first);          } else { @@ -1927,22 +1927,15 @@ CollectAddOperandsWithScales(DenseMap<const SCEV *, APInt> &M,    return Interesting;  } -namespace { -  struct APIntCompare { -    bool operator()(const APInt &LHS, const APInt &RHS) const { -      return LHS.ult(RHS); -    } -  }; -} -  // We're trying to construct a SCEV of type `Type' with `Ops' as operands and  // `OldFlags' as can't-wrap behavior.  Infer a more aggressive set of  // can't-overflow flags for the operation if possible.  static SCEV::NoWrapFlags  StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,                        const SmallVectorImpl<const SCEV *> &Ops, -                      SCEV::NoWrapFlags OldFlags) { +                      SCEV::NoWrapFlags Flags) {    using namespace std::placeholders; +  typedef OverflowingBinaryOperator OBO;    bool CanAnalyze =        Type == scAddExpr || Type == scAddRecExpr || Type == scMulExpr; @@ -1951,18 +1944,42 @@ StrengthenNoWrapFlags(ScalarEvolution *SE, SCEVTypes Type,    int SignOrUnsignMask = SCEV::FlagNUW | SCEV::FlagNSW;    SCEV::NoWrapFlags SignOrUnsignWrap = -      ScalarEvolution::maskFlags(OldFlags, SignOrUnsignMask); +      ScalarEvolution::maskFlags(Flags, SignOrUnsignMask);    // If FlagNSW is true and all the operands are non-negative, infer FlagNUW. 
-  auto IsKnownNonNegative = -    std::bind(std::mem_fn(&ScalarEvolution::isKnownNonNegative), SE, _1); +  auto IsKnownNonNegative = [&](const SCEV *S) { +    return SE->isKnownNonNegative(S); +  }; + +  if (SignOrUnsignWrap == SCEV::FlagNSW && all_of(Ops, IsKnownNonNegative)) +    Flags = +        ScalarEvolution::setFlags(Flags, (SCEV::NoWrapFlags)SignOrUnsignMask); -  if (SignOrUnsignWrap == SCEV::FlagNSW && -      std::all_of(Ops.begin(), Ops.end(), IsKnownNonNegative)) -    return ScalarEvolution::setFlags(OldFlags, -                                     (SCEV::NoWrapFlags)SignOrUnsignMask); +  SignOrUnsignWrap = ScalarEvolution::maskFlags(Flags, SignOrUnsignMask); + +  if (SignOrUnsignWrap != SignOrUnsignMask && Type == scAddExpr && +      Ops.size() == 2 && isa<SCEVConstant>(Ops[0])) { + +    // (A + C) --> (A + C)<nsw> if the addition does not sign overflow +    // (A + C) --> (A + C)<nuw> if the addition does not unsign overflow + +    const APInt &C = cast<SCEVConstant>(Ops[0])->getAPInt(); +    if (!(SignOrUnsignWrap & SCEV::FlagNSW)) { +      auto NSWRegion = +        ConstantRange::makeNoWrapRegion(Instruction::Add, C, OBO::NoSignedWrap); +      if (NSWRegion.contains(SE->getSignedRange(Ops[1]))) +        Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); +    } +    if (!(SignOrUnsignWrap & SCEV::FlagNUW)) { +      auto NUWRegion = +        ConstantRange::makeNoWrapRegion(Instruction::Add, C, +                                        OBO::NoUnsignedWrap); +      if (NUWRegion.contains(SE->getUnsignedRange(Ops[1]))) +        Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); +    } +  } -  return OldFlags; +  return Flags;  }  /// getAddExpr - Get a canonical add expression, or something simpler if @@ -1980,10 +1997,10 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,             "SCEVAddExpr operand types don't match!");  #endif -  Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags); -    // Sort by complexity, this groups all similar expression types together. -  GroupByComplexity(Ops, LI); +  GroupByComplexity(Ops, &LI); + +  Flags = StrengthenNoWrapFlags(this, scAddExpr, Ops, Flags);    // If there are any constants, fold them together.    unsigned Idx = 0; @@ -1992,8 +2009,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,      assert(Idx < Ops.size());      while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {        // We found two constants, fold them together! 
-      Ops[0] = getConstant(LHSC->getValue()->getValue() + -                           RHSC->getValue()->getValue()); +      Ops[0] = getConstant(LHSC->getAPInt() + RHSC->getAPInt());        if (Ops.size() == 2) return Ops[0];        Ops.erase(Ops.begin()+1);  // Erase the folded element        LHSC = cast<SCEVConstant>(Ops[0]); @@ -2063,8 +2079,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,                break;              }              LargeMulOps.push_back(T->getOperand()); -          } else if (const SCEVConstant *C = -                       dyn_cast<SCEVConstant>(M->getOperand(j))) { +          } else if (const auto *C = dyn_cast<SCEVConstant>(M->getOperand(j))) {              LargeMulOps.push_back(getAnyExtendExpr(C, SrcType));            } else {              Ok = false; @@ -2123,24 +2138,28 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,      if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant,                                       Ops.data(), Ops.size(),                                       APInt(BitWidth, 1), *this)) { +      struct APIntCompare { +        bool operator()(const APInt &LHS, const APInt &RHS) const { +          return LHS.ult(RHS); +        } +      }; +        // Some interesting folding opportunity is present, so its worthwhile to        // re-generate the operands list. Group the operands by constant scale,        // to avoid multiplying by the same constant scale multiple times.        std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare> MulOpLists; -      for (SmallVectorImpl<const SCEV *>::const_iterator I = NewOps.begin(), -           E = NewOps.end(); I != E; ++I) -        MulOpLists[M.find(*I)->second].push_back(*I); +      for (const SCEV *NewOp : NewOps) +        MulOpLists[M.find(NewOp)->second].push_back(NewOp);        // Re-generate the operands list.        
Ops.clear();        if (AccumulatedConstant != 0)          Ops.push_back(getConstant(AccumulatedConstant)); -      for (std::map<APInt, SmallVector<const SCEV *, 4>, APIntCompare>::iterator -           I = MulOpLists.begin(), E = MulOpLists.end(); I != E; ++I) -        if (I->first != 0) -          Ops.push_back(getMulExpr(getConstant(I->first), -                                   getAddExpr(I->second))); +      for (auto &MulOp : MulOpLists) +        if (MulOp.first != 0) +          Ops.push_back(getMulExpr(getConstant(MulOp.first), +                                   getAddExpr(MulOp.second)));        if (Ops.empty()) -        return getConstant(Ty, 0); +        return getZero(Ty);        if (Ops.size() == 1)          return Ops[0];        return getAddExpr(Ops); @@ -2168,7 +2187,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,              MulOps.append(Mul->op_begin()+MulOp+1, Mul->op_end());              InnerMul = getMulExpr(MulOps);            } -          const SCEV *One = getConstant(Ty, 1); +          const SCEV *One = getOne(Ty);            const SCEV *AddOne = getAddExpr(One, InnerMul);            const SCEV *OuterMul = getMulExpr(AddOne, MulOpSCEV);            if (Ops.size() == 2) return OuterMul; @@ -2279,8 +2298,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,                                                 AddRec->op_end());          for (; OtherIdx != Ops.size() && isa<SCEVAddRecExpr>(Ops[OtherIdx]);               ++OtherIdx) -          if (const SCEVAddRecExpr *OtherAddRec = -                dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx])) +          if (const auto *OtherAddRec = dyn_cast<SCEVAddRecExpr>(Ops[OtherIdx]))              if (OtherAddRec->getLoop() == AddRecLoop) {                for (unsigned i = 0, e = OtherAddRec->getNumOperands();                     i != e; ++i) { @@ -2388,10 +2406,10 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,             "SCEVMulExpr operand types don't match!");  #endif -  Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags); -    // Sort by complexity, this groups all similar expression types together. -  GroupByComplexity(Ops, LI); +  GroupByComplexity(Ops, &LI); + +  Flags = StrengthenNoWrapFlags(this, scMulExpr, Ops, Flags);    // If there are any constants, fold them together.    unsigned Idx = 0; @@ -2410,9 +2428,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,      ++Idx;      while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {        // We found two constants, fold them together! 
-      ConstantInt *Fold = ConstantInt::get(getContext(), -                                           LHSC->getValue()->getValue() * -                                           RHSC->getValue()->getValue()); +      ConstantInt *Fold = +          ConstantInt::get(getContext(), LHSC->getAPInt() * RHSC->getAPInt());        Ops[0] = getConstant(Fold);        Ops.erase(Ops.begin()+1);  // Erase the folded element        if (Ops.size() == 1) return Ops[0]; @@ -2433,23 +2450,19 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,          if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Ops[1])) {            SmallVector<const SCEV *, 4> NewOps;            bool AnyFolded = false; -          for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), -                 E = Add->op_end(); I != E; ++I) { -            const SCEV *Mul = getMulExpr(Ops[0], *I); +          for (const SCEV *AddOp : Add->operands()) { +            const SCEV *Mul = getMulExpr(Ops[0], AddOp);              if (!isa<SCEVMulExpr>(Mul)) AnyFolded = true;              NewOps.push_back(Mul);            }            if (AnyFolded)              return getAddExpr(NewOps); -        } -        else if (const SCEVAddRecExpr * -                 AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) { +        } else if (const auto *AddRec = dyn_cast<SCEVAddRecExpr>(Ops[1])) {            // Negation preserves a recurrence's no self-wrap property.            SmallVector<const SCEV *, 4> Operands; -          for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(), -                 E = AddRec->op_end(); I != E; ++I) { -            Operands.push_back(getMulExpr(Ops[0], *I)); -          } +          for (const SCEV *AddRecOp : AddRec->operands()) +            Operands.push_back(getMulExpr(Ops[0], AddRecOp)); +            return getAddRecExpr(Operands, AddRec->getLoop(),                                 AddRec->getNoWrapFlags(SCEV::FlagNW));          } @@ -2560,7 +2573,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,        SmallVector<const SCEV*, 7> AddRecOps;        for (int x = 0, xe = AddRec->getNumOperands() +               OtherAddRec->getNumOperands() - 1; x != xe && !Overflow; ++x) { -        const SCEV *Term = getConstant(Ty, 0); +        const SCEV *Term = getZero(Ty);          for (int y = x, ye = 2*x+1; y != ye && !Overflow; ++y) {            uint64_t Coeff1 = Choose(x, 2*x - y, Overflow);            for (int z = std::max(y-x, y-(int)AddRec->getNumOperands()+1), @@ -2638,11 +2651,11 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,        // its operands.        // TODO: Generalize this to non-constants by using known-bits information.        Type *Ty = LHS->getType(); -      unsigned LZ = RHSC->getValue()->getValue().countLeadingZeros(); +      unsigned LZ = RHSC->getAPInt().countLeadingZeros();        unsigned MaxShiftAmt = getTypeSizeInBits(Ty) - LZ - 1;        // For non-power-of-two values, effectively round the value up to the        // nearest power of two. -      if (!RHSC->getValue()->getValue().isPowerOf2()) +      if (!RHSC->getAPInt().isPowerOf2())          ++MaxShiftAmt;        IntegerType *ExtTy =          IntegerType::get(getContext(), getTypeSizeInBits(Ty) + MaxShiftAmt); @@ -2650,18 +2663,17 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,          if (const SCEVConstant *Step =              dyn_cast<SCEVConstant>(AR->getStepRecurrence(*this))) {            // {X,+,N}/C --> {X/C,+,N/C} if safe and N/C can be folded. 
-          const APInt &StepInt = Step->getValue()->getValue(); -          const APInt &DivInt = RHSC->getValue()->getValue(); +          const APInt &StepInt = Step->getAPInt(); +          const APInt &DivInt = RHSC->getAPInt();            if (!StepInt.urem(DivInt) &&                getZeroExtendExpr(AR, ExtTy) ==                getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),                              getZeroExtendExpr(Step, ExtTy),                              AR->getLoop(), SCEV::FlagAnyWrap)) {              SmallVector<const SCEV *, 4> Operands; -            for (unsigned i = 0, e = AR->getNumOperands(); i != e; ++i) -              Operands.push_back(getUDivExpr(AR->getOperand(i), RHS)); -            return getAddRecExpr(Operands, AR->getLoop(), -                                 SCEV::FlagNW); +            for (const SCEV *Op : AR->operands()) +              Operands.push_back(getUDivExpr(Op, RHS)); +            return getAddRecExpr(Operands, AR->getLoop(), SCEV::FlagNW);            }            /// Get a canonical UDivExpr for a recurrence.            /// {X,+,N}/C => {Y,+,N}/C where Y=X-(X%N). Safe when C%N=0. @@ -2672,7 +2684,7 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,                getAddRecExpr(getZeroExtendExpr(AR->getStart(), ExtTy),                              getZeroExtendExpr(Step, ExtTy),                              AR->getLoop(), SCEV::FlagAnyWrap)) { -            const APInt &StartInt = StartC->getValue()->getValue(); +            const APInt &StartInt = StartC->getAPInt();              const APInt &StartRem = StartInt.urem(StepInt);              if (StartRem != 0)                LHS = getAddRecExpr(getConstant(StartInt - StartRem), Step, @@ -2682,8 +2694,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,        // (A*B)/C --> A*(B/C) if safe and B/C can be folded.        if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(LHS)) {          SmallVector<const SCEV *, 4> Operands; -        for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) -          Operands.push_back(getZeroExtendExpr(M->getOperand(i), ExtTy)); +        for (const SCEV *Op : M->operands()) +          Operands.push_back(getZeroExtendExpr(Op, ExtTy));          if (getZeroExtendExpr(M, ExtTy) == getMulExpr(Operands))            // Find an operand that's safely divisible.            for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) { @@ -2700,8 +2712,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,        // (A+B)/C --> (A/C + B/C) if safe and A/C and B/C can be folded.        
if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(LHS)) {          SmallVector<const SCEV *, 4> Operands; -        for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) -          Operands.push_back(getZeroExtendExpr(A->getOperand(i), ExtTy)); +        for (const SCEV *Op : A->operands()) +          Operands.push_back(getZeroExtendExpr(Op, ExtTy));          if (getZeroExtendExpr(A, ExtTy) == getAddExpr(Operands)) {            Operands.clear();            for (unsigned i = 0, e = A->getNumOperands(); i != e; ++i) { @@ -2739,8 +2751,8 @@ const SCEV *ScalarEvolution::getUDivExpr(const SCEV *LHS,  }  static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { -  APInt A = C1->getValue()->getValue().abs(); -  APInt B = C2->getValue()->getValue().abs(); +  APInt A = C1->getAPInt().abs(); +  APInt B = C2->getAPInt().abs();    uint32_t ABW = A.getBitWidth();    uint32_t BBW = B.getBitWidth(); @@ -2769,8 +2781,7 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,    if (const SCEVConstant *RHSCst = dyn_cast<SCEVConstant>(RHS)) {      // If the mulexpr multiplies by a constant, then that constant must be the      // first element of the mulexpr. -    if (const SCEVConstant *LHSCst = -            dyn_cast<SCEVConstant>(Mul->getOperand(0))) { +    if (const auto *LHSCst = dyn_cast<SCEVConstant>(Mul->getOperand(0))) {        if (LHSCst == RHSCst) {          SmallVector<const SCEV *, 2> Operands;          Operands.append(Mul->op_begin() + 1, Mul->op_end()); @@ -2782,10 +2793,10 @@ const SCEV *ScalarEvolution::getUDivExactExpr(const SCEV *LHS,        // check.        APInt Factor = gcd(LHSCst, RHSCst);        if (!Factor.isIntN(1)) { -        LHSCst = cast<SCEVConstant>( -            getConstant(LHSCst->getValue()->getValue().udiv(Factor))); -        RHSCst = cast<SCEVConstant>( -            getConstant(RHSCst->getValue()->getValue().udiv(Factor))); +        LHSCst = +            cast<SCEVConstant>(getConstant(LHSCst->getAPInt().udiv(Factor))); +        RHSCst = +            cast<SCEVConstant>(getConstant(RHSCst->getAPInt().udiv(Factor)));          SmallVector<const SCEV *, 2> Operands;          Operands.push_back(LHSCst);          Operands.append(Mul->op_begin() + 1, Mul->op_end()); @@ -2859,22 +2870,19 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,    // Canonicalize nested AddRecs in by nesting them in order of loop depth.    if (const SCEVAddRecExpr *NestedAR = dyn_cast<SCEVAddRecExpr>(Operands[0])) {      const Loop *NestedLoop = NestedAR->getLoop(); -    if (L->contains(NestedLoop) ? -        (L->getLoopDepth() < NestedLoop->getLoopDepth()) : -        (!NestedLoop->contains(L) && -         DT->dominates(L->getHeader(), NestedLoop->getHeader()))) { +    if (L->contains(NestedLoop) +            ? (L->getLoopDepth() < NestedLoop->getLoopDepth()) +            : (!NestedLoop->contains(L) && +               DT.dominates(L->getHeader(), NestedLoop->getHeader()))) {        SmallVector<const SCEV *, 4> NestedOperands(NestedAR->op_begin(),                                                    NestedAR->op_end());        Operands[0] = NestedAR->getStart();        // AddRecs require their operands be loop-invariant with respect to their        // loops. Don't perform this transformation if it would break this        // requirement. 
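[Editor's note] getUDivExactExpr above divides the two leading constants of an exact udiv by their gcd (the gcd helper first widens both APInts to a common bit width). A plain-integer sketch of the same factoring step, using C++17's std::gcd on fixed-width integers rather than APInt; exactness of the division is assumed, as it is in the caller.

#include <cassert>
#include <cstdint>
#include <numeric>  // std::gcd

int main() {
  // (Factor * A) /u (Factor * B) == A /u B when the division is exact, so the
  // leading constants on both sides can be divided by their common factor.
  uint64_t LHSCst = 12, RHSCst = 8;
  uint64_t Factor = std::gcd(LHSCst, RHSCst);
  assert(Factor == 4);
  uint64_t x = 10, y = 5;  // 12*x exactly divided by 8*y behaves like 3*x / (2*y)
  assert((LHSCst * x) / (RHSCst * y) ==
         (LHSCst / Factor * x) / (RHSCst / Factor * y));
  return 0;
}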
-      bool AllInvariant = true; -      for (unsigned i = 0, e = Operands.size(); i != e; ++i) -        if (!isLoopInvariant(Operands[i], L)) { -          AllInvariant = false; -          break; -        } +      bool AllInvariant = all_of( +          Operands, [&](const SCEV *Op) { return isLoopInvariant(Op, L); }); +        if (AllInvariant) {          // Create a recurrence for the outer loop with the same step size.          // @@ -2884,12 +2892,10 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,            maskFlags(Flags, SCEV::FlagNW | NestedAR->getNoWrapFlags());          NestedOperands[0] = getAddRecExpr(Operands, L, OuterFlags); -        AllInvariant = true; -        for (unsigned i = 0, e = NestedOperands.size(); i != e; ++i) -          if (!isLoopInvariant(NestedOperands[i], NestedLoop)) { -            AllInvariant = false; -            break; -          } +        AllInvariant = all_of(NestedOperands, [&](const SCEV *Op) { +          return isLoopInvariant(Op, NestedLoop); +        }); +          if (AllInvariant) {            // Ok, both add recurrences are valid after the transformation.            // @@ -2936,10 +2942,11 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,    // FIXME(PR23527): Don't blindly transfer the inbounds flag from the GEP    // instruction to its SCEV, because the Instruction may be guarded by control    // flow and the no-overflow bits may not be valid for the expression in any -  // context. +  // context. This can be fixed similarly to how these flags are handled for +  // adds.    SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap; -  const SCEV *TotalOffset = getConstant(IntPtrTy, 0); +  const SCEV *TotalOffset = getZero(IntPtrTy);    // The address space is unimportant. The first thing we do on CurTy is getting    // its element type.    Type *CurTy = PointerType::getUnqual(PointeeType); @@ -2996,7 +3003,7 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {  #endif    // Sort by complexity, this groups all similar expression types together. -  GroupByComplexity(Ops, LI); +  GroupByComplexity(Ops, &LI);    // If there are any constants, fold them together.    unsigned Idx = 0; @@ -3005,9 +3012,8 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {      assert(Idx < Ops.size());      while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {        // We found two constants, fold them together! -      ConstantInt *Fold = ConstantInt::get(getContext(), -                              APIntOps::smax(LHSC->getValue()->getValue(), -                                             RHSC->getValue()->getValue())); +      ConstantInt *Fold = ConstantInt::get( +          getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));        Ops[0] = getConstant(Fold);        Ops.erase(Ops.begin()+1);  // Erase the folded element        if (Ops.size() == 1) return Ops[0]; @@ -3100,7 +3106,7 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {  #endif    // Sort by complexity, this groups all similar expression types together. -  GroupByComplexity(Ops, LI); +  GroupByComplexity(Ops, &LI);    // If there are any constants, fold them together.    unsigned Idx = 0; @@ -3109,9 +3115,8 @@ ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {      assert(Idx < Ops.size());      while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {        // We found two constants, fold them together! 
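[Editor's note] The getAddRecExpr hunk above replaces two hand-written "set a flag and break" loops with all_of over the operand list (llvm::all_of is the range wrapper around the standard algorithm). A minimal sketch of the same idiom with std::all_of; the predicate here is purely illustrative.

#include <algorithm>
#include <vector>

// Before: an explicit flag-and-break loop. After: a predicate over the range.
static bool allEven(const std::vector<int> &Ops) {
  return std::all_of(Ops.begin(), Ops.end(),
                     [](int Op) { return Op % 2 == 0; });
}

int main() {
  std::vector<int> A{2, 4, 6}, B{2, 3};
  return allEven(A) && !allEven(B) ? 0 : 1;
}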
-      ConstantInt *Fold = ConstantInt::get(getContext(), -                              APIntOps::umax(LHSC->getValue()->getValue(), -                                             RHSC->getValue()->getValue())); +      ConstantInt *Fold = ConstantInt::get( +          getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));        Ops[0] = getConstant(Fold);        Ops.erase(Ops.begin()+1);  // Erase the folded element        if (Ops.size() == 1) return Ops[0]; @@ -3200,8 +3205,7 @@ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {    // We can bypass creating a target-independent    // constant expression and then folding it back into a ConstantInt.    // This is just a compile-time optimization. -  return getConstant(IntTy, -                     F->getParent()->getDataLayout().getTypeAllocSize(AllocTy)); +  return getConstant(IntTy, getDataLayout().getTypeAllocSize(AllocTy));  }  const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, @@ -3211,9 +3215,7 @@ const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy,    // constant expression and then folding it back into a ConstantInt.    // This is just a compile-time optimization.    return getConstant( -      IntTy, -      F->getParent()->getDataLayout().getStructLayout(STy)->getElementOffset( -          FieldNo)); +      IntTy, getDataLayout().getStructLayout(STy)->getElementOffset(FieldNo));  }  const SCEV *ScalarEvolution::getUnknown(Value *V) { @@ -3255,7 +3257,7 @@ bool ScalarEvolution::isSCEVable(Type *Ty) const {  /// for which isSCEVable must return true.  uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {    assert(isSCEVable(Ty) && "Type is not SCEVable!"); -  return F->getParent()->getDataLayout().getTypeSizeInBits(Ty); +  return getDataLayout().getTypeSizeInBits(Ty);  }  /// getEffectiveSCEVType - Return a type with the same bitwidth as @@ -3265,20 +3267,20 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const {  Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const {    assert(isSCEVable(Ty) && "Type is not SCEVable!"); -  if (Ty->isIntegerTy()) { +  if (Ty->isIntegerTy())      return Ty; -  }    // The only other support type is pointer.    assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); -  return F->getParent()->getDataLayout().getIntPtrType(Ty); +  return getDataLayout().getIntPtrType(Ty);  }  const SCEV *ScalarEvolution::getCouldNotCompute() { -  return &CouldNotCompute; +  return CouldNotCompute.get();  } -namespace { + +bool ScalarEvolution::checkValidity(const SCEV *S) const {    // Helper class working with SCEVTraversal to figure out if a SCEV contains    // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne    // is set iff if find such SCEVUnknown. 
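[Editor's note] Both constant-folding hunks above (smax and umax) fold pairs of constants with APIntOps::smax / umax; for the same bit pattern the signed and unsigned forms can pick different winners. A tiny standalone check with fixed-width integers rather than APInt:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  int32_t SA = -1, SB = 1;
  uint32_t UA = static_cast<uint32_t>(SA);  // same bit pattern, 0xFFFFFFFF
  uint32_t UB = static_cast<uint32_t>(SB);
  assert(std::max(SA, SB) == 1);            // smax picks 1
  assert(std::max(UA, UB) == 0xFFFFFFFFu);  // umax picks the all-ones value
  return 0;
}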
@@ -3300,9 +3302,7 @@ namespace {      }      bool isDone() const { return FindOne; }    }; -} -bool ScalarEvolution::checkValidity(const SCEV *S) const {    FindInvalidSCEVUnknown F;    SCEVTraversal<FindInvalidSCEVUnknown> ST(F);    ST.visitAll(S); @@ -3315,35 +3315,39 @@ bool ScalarEvolution::checkValidity(const SCEV *S) const {  const SCEV *ScalarEvolution::getSCEV(Value *V) {    assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); +  const SCEV *S = getExistingSCEV(V); +  if (S == nullptr) { +    S = createSCEV(V); +    ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); +  } +  return S; +} + +const SCEV *ScalarEvolution::getExistingSCEV(Value *V) { +  assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); +    ValueExprMapType::iterator I = ValueExprMap.find_as(V);    if (I != ValueExprMap.end()) {      const SCEV *S = I->second;      if (checkValidity(S))        return S; -    else -      ValueExprMap.erase(I); +    ValueExprMap.erase(I);    } -  const SCEV *S = createSCEV(V); - -  // The process of creating a SCEV for V may have caused other SCEVs -  // to have been created, so it's necessary to insert the new entry -  // from scratch, rather than trying to remember the insert position -  // above. -  ValueExprMap.insert(std::make_pair(SCEVCallbackVH(V, this), S)); -  return S; +  return nullptr;  }  /// getNegativeSCEV - Return a SCEV corresponding to -V = -1*V  /// -const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V) { +const SCEV *ScalarEvolution::getNegativeSCEV(const SCEV *V, +                                             SCEV::NoWrapFlags Flags) {    if (const SCEVConstant *VC = dyn_cast<SCEVConstant>(V))      return getConstant(                 cast<ConstantInt>(ConstantExpr::getNeg(VC->getValue())));    Type *Ty = V->getType();    Ty = getEffectiveSCEVType(Ty); -  return getMulExpr(V, -                  getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty)))); +  return getMulExpr( +      V, getConstant(cast<ConstantInt>(Constant::getAllOnesValue(Ty))), Flags);  }  /// getNotSCEV - Return a SCEV corresponding to ~V = -1-V @@ -3362,15 +3366,40 @@ const SCEV *ScalarEvolution::getNotSCEV(const SCEV *V) {  /// getMinusSCEV - Return LHS-RHS.  Minus is represented in SCEV as A+B*-1.  const SCEV *ScalarEvolution::getMinusSCEV(const SCEV *LHS, const SCEV *RHS,                                            SCEV::NoWrapFlags Flags) { -  assert(!maskFlags(Flags, SCEV::FlagNUW) && "subtraction does not have NUW"); -    // Fast path: X - X --> 0.    if (LHS == RHS) -    return getConstant(LHS->getType(), 0); +    return getZero(LHS->getType()); + +  // We represent LHS - RHS as LHS + (-1)*RHS. This transformation +  // makes it so that we cannot make much use of NUW. +  auto AddFlags = SCEV::FlagAnyWrap; +  const bool RHSIsNotMinSigned = +      !getSignedRange(RHS).getSignedMin().isMinSignedValue(); +  if (maskFlags(Flags, SCEV::FlagNSW) == SCEV::FlagNSW) { +    // Let M be the minimum representable signed value. Then (-1)*RHS +    // signed-wraps if and only if RHS is M. That can happen even for +    // a NSW subtraction because e.g. (-1)*M signed-wraps even though +    // -1 - M does not. So to transfer NSW from LHS - RHS to LHS + +    // (-1)*RHS, we need to prove that RHS != M. +    // +    // If LHS is non-negative and we know that LHS - RHS does not +    // signed-wrap, then RHS cannot be M. So we can rule out signed-wrap +    // either by proving that RHS > M or that LHS >= 0. 
+    if (RHSIsNotMinSigned || isKnownNonNegative(LHS)) { +      AddFlags = SCEV::FlagNSW; +    } +  } + +  // FIXME: Find a correct way to transfer NSW to (-1)*M when LHS - +  // RHS is NSW and LHS >= 0. +  // +  // The difficulty here is that the NSW flag may have been proven +  // relative to a loop that is to be found in a recurrence in LHS and +  // not in RHS. Applying NSW to (-1)*M may then let the NSW have a +  // larger scope than intended. +  auto NegFlags = RHSIsNotMinSigned ? SCEV::FlagNSW : SCEV::FlagAnyWrap; -  // X - Y --> X + -Y. -  // X -(nsw || nuw) Y --> X + -Y. -  return getAddExpr(LHS, getNegativeSCEV(RHS)); +  return getAddExpr(LHS, getNegativeSCEV(RHS, NegFlags), AddFlags);  }  /// getTruncateOrZeroExtend - Return a SCEV corresponding to a conversion of the @@ -3513,16 +3542,14 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) {    if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(V)) {      return getPointerBase(Cast->getOperand()); -  } -  else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) { +  } else if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(V)) {      const SCEV *PtrOp = nullptr; -    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); -         I != E; ++I) { -      if ((*I)->getType()->isPointerTy()) { +    for (const SCEV *NAryOp : NAry->operands()) { +      if (NAryOp->getType()->isPointerTy()) {          // Cannot find the base of an expression with multiple pointer operands.          if (PtrOp)            return V; -        PtrOp = *I; +        PtrOp = NAryOp;        }      }      if (!PtrOp) @@ -3558,8 +3585,7 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {      if (!Visited.insert(I).second)        continue; -    ValueExprMapType::iterator It = -      ValueExprMap.find_as(static_cast<Value *>(I)); +    auto It = ValueExprMap.find_as(static_cast<Value *>(I));      if (It != ValueExprMap.end()) {        const SCEV *Old = It->second; @@ -3587,165 +3613,476 @@ ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {    }  } -/// createNodeForPHI - PHI nodes have two cases.  Either the PHI node exists in -/// a loop header, making it a potential recurrence, or it doesn't. -/// -const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { -  if (const Loop *L = LI->getLoopFor(PN->getParent())) -    if (L->getHeader() == PN->getParent()) { -      // The loop may have multiple entrances or multiple exits; we can analyze -      // this phi as an addrec if it has a unique entry value and a unique -      // backedge value. -      Value *BEValueV = nullptr, *StartValueV = nullptr; -      for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { -        Value *V = PN->getIncomingValue(i); -        if (L->contains(PN->getIncomingBlock(i))) { -          if (!BEValueV) { -            BEValueV = V; -          } else if (BEValueV != V) { -            BEValueV = nullptr; -            break; -          } -        } else if (!StartValueV) { -          StartValueV = V; -        } else if (StartValueV != V) { -          StartValueV = nullptr; -          break; -        } -      } -      if (BEValueV && StartValueV) { -        // While we are analyzing this PHI node, handle its value symbolically. 
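[Editor's note] The rewritten getMinusSCEV above explains why NSW does not transfer blindly from LHS - RHS to LHS + (-1)*RHS: negating the minimum signed value wraps even when the original subtraction does not. A concrete 8-bit instance of the example in that comment, checked in a wider type so the narrow overflow itself is never evaluated in C++:

#include <cassert>

static bool fitsInI8(int V) { return V >= -128 && V <= 127; }

int main() {
  int LHS = -1, RHS = -128;          // RHS is M, the minimum i8 value
  // -1 - (-128) = 127: the subtraction itself does not signed-wrap in i8 ...
  assert(fitsInI8(LHS - RHS));
  // ... but (-1) * RHS = 128 does, so the negated term only gets NSW when
  // RHS is provably not M.
  assert(!fitsInI8(-1 * RHS));
  return 0;
}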
-        const SCEV *SymbolicName = getUnknown(PN); -        assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && -               "PHI node already processed?"); -        ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); - -        // Using this symbolic name for the PHI, analyze the value coming around -        // the back-edge. -        const SCEV *BEValue = getSCEV(BEValueV); - -        // NOTE: If BEValue is loop invariant, we know that the PHI node just -        // has a special value for the first iteration of the loop. - -        // If the value coming around the backedge is an add with the symbolic -        // value we just inserted, then we found a simple induction variable! -        if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { -          // If there is a single occurrence of the symbolic value, replace it -          // with a recurrence. -          unsigned FoundIndex = Add->getNumOperands(); -          for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) -            if (Add->getOperand(i) == SymbolicName) -              if (FoundIndex == e) { -                FoundIndex = i; -                break; -              } +namespace { +class SCEVInitRewriter : public SCEVRewriteVisitor<SCEVInitRewriter> { +public: +  static const SCEV *rewrite(const SCEV *Scev, const Loop *L, +                             ScalarEvolution &SE) { +    SCEVInitRewriter Rewriter(L, SE); +    const SCEV *Result = Rewriter.visit(Scev); +    return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); +  } -          if (FoundIndex != Add->getNumOperands()) { -            // Create an add with everything but the specified operand. -            SmallVector<const SCEV *, 8> Ops; -            for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) -              if (i != FoundIndex) -                Ops.push_back(Add->getOperand(i)); -            const SCEV *Accum = getAddExpr(Ops); - -            // This is not a valid addrec if the step amount is varying each -            // loop iteration, but is not itself an addrec in this loop. -            if (isLoopInvariant(Accum, L) || -                (isa<SCEVAddRecExpr>(Accum) && -                 cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { -              SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; - -              // If the increment doesn't overflow, then neither the addrec nor -              // the post-increment will overflow. -              if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) { -                if (OBO->getOperand(0) == PN) { -                  if (OBO->hasNoUnsignedWrap()) -                    Flags = setFlags(Flags, SCEV::FlagNUW); -                  if (OBO->hasNoSignedWrap()) -                    Flags = setFlags(Flags, SCEV::FlagNSW); -                } -              } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { -                // If the increment is an inbounds GEP, then we know the address -                // space cannot be wrapped around. We cannot make any guarantee -                // about signed or unsigned overflow because pointers are -                // unsigned but we may have a negative index from the base -                // pointer. We can guarantee that no unsigned wrap occurs if the -                // indices form a positive value. 
-                if (GEP->isInBounds() && GEP->getOperand(0) == PN) { -                  Flags = setFlags(Flags, SCEV::FlagNW); - -                  const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); -                  if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) -                    Flags = setFlags(Flags, SCEV::FlagNUW); -                } +  SCEVInitRewriter(const Loop *L, ScalarEvolution &SE) +      : SCEVRewriteVisitor(SE), L(L), Valid(true) {} -                // We cannot transfer nuw and nsw flags from subtraction -                // operations -- sub nuw X, Y is not the same as add nuw X, -Y -                // for instance. -              } +  const SCEV *visitUnknown(const SCEVUnknown *Expr) { +    if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) +      Valid = false; +    return Expr; +  } -              const SCEV *StartVal = getSCEV(StartValueV); -              const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); - -              // Since the no-wrap flags are on the increment, they apply to the -              // post-incremented value as well. -              if (isLoopInvariant(Accum, L)) -                (void)getAddRecExpr(getAddExpr(StartVal, Accum), -                                    Accum, L, Flags); - -              // Okay, for the entire analysis of this edge we assumed the PHI -              // to be symbolic.  We now need to go back and purge all of the -              // entries for the scalars that use the symbolic expression. -              ForgetSymbolicName(PN, SymbolicName); -              ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; -              return PHISCEV; -            } +  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { +    // Only allow AddRecExprs for this loop. +    if (Expr->getLoop() == L) +      return Expr->getStart(); +    Valid = false; +    return Expr; +  } + +  bool isValid() { return Valid; } + +private: +  const Loop *L; +  bool Valid; +}; + +class SCEVShiftRewriter : public SCEVRewriteVisitor<SCEVShiftRewriter> { +public: +  static const SCEV *rewrite(const SCEV *Scev, const Loop *L, +                             ScalarEvolution &SE) { +    SCEVShiftRewriter Rewriter(L, SE); +    const SCEV *Result = Rewriter.visit(Scev); +    return Rewriter.isValid() ? Result : SE.getCouldNotCompute(); +  } + +  SCEVShiftRewriter(const Loop *L, ScalarEvolution &SE) +      : SCEVRewriteVisitor(SE), L(L), Valid(true) {} + +  const SCEV *visitUnknown(const SCEVUnknown *Expr) { +    // Only allow AddRecExprs for this loop. +    if (!(SE.getLoopDisposition(Expr, L) == ScalarEvolution::LoopInvariant)) +      Valid = false; +    return Expr; +  } + +  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { +    if (Expr->getLoop() == L && Expr->isAffine()) +      return SE.getMinusSCEV(Expr, Expr->getStepRecurrence(SE)); +    Valid = false; +    return Expr; +  } +  bool isValid() { return Valid; } + +private: +  const Loop *L; +  bool Valid; +}; +} // end anonymous namespace + +const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) { +  const Loop *L = LI.getLoopFor(PN->getParent()); +  if (!L || L->getHeader() != PN->getParent()) +    return nullptr; + +  // The loop may have multiple entrances or multiple exits; we can analyze +  // this phi as an addrec if it has a unique entry value and a unique +  // backedge value. 
+  Value *BEValueV = nullptr, *StartValueV = nullptr; +  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { +    Value *V = PN->getIncomingValue(i); +    if (L->contains(PN->getIncomingBlock(i))) { +      if (!BEValueV) { +        BEValueV = V; +      } else if (BEValueV != V) { +        BEValueV = nullptr; +        break; +      } +    } else if (!StartValueV) { +      StartValueV = V; +    } else if (StartValueV != V) { +      StartValueV = nullptr; +      break; +    } +  } +  if (BEValueV && StartValueV) { +    // While we are analyzing this PHI node, handle its value symbolically. +    const SCEV *SymbolicName = getUnknown(PN); +    assert(ValueExprMap.find_as(PN) == ValueExprMap.end() && +           "PHI node already processed?"); +    ValueExprMap.insert(std::make_pair(SCEVCallbackVH(PN, this), SymbolicName)); + +    // Using this symbolic name for the PHI, analyze the value coming around +    // the back-edge. +    const SCEV *BEValue = getSCEV(BEValueV); + +    // NOTE: If BEValue is loop invariant, we know that the PHI node just +    // has a special value for the first iteration of the loop. + +    // If the value coming around the backedge is an add with the symbolic +    // value we just inserted, then we found a simple induction variable! +    if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(BEValue)) { +      // If there is a single occurrence of the symbolic value, replace it +      // with a recurrence. +      unsigned FoundIndex = Add->getNumOperands(); +      for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) +        if (Add->getOperand(i) == SymbolicName) +          if (FoundIndex == e) { +            FoundIndex = i; +            break;            } -        } else if (const SCEVAddRecExpr *AddRec = -                     dyn_cast<SCEVAddRecExpr>(BEValue)) { -          // Otherwise, this could be a loop like this: -          //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; } -          // In this case, j = {1,+,1}  and BEValue is j. -          // Because the other in-value of i (0) fits the evolution of BEValue -          // i really is an addrec evolution. -          if (AddRec->getLoop() == L && AddRec->isAffine()) { -            const SCEV *StartVal = getSCEV(StartValueV); - -            // If StartVal = j.start - j.stride, we can use StartVal as the -            // initial step of the addrec evolution. -            if (StartVal == getMinusSCEV(AddRec->getOperand(0), -                                         AddRec->getOperand(1))) { -              // FIXME: For constant StartVal, we should be able to infer -              // no-wrap flags. -              const SCEV *PHISCEV = -                getAddRecExpr(StartVal, AddRec->getOperand(1), L, -                              SCEV::FlagAnyWrap); - -              // Okay, for the entire analysis of this edge we assumed the PHI -              // to be symbolic.  We now need to go back and purge all of the -              // entries for the scalars that use the symbolic expression. -              ForgetSymbolicName(PN, SymbolicName); -              ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; -              return PHISCEV; + +      if (FoundIndex != Add->getNumOperands()) { +        // Create an add with everything but the specified operand. 
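[Editor's note] createAddRecFromPHI above first scans the PHI's incoming values, requiring a unique backedge value (from blocks inside the loop) and a unique start value (from outside) before it will analyze the PHI as a recurrence. A standalone sketch of that classification over plain (value, is-inside-loop) pairs; the function name and the integer encoding are hypothetical and do not correspond to LLVM's PHINode API.

#include <cassert>
#include <utility>
#include <vector>

// Returns {backedge value, start value}; a component is -1 when the
// corresponding incoming value is missing or not unique.
static std::pair<int, int>
classifyIncoming(const std::vector<std::pair<int, bool>> &Incoming) {
  int BEValue = 0, StartValue = 0;
  bool HaveBE = false, HaveStart = false;
  for (const auto &In : Incoming) {
    int V = In.first;
    if (In.second) {                       // edge from inside the loop
      if (!HaveBE) { BEValue = V; HaveBE = true; }
      else if (BEValue != V) { BEValue = -1; }
    } else {                               // edge from outside the loop
      if (!HaveStart) { StartValue = V; HaveStart = true; }
      else if (StartValue != V) { StartValue = -1; }
    }
  }
  return {HaveBE ? BEValue : -1, HaveStart ? StartValue : -1};
}

int main() {
  // Two entry edges carrying the same start and one backedge: analyzable.
  auto R = classifyIncoming({{7, false}, {7, false}, {9, true}});
  assert(R.first == 9 && R.second == 7);
  // Two different backedge values: give up on the addrec analysis.
  R = classifyIncoming({{7, false}, {9, true}, {11, true}});
  assert(R.first == -1);
  return 0;
}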
+        SmallVector<const SCEV *, 8> Ops; +        for (unsigned i = 0, e = Add->getNumOperands(); i != e; ++i) +          if (i != FoundIndex) +            Ops.push_back(Add->getOperand(i)); +        const SCEV *Accum = getAddExpr(Ops); + +        // This is not a valid addrec if the step amount is varying each +        // loop iteration, but is not itself an addrec in this loop. +        if (isLoopInvariant(Accum, L) || +            (isa<SCEVAddRecExpr>(Accum) && +             cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) { +          SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; + +          // If the increment doesn't overflow, then neither the addrec nor +          // the post-increment will overflow. +          if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV)) { +            if (OBO->getOperand(0) == PN) { +              if (OBO->hasNoUnsignedWrap()) +                Flags = setFlags(Flags, SCEV::FlagNUW); +              if (OBO->hasNoSignedWrap()) +                Flags = setFlags(Flags, SCEV::FlagNSW); +            } +          } else if (GEPOperator *GEP = dyn_cast<GEPOperator>(BEValueV)) { +            // If the increment is an inbounds GEP, then we know the address +            // space cannot be wrapped around. We cannot make any guarantee +            // about signed or unsigned overflow because pointers are +            // unsigned but we may have a negative index from the base +            // pointer. We can guarantee that no unsigned wrap occurs if the +            // indices form a positive value. +            if (GEP->isInBounds() && GEP->getOperand(0) == PN) { +              Flags = setFlags(Flags, SCEV::FlagNW); + +              const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); +              if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) +                Flags = setFlags(Flags, SCEV::FlagNUW);              } + +            // We cannot transfer nuw and nsw flags from subtraction +            // operations -- sub nuw X, Y is not the same as add nuw X, -Y +            // for instance.            } + +          const SCEV *StartVal = getSCEV(StartValueV); +          const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags); + +          // Since the no-wrap flags are on the increment, they apply to the +          // post-incremented value as well. +          if (isLoopInvariant(Accum, L)) +            (void)getAddRecExpr(getAddExpr(StartVal, Accum), Accum, L, Flags); + +          // Okay, for the entire analysis of this edge we assumed the PHI +          // to be symbolic.  We now need to go back and purge all of the +          // entries for the scalars that use the symbolic expression. +          ForgetSymbolicName(PN, SymbolicName); +          ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV; +          return PHISCEV; +        } +      } +    } else { +      // Otherwise, this could be a loop like this: +      //     i = 0;  for (j = 1; ..; ++j) { ....  i = j; } +      // In this case, j = {1,+,1}  and BEValue is j. +      // Because the other in-value of i (0) fits the evolution of BEValue +      // i really is an addrec evolution. 
+      // +      // We can generalize this saying that i is the shifted value of BEValue +      // by one iteration: +      //   PHI(f(0), f({1,+,1})) --> f({0,+,1}) +      const SCEV *Shifted = SCEVShiftRewriter::rewrite(BEValue, L, *this); +      const SCEV *Start = SCEVInitRewriter::rewrite(Shifted, L, *this); +      if (Shifted != getCouldNotCompute() && +          Start != getCouldNotCompute()) { +        const SCEV *StartVal = getSCEV(StartValueV); +        if (Start == StartVal) { +          // Okay, for the entire analysis of this edge we assumed the PHI +          // to be symbolic.  We now need to go back and purge all of the +          // entries for the scalars that use the symbolic expression. +          ForgetSymbolicName(PN, SymbolicName); +          ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted; +          return Shifted;          }        }      } +  } + +  return nullptr; +} + +// Checks if the SCEV S is available at BB.  S is considered available at BB +// if S can be materialized at BB without introducing a fault. +static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, +                               BasicBlock *BB) { +  struct CheckAvailable { +    bool TraversalDone = false; +    bool Available = true; + +    const Loop *L = nullptr;  // The loop BB is in (can be nullptr) +    BasicBlock *BB = nullptr; +    DominatorTree &DT; + +    CheckAvailable(const Loop *L, BasicBlock *BB, DominatorTree &DT) +      : L(L), BB(BB), DT(DT) {} + +    bool setUnavailable() { +      TraversalDone = true; +      Available = false; +      return false; +    } + +    bool follow(const SCEV *S) { +      switch (S->getSCEVType()) { +      case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: +      case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: +        // These expressions are available if their operand(s) is/are. +        return true; + +      case scAddRecExpr: { +        // We allow add recurrences that are on the loop BB is in, or some +        // outer loop.  This guarantees availability because the value of the +        // add recurrence at BB is simply the "current" value of the induction +        // variable.  We can relax this in the future; for instance an add +        // recurrence on a sibling dominating loop is also available at BB. +        const auto *ARLoop = cast<SCEVAddRecExpr>(S)->getLoop(); +        if (L && (ARLoop == L || ARLoop->contains(L))) +          return true; + +        return setUnavailable(); +      } + +      case scUnknown: { +        // For SCEVUnknown, we check for simple dominance. +        const auto *SU = cast<SCEVUnknown>(S); +        Value *V = SU->getValue(); + +        if (isa<Argument>(V)) +          return false; + +        if (isa<Instruction>(V) && DT.dominates(cast<Instruction>(V), BB)) +          return false; + +        return setUnavailable(); +      } + +      case scUDivExpr: +      case scCouldNotCompute: +        // We do not try to smart about these at all. +        return setUnavailable(); +      } +      llvm_unreachable("switch should be fully covered!"); +    } + +    bool isDone() { return TraversalDone; } +  }; + +  CheckAvailable CA(L, BB, DT); +  SCEVTraversal<CheckAvailable> ST(CA); + +  ST.visitAll(S); +  return CA.Available; +} + +// Try to match a control flow sequence that branches out at BI and merges back +// at Merge into a "C ? LHS : RHS" select pattern.  Return true on a successful +// match. 
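[Editor's note] The new else-branch above handles PHIs like i in "i = 0; for (j = 1; ..; ++j) i = j;": SCEVShiftRewriter turns the backedge value {1,+,1} into {1,+,1} minus one step, i.e. {0,+,1}; SCEVInitRewriter then extracts its start 0, and since that equals the PHI's incoming start value, the PHI itself is the shifted recurrence {0,+,1}. A plain-loop restatement of PHI(f(0), f({1,+,1})) --> f({0,+,1}) for f(x) = x:

#include <cassert>
#include <cstdint>

int main() {
  // Simulate: i = 0; for (j = 1; j <= 5; ++j) i = j;
  uint32_t i = 0;
  for (uint32_t n = 0; n < 5; ++n) {
    uint32_t j = 1 + n;   // j is {1,+,1} at iteration n
    assert(i == n);       // i is the shifted recurrence {0,+,1}
    i = j;                // value carried around the backedge
  }
  return 0;
}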
+static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge, +                          Value *&C, Value *&LHS, Value *&RHS) { +  C = BI->getCondition(); + +  BasicBlockEdge LeftEdge(BI->getParent(), BI->getSuccessor(0)); +  BasicBlockEdge RightEdge(BI->getParent(), BI->getSuccessor(1)); + +  if (!LeftEdge.isSingleEdge()) +    return false; + +  assert(RightEdge.isSingleEdge() && "Follows from LeftEdge.isSingleEdge()"); + +  Use &LeftUse = Merge->getOperandUse(0); +  Use &RightUse = Merge->getOperandUse(1); + +  if (DT.dominates(LeftEdge, LeftUse) && DT.dominates(RightEdge, RightUse)) { +    LHS = LeftUse; +    RHS = RightUse; +    return true; +  } + +  if (DT.dominates(LeftEdge, RightUse) && DT.dominates(RightEdge, LeftUse)) { +    LHS = RightUse; +    RHS = LeftUse; +    return true; +  } + +  return false; +} + +const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) { +  if (PN->getNumIncomingValues() == 2) { +    const Loop *L = LI.getLoopFor(PN->getParent()); + +    // We don't want to break LCSSA, even in a SCEV expression tree. +    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) +      if (LI.getLoopFor(PN->getIncomingBlock(i)) != L) +        return nullptr; + +    // Try to match +    // +    //  br %cond, label %left, label %right +    // left: +    //  br label %merge +    // right: +    //  br label %merge +    // merge: +    //  V = phi [ %x, %left ], [ %y, %right ] +    // +    // as "select %cond, %x, %y" + +    BasicBlock *IDom = DT[PN->getParent()]->getIDom()->getBlock(); +    assert(IDom && "At least the entry block should dominate PN"); + +    auto *BI = dyn_cast<BranchInst>(IDom->getTerminator()); +    Value *Cond = nullptr, *LHS = nullptr, *RHS = nullptr; + +    if (BI && BI->isConditional() && +        BrPHIToSelect(DT, BI, PN, Cond, LHS, RHS) && +        IsAvailableOnEntry(L, DT, getSCEV(LHS), PN->getParent()) && +        IsAvailableOnEntry(L, DT, getSCEV(RHS), PN->getParent())) +      return createNodeForSelectOrPHI(PN, Cond, LHS, RHS); +  } + +  return nullptr; +} + +const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { +  if (const SCEV *S = createAddRecFromPHI(PN)) +    return S; + +  if (const SCEV *S = createNodeFromSelectLikePHI(PN)) +    return S;    // If the PHI has a single incoming value, follow that value, unless the    // PHI's incoming blocks are in a different loop, in which case doing so    // risks breaking LCSSA form. Instcombine would normally zap these, but    // it doesn't have DominatorTree information, so it may miss cases. -  if (Value *V = -          SimplifyInstruction(PN, F->getParent()->getDataLayout(), TLI, DT, AC)) -    if (LI->replacementPreservesLCSSAForm(PN, V)) +  if (Value *V = SimplifyInstruction(PN, getDataLayout(), &TLI, &DT, &AC)) +    if (LI.replacementPreservesLCSSAForm(PN, V))        return getSCEV(V);    // If it's not a loop phi, we can't handle it yet.    return getUnknown(PN);  } +const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I, +                                                      Value *Cond, +                                                      Value *TrueVal, +                                                      Value *FalseVal) { +  // Handle "constant" branch or select. This can occur for instance when a +  // loop pass transforms an inner loop and moves on to process the outer loop. +  if (auto *CI = dyn_cast<ConstantInt>(Cond)) +    return getSCEV(CI->isOne() ? 
TrueVal : FalseVal); + +  // Try to match some simple smax or umax patterns. +  auto *ICI = dyn_cast<ICmpInst>(Cond); +  if (!ICI) +    return getUnknown(I); + +  Value *LHS = ICI->getOperand(0); +  Value *RHS = ICI->getOperand(1); + +  switch (ICI->getPredicate()) { +  case ICmpInst::ICMP_SLT: +  case ICmpInst::ICMP_SLE: +    std::swap(LHS, RHS); +  // fall through +  case ICmpInst::ICMP_SGT: +  case ICmpInst::ICMP_SGE: +    // a >s b ? a+x : b+x  ->  smax(a, b)+x +    // a >s b ? b+x : a+x  ->  smin(a, b)+x +    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { +      const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), I->getType()); +      const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), I->getType()); +      const SCEV *LA = getSCEV(TrueVal); +      const SCEV *RA = getSCEV(FalseVal); +      const SCEV *LDiff = getMinusSCEV(LA, LS); +      const SCEV *RDiff = getMinusSCEV(RA, RS); +      if (LDiff == RDiff) +        return getAddExpr(getSMaxExpr(LS, RS), LDiff); +      LDiff = getMinusSCEV(LA, RS); +      RDiff = getMinusSCEV(RA, LS); +      if (LDiff == RDiff) +        return getAddExpr(getSMinExpr(LS, RS), LDiff); +    } +    break; +  case ICmpInst::ICMP_ULT: +  case ICmpInst::ICMP_ULE: +    std::swap(LHS, RHS); +  // fall through +  case ICmpInst::ICMP_UGT: +  case ICmpInst::ICMP_UGE: +    // a >u b ? a+x : b+x  ->  umax(a, b)+x +    // a >u b ? b+x : a+x  ->  umin(a, b)+x +    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType())) { +      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); +      const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), I->getType()); +      const SCEV *LA = getSCEV(TrueVal); +      const SCEV *RA = getSCEV(FalseVal); +      const SCEV *LDiff = getMinusSCEV(LA, LS); +      const SCEV *RDiff = getMinusSCEV(RA, RS); +      if (LDiff == RDiff) +        return getAddExpr(getUMaxExpr(LS, RS), LDiff); +      LDiff = getMinusSCEV(LA, RS); +      RDiff = getMinusSCEV(RA, LS); +      if (LDiff == RDiff) +        return getAddExpr(getUMinExpr(LS, RS), LDiff); +    } +    break; +  case ICmpInst::ICMP_NE: +    // n != 0 ? n+x : 1+x  ->  umax(n, 1)+x +    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && +        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { +      const SCEV *One = getOne(I->getType()); +      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); +      const SCEV *LA = getSCEV(TrueVal); +      const SCEV *RA = getSCEV(FalseVal); +      const SCEV *LDiff = getMinusSCEV(LA, LS); +      const SCEV *RDiff = getMinusSCEV(RA, One); +      if (LDiff == RDiff) +        return getAddExpr(getUMaxExpr(One, LS), LDiff); +    } +    break; +  case ICmpInst::ICMP_EQ: +    // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x +    if (getTypeSizeInBits(LHS->getType()) <= getTypeSizeInBits(I->getType()) && +        isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { +      const SCEV *One = getOne(I->getType()); +      const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), I->getType()); +      const SCEV *LA = getSCEV(TrueVal); +      const SCEV *RA = getSCEV(FalseVal); +      const SCEV *LDiff = getMinusSCEV(LA, One); +      const SCEV *RDiff = getMinusSCEV(RA, LS); +      if (LDiff == RDiff) +        return getAddExpr(getUMaxExpr(One, LS), LDiff); +    } +    break; +  default: +    break; +  } + +  return getUnknown(I); +} +  /// createNodeForGEP - Expand GEP instructions into add and multiply  /// operations. 
This allows them to be analyzed by regular SCEV code.  /// @@ -3769,7 +4106,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {  uint32_t  ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) -    return C->getValue()->getValue().countTrailingZeros(); +    return C->getAPInt().countTrailingZeros();    if (const SCEVTruncateExpr *T = dyn_cast<SCEVTruncateExpr>(S))      return std::min(GetMinTrailingZeros(T->getOperand()), @@ -3834,8 +4171,8 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {      // For a SCEVUnknown, ask ValueTracking.      unsigned BitWidth = getTypeSizeInBits(U->getType());      APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); -    computeKnownBits(U->getValue(), Zeros, Ones, -                     F->getParent()->getDataLayout(), 0, AC, nullptr, DT); +    computeKnownBits(U->getValue(), Zeros, Ones, getDataLayout(), 0, &AC, +                     nullptr, &DT);      return Zeros.countTrailingOnes();    } @@ -3846,26 +4183,9 @@ ScalarEvolution::GetMinTrailingZeros(const SCEV *S) {  /// GetRangeFromMetadata - Helper method to assign a range to V from  /// metadata present in the IR.  static Optional<ConstantRange> GetRangeFromMetadata(Value *V) { -  if (Instruction *I = dyn_cast<Instruction>(V)) { -    if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) { -      ConstantRange TotalRange( -          cast<IntegerType>(I->getType())->getBitWidth(), false); - -      unsigned NumRanges = MD->getNumOperands() / 2; -      assert(NumRanges >= 1); - -      for (unsigned i = 0; i < NumRanges; ++i) { -        ConstantInt *Lower = -            mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 0)); -        ConstantInt *Upper = -            mdconst::extract<ConstantInt>(MD->getOperand(2 * i + 1)); -        ConstantRange Range(Lower->getValue(), Upper->getValue()); -        TotalRange = TotalRange.unionWith(Range); -      } - -      return TotalRange; -    } -  } +  if (Instruction *I = dyn_cast<Instruction>(V)) +    if (MDNode *MD = I->getMetadata(LLVMContext::MD_range)) +      return getConstantRangeFromMetadata(*MD);    return None;  } @@ -3887,7 +4207,7 @@ ScalarEvolution::getRange(const SCEV *S,      return I->second;    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) -    return setRange(C, SignHint, ConstantRange(C->getValue()->getValue())); +    return setRange(C, SignHint, ConstantRange(C->getAPInt()));    unsigned BitWidth = getTypeSizeInBits(S->getType());    ConstantRange ConservativeResult(BitWidth, /*isFullSet=*/true); @@ -3965,9 +4285,8 @@ ScalarEvolution::getRange(const SCEV *S,      if (AddRec->getNoWrapFlags(SCEV::FlagNUW))        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(AddRec->getStart()))          if (!C->getValue()->isZero()) -          ConservativeResult = -            ConservativeResult.intersectWith( -              ConstantRange(C->getValue()->getValue(), APInt(BitWidth, 0))); +          ConservativeResult = ConservativeResult.intersectWith( +              ConstantRange(C->getAPInt(), APInt(BitWidth, 0)));      // If there's no signed wrap, and all the operands have the same sign or      // zero, the value won't ever change sign. @@ -4065,18 +4384,18 @@ ScalarEvolution::getRange(const SCEV *S,      // Split here to avoid paying the compile-time cost of calling both      // computeKnownBits and ComputeNumSignBits.  This restriction can be lifted      // if needed. 
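[Editor's note] The select-related hunks above do two things: createNodeFromSelectLikePHI (via BrPHIToSelect) treats a two-way diamond that re-merges in a PHI as a select, and createNodeForSelectOrPHI then recovers smax/umax when both select arms differ from the compared values by the same offset (the LDiff == RDiff check). The following standalone sketch only restates those two identities with concrete numbers; the function names are illustrative and no LLVM API is involved.

#include <algorithm>
#include <cassert>

// A branch whose arms only forward different values to a merge point behaves
// like a select -- the shape BrPHIToSelect looks for at the IR level.
static int mergedPhi(bool Cond, int X, int Y) {
  int V;
  if (Cond)        // br %cond, label %left, label %right
    V = X;         // left:  br label %merge
  else
    V = Y;         // right: br label %merge
  return V;        // merge: V = phi [ %x, %left ], [ %y, %right ]
}

int main() {
  // 1. The diamond is equivalent to a select.
  for (bool C : {false, true})
    assert(mergedPhi(C, 3, 8) == (C ? 3 : 8));

  // 2. "a >s b ? a+x : b+x" is smax(a, b) + x because both arms differ from
  //    a and b by the same x (LDiff == RDiff).
  int a = 5, b = 9, x = 100;
  int Sel = (a > b) ? a + x : b + x;
  assert((a + x) - a == (b + x) - b);
  assert(Sel == std::max(a, b) + x);

  // 3. Likewise "n != 0 ? n+x : 1+x" is umax(n, 1) + x.
  unsigned n = 0, ux = 7;
  unsigned USel = (n != 0) ? n + ux : 1 + ux;
  assert(USel == std::max(n, 1u) + ux);
  return 0;
}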
-    const DataLayout &DL = F->getParent()->getDataLayout(); +    const DataLayout &DL = getDataLayout();      if (SignHint == ScalarEvolution::HINT_RANGE_UNSIGNED) {        // For a SCEVUnknown, ask ValueTracking.        APInt Zeros(BitWidth, 0), Ones(BitWidth, 0); -      computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, AC, nullptr, DT); +      computeKnownBits(U->getValue(), Zeros, Ones, DL, 0, &AC, nullptr, &DT);        if (Ones != ~Zeros + 1)          ConservativeResult =              ConservativeResult.intersectWith(ConstantRange(Ones, ~Zeros + 1));      } else {        assert(SignHint == ScalarEvolution::HINT_RANGE_SIGNED &&               "generalize as needed!"); -      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, AC, nullptr, DT); +      unsigned NS = ComputeNumSignBits(U->getValue(), DL, 0, &AC, nullptr, &DT);        if (NS > 1)          ConservativeResult = ConservativeResult.intersectWith(              ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -4089,8 +4408,64 @@ ScalarEvolution::getRange(const SCEV *S,    return setRange(S, SignHint, ConservativeResult);  } -/// createSCEV - We know that there is no SCEV for the specified value. -/// Analyze the expression. +SCEV::NoWrapFlags ScalarEvolution::getNoWrapFlagsFromUB(const Value *V) { +  if (isa<ConstantExpr>(V)) return SCEV::FlagAnyWrap; +  const BinaryOperator *BinOp = cast<BinaryOperator>(V); + +  // Return early if there are no flags to propagate to the SCEV. +  SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap; +  if (BinOp->hasNoUnsignedWrap()) +    Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNUW); +  if (BinOp->hasNoSignedWrap()) +    Flags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW); +  if (Flags == SCEV::FlagAnyWrap) { +    return SCEV::FlagAnyWrap; +  } + +  // Here we check that BinOp is in the header of the innermost loop +  // containing BinOp, since we only deal with instructions in the loop +  // header. The actual loop we need to check later will come from an add +  // recurrence, but getting that requires computing the SCEV of the operands, +  // which can be expensive. This check we can do cheaply to rule out some +  // cases early. +  Loop *innermostContainingLoop = LI.getLoopFor(BinOp->getParent()); +  if (innermostContainingLoop == nullptr || +      innermostContainingLoop->getHeader() != BinOp->getParent()) +    return SCEV::FlagAnyWrap; + +  // Only proceed if we can prove that BinOp does not yield poison. +  if (!isKnownNotFullPoison(BinOp)) return SCEV::FlagAnyWrap; + +  // At this point we know that if V is executed, then it does not wrap +  // according to at least one of NSW or NUW. If V is not executed, then we do +  // not know if the calculation that V represents would wrap. Multiple +  // instructions can map to the same SCEV. If we apply NSW or NUW from V to +  // the SCEV, we must guarantee no wrapping for that SCEV also when it is +  // derived from other instructions that map to the same SCEV. We cannot make +  // that guarantee for cases where V is not executed. So we need to find the +  // loop that V is considered in relation to and prove that V is executed for +  // every iteration of that loop. That implies that the value that V +  // calculates does not wrap anywhere in the loop, so then we can apply the +  // flags to the SCEV. +  // +  // We check isLoopInvariant to disambiguate in case we are adding two +  // recurrences from different loops, so that we know which loop to prove +  // that V is executed in. 
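[Editor's note] getNoWrapFlagsFromUB above only forwards an instruction's nuw/nsw to the SCEV when the instruction is known to execute on every iteration of the relevant loop: several IR instructions can map to one SCEV, so a flag that only holds under a guard must not leak to contexts where the guarded instruction never runs. A small source-level illustration of why that matters; this is not LLVM code, and the overflow is observed by computing in a wider type.

#include <cassert>
#include <cstdint>

int main() {
  // Two additions that a SCEV-style analysis would canonicalize to the same
  // expression "i + Big": one only executed under a guard that keeps it in
  // range, one executed unconditionally. A no-wrap fact proven for the
  // guarded one must not be attached to the shared expression.
  const int64_t Big = INT32_MAX - 10;
  for (int64_t i = 0; i < 100; ++i) {
    if (i < 5) {
      int64_t Guarded = i + Big;     // never exceeds INT32_MAX here
      assert(Guarded <= INT32_MAX);
    }
    int64_t Unguarded = i + Big;     // exceeds INT32_MAX once i > 10
    if (i > 10)
      assert(Unguarded > INT32_MAX);
  }
  return 0;
}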
+  for (int OpIndex = 0; OpIndex < 2; ++OpIndex) { +    const SCEV *Op = getSCEV(BinOp->getOperand(OpIndex)); +    if (auto *AddRec = dyn_cast<SCEVAddRecExpr>(Op)) { +      const int OtherOpIndex = 1 - OpIndex; +      const SCEV *OtherOp = getSCEV(BinOp->getOperand(OtherOpIndex)); +      if (isLoopInvariant(OtherOp, AddRec->getLoop()) && +          isGuaranteedToExecuteForEveryIteration(BinOp, AddRec->getLoop())) +        return Flags; +    } +  } +  return SCEV::FlagAnyWrap; +} + +/// createSCEV - We know that there is no SCEV for the specified value.  Analyze +/// the expression.  ///  const SCEV *ScalarEvolution::createSCEV(Value *V) {    if (!isSCEVable(V->getType())) @@ -4104,14 +4479,14 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {      // reachable. Such instructions don't matter, and they aren't required      // to obey basic rules for definitions dominating uses which this      // analysis depends on. -    if (!DT->isReachableFromEntry(I->getParent())) +    if (!DT.isReachableFromEntry(I->getParent()))        return getUnknown(V);    } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))      Opcode = CE->getOpcode();    else if (ConstantInt *CI = dyn_cast<ConstantInt>(V))      return getConstant(CI);    else if (isa<ConstantPointerNull>(V)) -    return getConstant(V->getType(), 0); +    return getZero(V->getType());    else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V))      return GA->mayBeOverridden() ? getUnknown(V) : getSCEV(GA->getAliasee());    else @@ -4126,47 +4501,79 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {      // because it leads to N-1 getAddExpr calls for N ultimate operands.      // Instead, gather up all the operands and make a single getAddExpr call.      // LLVM IR canonical form means we need only traverse the left operands. -    // -    // Don't apply this instruction's NSW or NUW flags to the new -    // expression. The instruction may be guarded by control flow that the -    // no-wrap behavior depends on. Non-control-equivalent instructions can be -    // mapped to the same SCEV expression, and it would be incorrect to transfer -    // NSW/NUW semantics to those operations.      SmallVector<const SCEV *, 4> AddOps; -    AddOps.push_back(getSCEV(U->getOperand(1))); -    for (Value *Op = U->getOperand(0); ; Op = U->getOperand(0)) { -      unsigned Opcode = Op->getValueID() - Value::InstructionVal; -      if (Opcode != Instruction::Add && Opcode != Instruction::Sub) +    for (Value *Op = U;; Op = U->getOperand(0)) { +      U = dyn_cast<Operator>(Op); +      unsigned Opcode = U ? U->getOpcode() : 0; +      if (!U || (Opcode != Instruction::Add && Opcode != Instruction::Sub)) { +        assert(Op != V && "V should be an add"); +        AddOps.push_back(getSCEV(Op)); +        break; +      } + +      if (auto *OpSCEV = getExistingSCEV(U)) { +        AddOps.push_back(OpSCEV); +        break; +      } + +      // If a NUW or NSW flag can be applied to the SCEV for this +      // addition, then compute the SCEV for this addition by itself +      // with a separate call to getAddExpr. We need to do that +      // instead of pushing the operands of the addition onto AddOps, +      // since the flags are only known to apply to this particular +      // addition - they may not apply to other additions that can be +      // formed with operands from AddOps. 
+      const SCEV *RHS = getSCEV(U->getOperand(1)); +      SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); +      if (Flags != SCEV::FlagAnyWrap) { +        const SCEV *LHS = getSCEV(U->getOperand(0)); +        if (Opcode == Instruction::Sub) +          AddOps.push_back(getMinusSCEV(LHS, RHS, Flags)); +        else +          AddOps.push_back(getAddExpr(LHS, RHS, Flags));          break; -      U = cast<Operator>(Op); -      const SCEV *Op1 = getSCEV(U->getOperand(1)); +      } +        if (Opcode == Instruction::Sub) -        AddOps.push_back(getNegativeSCEV(Op1)); +        AddOps.push_back(getNegativeSCEV(RHS));        else -        AddOps.push_back(Op1); +        AddOps.push_back(RHS);      } -    AddOps.push_back(getSCEV(U->getOperand(0)));      return getAddExpr(AddOps);    } +    case Instruction::Mul: { -    // Don't transfer NSW/NUW for the same reason as AddExpr.      SmallVector<const SCEV *, 4> MulOps; -    MulOps.push_back(getSCEV(U->getOperand(1))); -    for (Value *Op = U->getOperand(0); -         Op->getValueID() == Instruction::Mul + Value::InstructionVal; -         Op = U->getOperand(0)) { -      U = cast<Operator>(Op); +    for (Value *Op = U;; Op = U->getOperand(0)) { +      U = dyn_cast<Operator>(Op); +      if (!U || U->getOpcode() != Instruction::Mul) { +        assert(Op != V && "V should be a mul"); +        MulOps.push_back(getSCEV(Op)); +        break; +      } + +      if (auto *OpSCEV = getExistingSCEV(U)) { +        MulOps.push_back(OpSCEV); +        break; +      } + +      SCEV::NoWrapFlags Flags = getNoWrapFlagsFromUB(U); +      if (Flags != SCEV::FlagAnyWrap) { +        MulOps.push_back(getMulExpr(getSCEV(U->getOperand(0)), +                                    getSCEV(U->getOperand(1)), Flags)); +        break; +      } +        MulOps.push_back(getSCEV(U->getOperand(1)));      } -    MulOps.push_back(getSCEV(U->getOperand(0)));      return getMulExpr(MulOps);    }    case Instruction::UDiv:      return getUDivExpr(getSCEV(U->getOperand(0)),                         getSCEV(U->getOperand(1)));    case Instruction::Sub: -    return getMinusSCEV(getSCEV(U->getOperand(0)), -                        getSCEV(U->getOperand(1))); +    return getMinusSCEV(getSCEV(U->getOperand(0)), getSCEV(U->getOperand(1)), +                        getNoWrapFlagsFromUB(U));    case Instruction::And:      // For an expression like x&255 that merely masks off the high bits,      // use zext(trunc(x)) as the SCEV expression. @@ -4185,8 +4592,8 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {        unsigned TZ = A.countTrailingZeros();        unsigned BitWidth = A.getBitWidth();        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); -      computeKnownBits(U->getOperand(0), KnownZero, KnownOne, -                       F->getParent()->getDataLayout(), 0, AC, nullptr, DT); +      computeKnownBits(U->getOperand(0), KnownZero, KnownOne, getDataLayout(), +                       0, &AC, nullptr, &DT);        APInt EffectiveMask =            APInt::getLowBitsSet(BitWidth, BitWidth - LZ - TZ).shl(TZ); @@ -4286,9 +4693,18 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {        if (SA->getValue().uge(BitWidth))          break; +      // It is currently not resolved how to interpret NSW for left +      // shift by BitWidth - 1, so we avoid applying flags in that +      // case. Remove this check (or this comment) once the situation +      // is resolved. See +      // http://lists.llvm.org/pipermail/llvm-dev/2015-April/084195.html +      // and http://reviews.llvm.org/D8890 . 
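[Editor's note] The And case above models "x & 255" as zext(trunc(x)): masking off the high bits is the same as truncating to the narrow type and zero-extending back. A quick standalone check of that equivalence for the low byte, with fixed-width integers rather than SCEV expressions:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t x = 0xDEADBEEF;
  uint32_t Masked = x & 0xFF;                                    // x & 255
  uint32_t ZextTrunc = static_cast<uint32_t>(static_cast<uint8_t>(x));
  assert(Masked == ZextTrunc && Masked == 0xEF);
  return 0;
}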
+      auto Flags = SCEV::FlagAnyWrap; +      if (SA->getValue().ult(BitWidth - 1)) Flags = getNoWrapFlagsFromUB(U); +        Constant *X = ConstantInt::get(getContext(),          APInt::getOneBitSet(BitWidth, SA->getZExtValue())); -      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); +      return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X), Flags);      }      break; @@ -4363,94 +4779,13 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {      return createNodeForPHI(cast<PHINode>(U));    case Instruction::Select: -    // This could be a smax or umax that was lowered earlier. -    // Try to recover it. -    if (ICmpInst *ICI = dyn_cast<ICmpInst>(U->getOperand(0))) { -      Value *LHS = ICI->getOperand(0); -      Value *RHS = ICI->getOperand(1); -      switch (ICI->getPredicate()) { -      case ICmpInst::ICMP_SLT: -      case ICmpInst::ICMP_SLE: -        std::swap(LHS, RHS); -        // fall through -      case ICmpInst::ICMP_SGT: -      case ICmpInst::ICMP_SGE: -        // a >s b ? a+x : b+x  ->  smax(a, b)+x -        // a >s b ? b+x : a+x  ->  smin(a, b)+x -        if (getTypeSizeInBits(LHS->getType()) <= -            getTypeSizeInBits(U->getType())) { -          const SCEV *LS = getNoopOrSignExtend(getSCEV(LHS), U->getType()); -          const SCEV *RS = getNoopOrSignExtend(getSCEV(RHS), U->getType()); -          const SCEV *LA = getSCEV(U->getOperand(1)); -          const SCEV *RA = getSCEV(U->getOperand(2)); -          const SCEV *LDiff = getMinusSCEV(LA, LS); -          const SCEV *RDiff = getMinusSCEV(RA, RS); -          if (LDiff == RDiff) -            return getAddExpr(getSMaxExpr(LS, RS), LDiff); -          LDiff = getMinusSCEV(LA, RS); -          RDiff = getMinusSCEV(RA, LS); -          if (LDiff == RDiff) -            return getAddExpr(getSMinExpr(LS, RS), LDiff); -        } -        break; -      case ICmpInst::ICMP_ULT: -      case ICmpInst::ICMP_ULE: -        std::swap(LHS, RHS); -        // fall through -      case ICmpInst::ICMP_UGT: -      case ICmpInst::ICMP_UGE: -        // a >u b ? a+x : b+x  ->  umax(a, b)+x -        // a >u b ? b+x : a+x  ->  umin(a, b)+x -        if (getTypeSizeInBits(LHS->getType()) <= -            getTypeSizeInBits(U->getType())) { -          const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); -          const SCEV *RS = getNoopOrZeroExtend(getSCEV(RHS), U->getType()); -          const SCEV *LA = getSCEV(U->getOperand(1)); -          const SCEV *RA = getSCEV(U->getOperand(2)); -          const SCEV *LDiff = getMinusSCEV(LA, LS); -          const SCEV *RDiff = getMinusSCEV(RA, RS); -          if (LDiff == RDiff) -            return getAddExpr(getUMaxExpr(LS, RS), LDiff); -          LDiff = getMinusSCEV(LA, RS); -          RDiff = getMinusSCEV(RA, LS); -          if (LDiff == RDiff) -            return getAddExpr(getUMinExpr(LS, RS), LDiff); -        } -        break; -      case ICmpInst::ICMP_NE: -        // n != 0 ? 
n+x : 1+x  ->  umax(n, 1)+x -        if (getTypeSizeInBits(LHS->getType()) <= -                getTypeSizeInBits(U->getType()) && -            isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { -          const SCEV *One = getConstant(U->getType(), 1); -          const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); -          const SCEV *LA = getSCEV(U->getOperand(1)); -          const SCEV *RA = getSCEV(U->getOperand(2)); -          const SCEV *LDiff = getMinusSCEV(LA, LS); -          const SCEV *RDiff = getMinusSCEV(RA, One); -          if (LDiff == RDiff) -            return getAddExpr(getUMaxExpr(One, LS), LDiff); -        } -        break; -      case ICmpInst::ICMP_EQ: -        // n == 0 ? 1+x : n+x  ->  umax(n, 1)+x -        if (getTypeSizeInBits(LHS->getType()) <= -                getTypeSizeInBits(U->getType()) && -            isa<ConstantInt>(RHS) && cast<ConstantInt>(RHS)->isZero()) { -          const SCEV *One = getConstant(U->getType(), 1); -          const SCEV *LS = getNoopOrZeroExtend(getSCEV(LHS), U->getType()); -          const SCEV *LA = getSCEV(U->getOperand(1)); -          const SCEV *RA = getSCEV(U->getOperand(2)); -          const SCEV *LDiff = getMinusSCEV(LA, One); -          const SCEV *RDiff = getMinusSCEV(RA, LS); -          if (LDiff == RDiff) -            return getAddExpr(getUMaxExpr(One, LS), LDiff); -        } -        break; -      default: -        break; -      } -    } +    // U can also be a select constant expr, which we let fall through.  Since +    // createNodeForSelect only works for a condition that is an `ICmpInst`, and +    // constant expressions cannot have instructions as operands, we'd have +    // returned getUnknown for a select constant expression anyway. +    if (isa<Instruction>(U)) +      return createNodeForSelectOrPHI(cast<Instruction>(U), U->getOperand(0), +                                      U->getOperand(1), U->getOperand(2));    default: // We cannot analyze this expression.      break; @@ -4534,8 +4869,7 @@ ScalarEvolution::getSmallConstantTripMultiple(Loop *L,      return 1;    // Get the trip count from the BE count by adding 1. -  const SCEV *TCMul = getAddExpr(ExitCount, -                                 getConstant(ExitCount->getType(), 1)); +  const SCEV *TCMul = getAddExpr(ExitCount, getOne(ExitCount->getType()));    // FIXME: SCEV distributes multiplication as V1*C1 + V2*C1. We could attempt    // to factor simple cases.    if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(TCMul)) @@ -4610,10 +4944,10 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {    if (!Pair.second)      return Pair.first->second; -  // ComputeBackedgeTakenCount may allocate memory for its result. Inserting it +  // computeBackedgeTakenCount may allocate memory for its result. Inserting it    // into the BackedgeTakenCounts map transfers ownership. Otherwise, the result    // must be cleared in this scope. -  BackedgeTakenInfo Result = ComputeBackedgeTakenCount(L); +  BackedgeTakenInfo Result = computeBackedgeTakenCount(L);    if (Result.getExact(this) != getCouldNotCompute()) {      assert(isLoopInvariant(Result.getExact(this), L) && @@ -4666,7 +5000,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {    }    // Re-lookup the insert position, since the call to -  // ComputeBackedgeTakenCount above could result in a +  // computeBackedgeTakenCount above could result in a    // recursive call to getBackedgeTakenInfo (on a different    // loop), which would invalidate the iterator computed    // earlier. 
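As a standalone check (not from the patch) of the kind of identity the removed select matcher relied on, and which the new createNodeForSelectOrPHI call is now responsible for, the n != 0 case can be verified exhaustively at 8 bits with wrap-around addition:

#include <cstdint>
#include <cstdio>

int main() {
  // Exhaustively check:  n != 0 ? n+x : 1+x  ==  umax(n, 1) + x   (i8, mod 256)
  for (unsigned N = 0; N < 256; ++N)
    for (unsigned X = 0; X < 256; ++X) {
      uint8_t Sel = (uint8_t)((N != 0 ? N : 1u) + X);
      uint8_t Max = (uint8_t)((N > 1 ? N : 1u) + X); // umax(n, 1) + x
      if (Sel != Max) {
        std::printf("mismatch at n=%u x=%u\n", N, X);
        return 1;
      }
    }
  std::puts("n != 0 ? n+x : 1+x  ==  umax(n, 1)+x  for all i8 values");
  return 0;
}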
@@ -4744,12 +5078,12 @@ void ScalarEvolution::forgetValue(Value *V) {  }  /// getExact - Get the exact loop backedge taken count considering all loop -/// exits. A computable result can only be return for loops with a single exit. -/// Returning the minimum taken count among all exits is incorrect because one -/// of the loop's exit limit's may have been skipped. HowFarToZero assumes that -/// the limit of each loop test is never skipped. This is a valid assumption as -/// long as the loop exits via that test. For precise results, it is the -/// caller's responsibility to specify the relevant loop exit using +/// exits. A computable result can only be returned for loops with a single +/// exit.  Returning the minimum taken count among all exits is incorrect +/// because one of the loop's exit limits may have been skipped. HowFarToZero +/// assumes that the limit of each loop test is never skipped. This is a valid +/// assumption as long as the loop exits via that test. For precise results, it +/// is the caller's responsibility to specify the relevant loop exit using  /// getExact(ExitingBlock, SE).  const SCEV *  ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE) const { @@ -4847,10 +5181,10 @@ void ScalarEvolution::BackedgeTakenInfo::clear() {    delete[] ExitNotTaken.getNextExit();  } -/// ComputeBackedgeTakenCount - Compute the number of times the backedge +/// computeBackedgeTakenCount - Compute the number of times the backedge  /// of the specified loop will execute.  ScalarEvolution::BackedgeTakenInfo -ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) { +ScalarEvolution::computeBackedgeTakenCount(const Loop *L) {    SmallVector<BasicBlock *, 8> ExitingBlocks;    L->getExitingBlocks(ExitingBlocks); @@ -4864,7 +5198,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {    // and compute maxBECount.    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {      BasicBlock *ExitBB = ExitingBlocks[i]; -    ExitLimit EL = ComputeExitLimit(L, ExitBB); +    ExitLimit EL = computeExitLimit(L, ExitBB);      // 1. For each exit that can be computed, add an entry to ExitCounts.      // CouldComputeBECount is true only if all exits can be computed. @@ -4885,7 +5219,7 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {      // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is      // considered greater than any computable EL.Max.      if (EL.Max != getCouldNotCompute() && Latch && -        DT->dominates(ExitBB, Latch)) { +        DT.dominates(ExitBB, Latch)) {        if (!MustExitMaxBECount)          MustExitMaxBECount = EL.Max;        else { @@ -4906,13 +5240,11 @@ ScalarEvolution::ComputeBackedgeTakenCount(const Loop *L) {    return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);  } -/// ComputeExitLimit - Compute the number of times the backedge of the specified -/// loop will execute if it exits via the specified block.  ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { +ScalarEvolution::computeExitLimit(const Loop *L, BasicBlock *ExitingBlock) { -  // Okay, we've chosen an exiting block.  See what condition causes us to -  // exit at this block and remember the exit block and whether all other targets +  // Okay, we've chosen an exiting block.  See what condition causes us to exit +  // at this block and remember the exit block and whether all other targets    // lead to the loop header.    
bool MustExecuteLoopHeader = true;    BasicBlock *Exit = nullptr; @@ -4952,8 +5284,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {        if (!Pred)          return getCouldNotCompute();        TerminatorInst *PredTerm = Pred->getTerminator(); -      for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i) { -        BasicBlock *PredSucc = PredTerm->getSuccessor(i); +      for (const BasicBlock *PredSucc : PredTerm->successors()) {          if (PredSucc == BB)            continue;          // If the predecessor has a successor that isn't BB and isn't @@ -4976,19 +5307,19 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {    if (BranchInst *BI = dyn_cast<BranchInst>(Term)) {      assert(BI->isConditional() && "If unconditional, it can't be in loop!");      // Proceed to the next level to examine the exit condition expression. -    return ComputeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0), +    return computeExitLimitFromCond(L, BI->getCondition(), BI->getSuccessor(0),                                      BI->getSuccessor(1),                                      /*ControlsExit=*/IsOnlyExit);    }    if (SwitchInst *SI = dyn_cast<SwitchInst>(Term)) -    return ComputeExitLimitFromSingleExitSwitch(L, SI, Exit, +    return computeExitLimitFromSingleExitSwitch(L, SI, Exit,                                                  /*ControlsExit=*/IsOnlyExit);    return getCouldNotCompute();  } -/// ComputeExitLimitFromCond - Compute the number of times the +/// computeExitLimitFromCond - Compute the number of times the  /// backedge of the specified loop will execute if its exit condition  /// were a conditional branch of ExitCond, TBB, and FBB.  /// @@ -4997,7 +5328,7 @@ ScalarEvolution::ComputeExitLimit(const Loop *L, BasicBlock *ExitingBlock) {  /// condition is true and can infer that failing to meet the condition prior to  /// integer wraparound results in undefined behavior.  ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromCond(const Loop *L, +ScalarEvolution::computeExitLimitFromCond(const Loop *L,                                            Value *ExitCond,                                            BasicBlock *TBB,                                            BasicBlock *FBB, @@ -5007,9 +5338,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,      if (BO->getOpcode() == Instruction::And) {        // Recurse on the operands of the and.        bool EitherMayExit = L->contains(TBB); -      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, +      ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,                                                 ControlsExit && !EitherMayExit); -      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, +      ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,                                                 ControlsExit && !EitherMayExit);        const SCEV *BECount = getCouldNotCompute();        const SCEV *MaxBECount = getCouldNotCompute(); @@ -5042,9 +5373,9 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,      if (BO->getOpcode() == Instruction::Or) {        // Recurse on the operands of the or.        
bool EitherMayExit = L->contains(FBB); -      ExitLimit EL0 = ComputeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB, +      ExitLimit EL0 = computeExitLimitFromCond(L, BO->getOperand(0), TBB, FBB,                                                 ControlsExit && !EitherMayExit); -      ExitLimit EL1 = ComputeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB, +      ExitLimit EL1 = computeExitLimitFromCond(L, BO->getOperand(1), TBB, FBB,                                                 ControlsExit && !EitherMayExit);        const SCEV *BECount = getCouldNotCompute();        const SCEV *MaxBECount = getCouldNotCompute(); @@ -5079,7 +5410,7 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,    // With an icmp, it may be feasible to compute an exact backedge-taken count.    // Proceed to the next level to examine the icmp.    if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond)) -    return ComputeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit); +    return computeExitLimitFromICmp(L, ExitCondICmp, TBB, FBB, ControlsExit);    // Check for a constant condition. These are normally stripped out by    // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to @@ -5091,18 +5422,15 @@ ScalarEvolution::ComputeExitLimitFromCond(const Loop *L,        return getCouldNotCompute();      else        // The backedge is never taken. -      return getConstant(CI->getType(), 0); +      return getZero(CI->getType());    }    // If it's not an integer or pointer comparison then compute it the hard way. -  return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); +  return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));  } -/// ComputeExitLimitFromICmp - Compute the number of times the -/// backedge of the specified loop will execute if its exit condition -/// were a conditional branch of the ICmpInst ExitCond, TBB, and FBB.  ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, +ScalarEvolution::computeExitLimitFromICmp(const Loop *L,                                            ICmpInst *ExitCond,                                            BasicBlock *TBB,                                            BasicBlock *FBB, @@ -5119,11 +5447,16 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,    if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))      if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {        ExitLimit ItCnt = -        ComputeLoadConstantCompareExitLimit(LI, RHS, L, Cond); +        computeLoadConstantCompareExitLimit(LI, RHS, L, Cond);        if (ItCnt.hasAnyInfo())          return ItCnt;      } +  ExitLimit ShiftEL = computeShiftCompareExitLimit( +      ExitCond->getOperand(0), ExitCond->getOperand(1), L, Cond); +  if (ShiftEL.hasAnyInfo()) +    return ShiftEL; +    const SCEV *LHS = getSCEV(ExitCond->getOperand(0));    const SCEV *RHS = getSCEV(ExitCond->getOperand(1)); @@ -5149,7 +5482,7 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,        if (AddRec->getLoop() == L) {          // Form the constant range.          
ConstantRange CompRange( -            ICmpInst::makeConstantRange(Cond, RHSC->getValue()->getValue())); +            ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));          const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);          if (!isa<SCEVCouldNotCompute>(Ret)) return Ret; @@ -5183,21 +5516,13 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L,      break;    }    default: -#if 0 -    dbgs() << "ComputeBackedgeTakenCount "; -    if (ExitCond->getOperand(0)->getType()->isUnsigned()) -      dbgs() << "[unsigned] "; -    dbgs() << *LHS << "   " -         << Instruction::getOpcodeName(Instruction::ICmp) -         << "   " << *RHS << "\n"; -#endif      break;    } -  return ComputeExitCountExhaustively(L, ExitCond, !L->contains(TBB)); +  return computeExitCountExhaustively(L, ExitCond, !L->contains(TBB));  }  ScalarEvolution::ExitLimit -ScalarEvolution::ComputeExitLimitFromSingleExitSwitch(const Loop *L, +ScalarEvolution::computeExitLimitFromSingleExitSwitch(const Loop *L,                                                        SwitchInst *Switch,                                                        BasicBlock *ExitingBlock,                                                        bool ControlsExit) { @@ -5230,11 +5555,11 @@ EvaluateConstantChrecAtConstant(const SCEVAddRecExpr *AddRec, ConstantInt *C,    return cast<SCEVConstant>(Val)->getValue();  } -/// ComputeLoadConstantCompareExitLimit - Given an exit condition of +/// computeLoadConstantCompareExitLimit - Given an exit condition of  /// 'icmp op load X, cst', try to see if we can compute the backedge  /// execution count.  ScalarEvolution::ExitLimit -ScalarEvolution::ComputeLoadConstantCompareExitLimit( +ScalarEvolution::computeLoadConstantCompareExitLimit(    LoadInst *LI,    Constant *RHS,    const Loop *L, @@ -5303,11 +5628,6 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(      Result = ConstantExpr::getICmp(predicate, Result, RHS);      if (!isa<ConstantInt>(Result)) break;  // Couldn't decide for sure      if (cast<ConstantInt>(Result)->getValue().isMinValue()) { -#if 0 -      dbgs() << "\n***\n*** Computed loop count " << *ItCst -             << "\n*** From global " << *GV << "*** BB: " << *L->getHeader() -             << "***\n"; -#endif        ++NumArrayLenItCounts;        return getConstant(ItCst);   // Found terminating iteration!      } @@ -5315,6 +5635,149 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit(    return getCouldNotCompute();  } +ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit( +    Value *LHS, Value *RHSV, const Loop *L, ICmpInst::Predicate Pred) { +  ConstantInt *RHS = dyn_cast<ConstantInt>(RHSV); +  if (!RHS) +    return getCouldNotCompute(); + +  const BasicBlock *Latch = L->getLoopLatch(); +  if (!Latch) +    return getCouldNotCompute(); + +  const BasicBlock *Predecessor = L->getLoopPredecessor(); +  if (!Predecessor) +    return getCouldNotCompute(); + +  // Return true if V is of the form "LHS `shift_op` <positive constant>". +  // Return LHS in OutLHS and shift_opt in OutOpCode. 
+  auto MatchPositiveShift = +      [](Value *V, Value *&OutLHS, Instruction::BinaryOps &OutOpCode) { + +    using namespace PatternMatch; + +    ConstantInt *ShiftAmt; +    if (match(V, m_LShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) +      OutOpCode = Instruction::LShr; +    else if (match(V, m_AShr(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) +      OutOpCode = Instruction::AShr; +    else if (match(V, m_Shl(m_Value(OutLHS), m_ConstantInt(ShiftAmt)))) +      OutOpCode = Instruction::Shl; +    else +      return false; + +    return ShiftAmt->getValue().isStrictlyPositive(); +  }; + +  // Recognize a "shift recurrence" either of the form %iv or of %iv.shifted in +  // +  // loop: +  //   %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ] +  //   %iv.shifted = lshr i32 %iv, <positive constant> +  // +  // Return true on a successful match.  Return the corresponding PHI node (%iv +  // above) in PNOut and the opcode of the shift operation in OpCodeOut. +  auto MatchShiftRecurrence = +      [&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) { +    Optional<Instruction::BinaryOps> PostShiftOpCode; + +    { +      Instruction::BinaryOps OpC; +      Value *V; + +      // If we encounter a shift instruction, "peel off" the shift operation, +      // and remember that we did so.  Later when we inspect %iv's backedge +      // value, we will make sure that the backedge value uses the same +      // operation. +      // +      // Note: the peeled shift operation does not have to be the same +      // instruction as the one feeding into the PHI's backedge value.  We only +      // really care about it being the same *kind* of shift instruction -- +      // that's all that is required for our later inferences to hold. +      if (MatchPositiveShift(LHS, V, OpC)) { +        PostShiftOpCode = OpC; +        LHS = V; +      } +    } + +    PNOut = dyn_cast<PHINode>(LHS); +    if (!PNOut || PNOut->getParent() != L->getHeader()) +      return false; + +    Value *BEValue = PNOut->getIncomingValueForBlock(Latch); +    Value *OpLHS; + +    return +        // The backedge value for the PHI node must be a shift by a positive +        // amount +        MatchPositiveShift(BEValue, OpLHS, OpCodeOut) && + +        // of the PHI node itself +        OpLHS == PNOut && + +        // and the kind of shift should match the kind of shift we peeled +        // off, if any. +        (!PostShiftOpCode.hasValue() || *PostShiftOpCode == OpCodeOut); +  }; + +  PHINode *PN; +  Instruction::BinaryOps OpCode; +  if (!MatchShiftRecurrence(LHS, PN, OpCode)) +    return getCouldNotCompute(); + +  const DataLayout &DL = getDataLayout(); + +  // The key rationale for this optimization is that for some kinds of shift +  // recurrences, the value of the recurrence "stabilizes" to either 0 or -1 +  // within a finite number of iterations.  If the condition guarding the +  // backedge (in the sense that the backedge is taken if the condition is true) +  // is false for the value the shift recurrence stabilizes to, then we know +  // that the backedge is taken only a finite number of times. + +  ConstantInt *StableValue = nullptr; +  switch (OpCode) { +  default: +    llvm_unreachable("Impossible case!"); + +  case Instruction::AShr: { +    // {K,ashr,<positive-constant>} stabilizes to signum(K) in at most +    // bitwidth(K) iterations. 
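The stabilization claim above can be confirmed independently; the following minimal C++ sketch (not part of the patch) iterates an i8 {K,ashr,3} recurrence and checks that it reaches 0 or -1 within bitwidth(K) = 8 steps:

#include <cstdint>
#include <cstdio>

// Arithmetic shift right (floor division by 2^Amt) on small integers, written
// without relying on the implementation-defined behaviour of >> on negative
// operands.
static int AShr(int V, unsigned Amt) {
  if (V >= 0)
    return (int)((unsigned)V >> Amt);
  return -(int)((((unsigned)(-V) - 1u) >> Amt) + 1u);
}

int main() {
  const unsigned Shift = 3; // any positive shift amount
  for (int Start = -128; Start <= 127; ++Start) {
    int V = Start;
    unsigned Steps = 0;
    while (V != 0 && V != -1) { // the two possible stable values (signum-like)
      V = AShr(V, Shift);
      ++Steps;
    }
    if (Steps > 8) { // bitwidth(i8)
      std::printf("took more than 8 steps for K = %d\n", Start);
      return 1;
    }
  }
  std::puts("every i8 {K,ashr,3} recurrence stabilizes to 0 or -1 within 8 steps");
  return 0;
}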
+    Value *FirstValue = PN->getIncomingValueForBlock(Predecessor); +    bool KnownZero, KnownOne; +    ComputeSignBit(FirstValue, KnownZero, KnownOne, DL, 0, nullptr, +                   Predecessor->getTerminator(), &DT); +    auto *Ty = cast<IntegerType>(RHS->getType()); +    if (KnownZero) +      StableValue = ConstantInt::get(Ty, 0); +    else if (KnownOne) +      StableValue = ConstantInt::get(Ty, -1, true); +    else +      return getCouldNotCompute(); + +    break; +  } +  case Instruction::LShr: +  case Instruction::Shl: +    // Both {K,lshr,<positive-constant>} and {K,shl,<positive-constant>} +    // stabilize to 0 in at most bitwidth(K) iterations. +    StableValue = ConstantInt::get(cast<IntegerType>(RHS->getType()), 0); +    break; +  } + +  auto *Result = +      ConstantFoldCompareInstOperands(Pred, StableValue, RHS, DL, &TLI); +  assert(Result->getType()->isIntegerTy(1) && +         "Otherwise cannot be an operand to a branch instruction"); + +  if (Result->isZeroValue()) { +    unsigned BitWidth = getTypeSizeInBits(RHS->getType()); +    const SCEV *UpperBound = +        getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth); +    return ExitLimit(getCouldNotCompute(), UpperBound); +  } + +  return getCouldNotCompute(); +}  /// CanConstantFold - Return true if we can constant fold an instruction of the  /// specified type, assuming that all operands were constants. @@ -5356,12 +5819,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L,    // Otherwise, we can evaluate this instruction if all of its operands are    // constant or derived from a PHI node themselves.    PHINode *PHI = nullptr; -  for (Instruction::op_iterator OpI = UseInst->op_begin(), -         OpE = UseInst->op_end(); OpI != OpE; ++OpI) { - -    if (isa<Constant>(*OpI)) continue; +  for (Value *Op : UseInst->operands()) { +    if (isa<Constant>(Op)) continue; -    Instruction *OpInst = dyn_cast<Instruction>(*OpI); +    Instruction *OpInst = dyn_cast<Instruction>(Op);      if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr;      PHINode *P = dyn_cast<PHINode>(OpInst); @@ -5395,9 +5856,8 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {    Instruction *I = dyn_cast<Instruction>(V);    if (!I || !canConstantEvolve(I, L)) return nullptr; -  if (PHINode *PN = dyn_cast<PHINode>(I)) { +  if (PHINode *PN = dyn_cast<PHINode>(I))      return PN; -  }    // Record non-constant instructions contained by the loop.    DenseMap<Instruction *, PHINode *> PHIMap; @@ -5454,6 +5914,30 @@ static Constant *EvaluateExpression(Value *V, const Loop *L,                                    TLI);  } + +// If every incoming value to PN except the one for BB is a specific Constant, +// return that, else return nullptr. 
+static Constant *getOtherIncomingValue(PHINode *PN, BasicBlock *BB) { +  Constant *IncomingVal = nullptr; + +  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { +    if (PN->getIncomingBlock(i) == BB) +      continue; + +    auto *CurrentVal = dyn_cast<Constant>(PN->getIncomingValue(i)); +    if (!CurrentVal) +      return nullptr; + +    if (IncomingVal != CurrentVal) { +      if (IncomingVal) +        return nullptr; +      IncomingVal = CurrentVal; +    } +  } + +  return IncomingVal; +} +  /// getConstantEvolutionLoopExitValue - If we know that the specified Phi is  /// in the header of its containing loop, we know the loop executes a  /// constant number of times, and the PHI node is just a recurrence @@ -5462,8 +5946,7 @@ Constant *  ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,                                                     const APInt &BEs,                                                     const Loop *L) { -  DenseMap<PHINode*, Constant*>::const_iterator I = -    ConstantEvolutionLoopExitValue.find(PN); +  auto I = ConstantEvolutionLoopExitValue.find(PN);    if (I != ConstantEvolutionLoopExitValue.end())      return I->second; @@ -5476,22 +5959,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,    BasicBlock *Header = L->getHeader();    assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); -  // Since the loop is canonicalized, the PHI node must have two entries.  One -  // entry must be a constant (coming in from outside of the loop), and the -  // second must be derived from the same PHI. -  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); -  PHINode *PHI = nullptr; -  for (BasicBlock::iterator I = Header->begin(); -       (PHI = dyn_cast<PHINode>(I)); ++I) { -    Constant *StartCST = -      dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); +  BasicBlock *Latch = L->getLoopLatch(); +  if (!Latch) +    return nullptr; + +  for (auto &I : *Header) { +    PHINode *PHI = dyn_cast<PHINode>(&I); +    if (!PHI) break; +    auto *StartCST = getOtherIncomingValue(PHI, Latch);      if (!StartCST) continue;      CurrentIterVals[PHI] = StartCST;    }    if (!CurrentIterVals.count(PN))      return RetVal = nullptr; -  Value *BEValue = PN->getIncomingValue(SecondIsBackedge); +  Value *BEValue = PN->getIncomingValueForBlock(Latch);    // Execute the loop symbolically to determine the exit value.    if (BEs.getActiveBits() >= 32) @@ -5499,7 +5981,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,    unsigned NumIterations = BEs.getZExtValue(); // must be in range    unsigned IterationNum = 0; -  const DataLayout &DL = F->getParent()->getDataLayout(); +  const DataLayout &DL = getDataLayout();    for (; ; ++IterationNum) {      if (IterationNum == NumIterations)        return RetVal = CurrentIterVals[PN];  // Got exit value! @@ -5508,7 +5990,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,      // EvaluateExpression adds non-phi values to the CurrentIterVals map.      DenseMap<Instruction *, Constant *> NextIterVals;      Constant *NextPHI = -        EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); +        EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);      if (!NextPHI)        return nullptr;        // Couldn't evaluate!      
NextIterVals[PN] = NextPHI; @@ -5519,23 +6001,21 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,      // cease to be able to evaluate one of them or if they stop evolving,      // because that doesn't necessarily prevent us from computing PN.      SmallVector<std::pair<PHINode *, Constant *>, 8> PHIsToCompute; -    for (DenseMap<Instruction *, Constant *>::const_iterator -           I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ -      PHINode *PHI = dyn_cast<PHINode>(I->first); +    for (const auto &I : CurrentIterVals) { +      PHINode *PHI = dyn_cast<PHINode>(I.first);        if (!PHI || PHI == PN || PHI->getParent() != Header) continue; -      PHIsToCompute.push_back(std::make_pair(PHI, I->second)); +      PHIsToCompute.emplace_back(PHI, I.second);      }      // We use two distinct loops because EvaluateExpression may invalidate any      // iterators into CurrentIterVals. -    for (SmallVectorImpl<std::pair<PHINode *, Constant*> >::const_iterator -             I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { -      PHINode *PHI = I->first; +    for (const auto &I : PHIsToCompute) { +      PHINode *PHI = I.first;        Constant *&NextPHI = NextIterVals[PHI];        if (!NextPHI) {   // Not already computed. -        Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); -        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); +        Value *BEValue = PHI->getIncomingValueForBlock(Latch); +        NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);        } -      if (NextPHI != I->second) +      if (NextPHI != I.second)          StoppedEvolving = false;      } @@ -5548,12 +6028,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,    }  } -/// ComputeExitCountExhaustively - If the loop is known to execute a -/// constant number of times (the condition evolves only from constants), -/// try to evaluate a few iterations of the loop until we get the exit -/// condition gets a value of ExitWhen (true or false).  If we cannot -/// evaluate the trip count of the loop, return getCouldNotCompute(). -const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, +const SCEV *ScalarEvolution::computeExitCountExhaustively(const Loop *L,                                                            Value *Cond,                                                            bool ExitWhen) {    PHINode *PN = getConstantEvolvingPHI(Cond, L); @@ -5567,14 +6042,14 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,    BasicBlock *Header = L->getHeader();    assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); -  // One entry must be a constant (coming in from outside of the loop), and the -  // second must be derived from the same PHI. 
-  bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); -  PHINode *PHI = nullptr; -  for (BasicBlock::iterator I = Header->begin(); -       (PHI = dyn_cast<PHINode>(I)); ++I) { -    Constant *StartCST = -      dyn_cast<Constant>(PHI->getIncomingValue(!SecondIsBackedge)); +  BasicBlock *Latch = L->getLoopLatch(); +  assert(Latch && "Should follow from NumIncomingValues == 2!"); + +  for (auto &I : *Header) { +    PHINode *PHI = dyn_cast<PHINode>(&I); +    if (!PHI) +      break; +    auto *StartCST = getOtherIncomingValue(PHI, Latch);      if (!StartCST) continue;      CurrentIterVals[PHI] = StartCST;    } @@ -5585,10 +6060,10 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,    // the loop symbolically to determine when the condition gets a value of    // "ExitWhen".    unsigned MaxIterations = MaxBruteForceIterations;   // Limit analysis. -  const DataLayout &DL = F->getParent()->getDataLayout(); +  const DataLayout &DL = getDataLayout();    for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ -    ConstantInt *CondVal = dyn_cast_or_null<ConstantInt>( -        EvaluateExpression(Cond, L, CurrentIterVals, DL, TLI)); +    auto *CondVal = dyn_cast_or_null<ConstantInt>( +        EvaluateExpression(Cond, L, CurrentIterVals, DL, &TLI));      // Couldn't symbolically evaluate.      if (!CondVal) return getCouldNotCompute(); @@ -5605,20 +6080,17 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,      // calling EvaluateExpression on them because that may invalidate iterators      // into CurrentIterVals.      SmallVector<PHINode *, 8> PHIsToCompute; -    for (DenseMap<Instruction *, Constant *>::const_iterator -           I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ -      PHINode *PHI = dyn_cast<PHINode>(I->first); +    for (const auto &I : CurrentIterVals) { +      PHINode *PHI = dyn_cast<PHINode>(I.first);        if (!PHI || PHI->getParent() != Header) continue;        PHIsToCompute.push_back(PHI);      } -    for (SmallVectorImpl<PHINode *>::const_iterator I = PHIsToCompute.begin(), -             E = PHIsToCompute.end(); I != E; ++I) { -      PHINode *PHI = *I; +    for (PHINode *PHI : PHIsToCompute) {        Constant *&NextPHI = NextIterVals[PHI];        if (NextPHI) continue;    // Already computed! -      Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); -      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, TLI); +      Value *BEValue = PHI->getIncomingValueForBlock(Latch); +      NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, DL, &TLI);      }      CurrentIterVals.swap(NextIterVals);    } @@ -5638,22 +6110,22 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L,  /// In the case that a relevant loop exit value cannot be computed, the  /// original value V is returned.  const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { +  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = +      ValuesAtScopes[V];    // Check to see if we've folded this expression at this loop before. -  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values = ValuesAtScopes[V]; -  for (unsigned u = 0; u < Values.size(); u++) { -    if (Values[u].first == L) -      return Values[u].second ? Values[u].second : V; -  } -  Values.push_back(std::make_pair(L, static_cast<const SCEV *>(nullptr))); +  for (auto &LS : Values) +    if (LS.first == L) +      return LS.second ? 
LS.second : V; + +  Values.emplace_back(L, nullptr); +    // Otherwise compute it.    const SCEV *C = computeSCEVAtScope(V, L); -  SmallVector<std::pair<const Loop *, const SCEV *>, 2> &Values2 = ValuesAtScopes[V]; -  for (unsigned u = Values2.size(); u > 0; u--) { -    if (Values2[u - 1].first == L) { -      Values2[u - 1].second = C; +  for (auto &LS : reverse(ValuesAtScopes[V])) +    if (LS.first == L) { +      LS.second = C;        break;      } -  }    return C;  } @@ -5763,7 +6235,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {    // exit value from the loop without using SCEVs.    if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(V)) {      if (Instruction *I = dyn_cast<Instruction>(SU->getValue())) { -      const Loop *LI = (*this->LI)[I->getParent()]; +      const Loop *LI = this->LI[I->getParent()];        if (LI && LI->getParentLoop() == L)  // Looking for loop exit value.          if (PHINode *PN = dyn_cast<PHINode>(I))            if (PN->getParent() == LI->getHeader()) { @@ -5777,9 +6249,8 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {                // Okay, we know how many times the containing loop executes.  If                // this is a constant evolving PHI node, get the final value at                // the specified iteration number. -              Constant *RV = getConstantEvolutionLoopExitValue(PN, -                                                   BTCC->getValue()->getValue(), -                                                               LI); +              Constant *RV = +                  getConstantEvolutionLoopExitValue(PN, BTCC->getAPInt(), LI);                if (RV) return getSCEV(RV);              }            } @@ -5791,8 +6262,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {        if (CanConstantFold(I)) {          SmallVector<Constant *, 4> Operands;          bool MadeImprovement = false; -        for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { -          Value *Op = I->getOperand(i); +        for (Value *Op : I->operands()) {            if (Constant *C = dyn_cast<Constant>(Op)) {              Operands.push_back(C);              continue; @@ -5821,16 +6291,16 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {          // Check to see if getSCEVAtScope actually made an improvement.          
if (MadeImprovement) {            Constant *C = nullptr; -          const DataLayout &DL = F->getParent()->getDataLayout(); +          const DataLayout &DL = getDataLayout();            if (const CmpInst *CI = dyn_cast<CmpInst>(I))              C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], -                                                Operands[1], DL, TLI); +                                                Operands[1], DL, &TLI);            else if (const LoadInst *LI = dyn_cast<LoadInst>(I)) {              if (!LI->isVolatile())                C = ConstantFoldLoadFromConstPtr(Operands[0], DL);            } else              C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, -                                         DL, TLI); +                                         DL, &TLI);            if (!C) return V;            return getSCEV(C);          } @@ -6021,10 +6491,10 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {      return std::make_pair(CNC, CNC);    } -  uint32_t BitWidth = LC->getValue()->getValue().getBitWidth(); -  const APInt &L = LC->getValue()->getValue(); -  const APInt &M = MC->getValue()->getValue(); -  const APInt &N = NC->getValue()->getValue(); +  uint32_t BitWidth = LC->getAPInt().getBitWidth(); +  const APInt &L = LC->getAPInt(); +  const APInt &M = MC->getAPInt(); +  const APInt &N = NC->getAPInt();    APInt Two(BitWidth, 2);    APInt Four(BitWidth, 4); @@ -6103,10 +6573,6 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {      const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);      const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);      if (R1 && R2) { -#if 0 -      dbgs() << "HFTZ: " << *V << " - sol#1: " << *R1 -             << "  sol#2: " << *R2 << "\n"; -#endif        // Pick the smallest positive root value.        if (ConstantInt *CB =            dyn_cast<ConstantInt>(ConstantExpr::getICmp(CmpInst::ICMP_ULT, @@ -6160,7 +6626,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {    // For negative steps (counting down to zero):    //   N = Start/-Step    // First compute the unsigned distance from zero in the direction of Step. -  bool CountDown = StepC->getValue()->getValue().isNegative(); +  bool CountDown = StepC->getAPInt().isNegative();    const SCEV *Distance = CountDown ? Start : getNegativeSCEV(Start);    // Handle unitary steps, which cannot wraparound. @@ -6185,13 +6651,53 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {    // done by counting and comparing the number of trailing zeros of Step and    // Distance.    if (!CountDown) { -    const APInt &StepV = StepC->getValue()->getValue(); +    const APInt &StepV = StepC->getAPInt();      // StepV.isPowerOf2() returns true if StepV is an positive power of two.  It      // also returns true if StepV is maximally negative (eg, INT_MIN), but that      // case is not handled as this code is guarded by !CountDown.      if (StepV.isPowerOf2() && -        GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) -      return getUDivExactExpr(Distance, Step); +        GetMinTrailingZeros(Distance) >= StepV.countTrailingZeros()) { +      // Here we've constrained the equation to be of the form +      // +      //   2^(N + k) * Distance' = (StepV == 2^N) * X (mod 2^W)  ... (0) +      // +      // where we're operating on a W bit wide integer domain and k is +      // non-negative.  
The smallest unsigned solution for X is the trip count. +      // +      // (0) is equivalent to: +      // +      //      2^(N + k) * Distance' - 2^N * X = L * 2^W +      // <=>  2^N(2^k * Distance' - X) = L * 2^(W - N) * 2^N +      // <=>  2^k * Distance' - X = L * 2^(W - N) +      // <=>  2^k * Distance'     = L * 2^(W - N) + X    ... (1) +      // +      // The smallest X satisfying (1) is unsigned remainder of dividing the LHS +      // by 2^(W - N). +      // +      // <=>  X = 2^k * Distance' URem 2^(W - N)   ... (2) +      // +      // E.g. say we're solving +      // +      //   2 * Val = 2 * X  (in i8)   ... (3) +      // +      // then from (2), we get X = Val URem i8 128 (k = 0 in this case). +      // +      // Note: It is tempting to solve (3) by setting X = Val, but Val is not +      // necessarily the smallest unsigned value of X that satisfies (3). +      // E.g. if Val is i8 -127 then the smallest value of X that satisfies (3) +      // is i8 1, not i8 -127 + +      const auto *ModuloResult = getUDivExactExpr(Distance, Step); + +      // Since SCEV does not have a URem node, we construct one using a truncate +      // and a zero extend. + +      unsigned NarrowWidth = StepV.getBitWidth() - StepV.countTrailingZeros(); +      auto *NarrowTy = IntegerType::get(getContext(), NarrowWidth); +      auto *WideTy = Distance->getType(); + +      return getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy); +    }    }    // If the condition controls loop exit (the loop exits only if the expression @@ -6207,8 +6713,7 @@ ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L, bool ControlsExit) {    // Then, try to solve the above equation provided that Start is constant.    if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) -    return SolveLinEquationWithOverflow(StepC->getValue()->getValue(), -                                        -StartC->getValue()->getValue(), +    return SolveLinEquationWithOverflow(StepC->getAPInt(), -StartC->getAPInt(),                                          *this);    return getCouldNotCompute();  } @@ -6226,7 +6731,7 @@ ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {    // already.  If so, the backedge will execute zero times.    if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {      if (!C->getValue()->isNullValue()) -      return getConstant(C->getType(), 0); +      return getZero(C->getType());      return getCouldNotCompute();  // Otherwise it will loop infinitely.    } @@ -6251,7 +6756,7 @@ ScalarEvolution::getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB) {    // A loop's header is defined to be a block that dominates the loop.    // If the header has a unique predecessor outside the loop, it must be    // a block that has exactly one successor that can reach the loop. -  if (Loop *L = LI->getLoopFor(BB)) +  if (Loop *L = LI.getLoopFor(BB))      return std::make_pair(L->getLoopPredecessor(), L->getHeader());    return std::pair<BasicBlock *, BasicBlock *>(); @@ -6267,13 +6772,20 @@ static bool HasSameValue(const SCEV *A, const SCEV *B) {    // Quick check to see if they are the same SCEV.    if (A == B) return true; +  auto ComputesEqualValues = [](const Instruction *A, const Instruction *B) { +    // Not all instructions that are "identical" compute the same value.  For +    // instance, two distinct alloca instructions allocating the same type are +    // identical and do not read memory; but compute distinct values. 
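Going back to the HowFarToZero hunk above: its worked i8 example (2 * X == 2 * Val with Val = -127) can be reproduced standalone. The sketch below (not part of the patch) brute-forces the smallest unsigned solution and compares it with the closed form Val urem 2^7 that the trunc/zext construction computes:

#include <cstdint>
#include <cstdio>

int main() {
  // Solve 2 * X == 2 * Val (mod 256) for the smallest unsigned X by brute
  // force, and compare with the closed form X = Val urem 2^7 that the
  // trunc-to-i7 / zext-to-i8 construction in the patch computes.
  const uint8_t Val = (uint8_t)-127; // bit pattern 0x81, i.e. 129 unsigned
  unsigned Smallest = 0;
  for (unsigned X = 0; X < 256; ++X)
    if ((uint8_t)(2 * X) == (uint8_t)(2 * Val)) {
      Smallest = X;
      break;
    }
  unsigned ClosedForm = Val % 128; // Val urem 2^(8 - 1)
  std::printf("smallest X = %u, Val urem 128 = %u\n", Smallest, ClosedForm);
  return Smallest == ClosedForm ? 0 : 1; // both are 1, not the pattern of -127
}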
+    return A->isIdenticalTo(B) && (isa<BinaryOperator>(A) || isa<GetElementPtrInst>(A)); +  }; +    // Otherwise, if they're both SCEVUnknown, it's possible that they hold    // two different instructions with the same value. Check for this case.    if (const SCEVUnknown *AU = dyn_cast<SCEVUnknown>(A))      if (const SCEVUnknown *BU = dyn_cast<SCEVUnknown>(B))        if (const Instruction *AI = dyn_cast<Instruction>(AU->getValue()))          if (const Instruction *BI = dyn_cast<Instruction>(BU->getValue())) -          if (AI->isIdenticalTo(BI) && !AI->mayReadFromMemory()) +          if (ComputesEqualValues(AI, BI))              return true;    // Otherwise assume they may have a different value. @@ -6324,7 +6836,7 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,    // If there's a constant operand, canonicalize comparisons with boundary    // cases, and canonicalize *-or-equal comparisons to regular comparisons.    if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) { -    const APInt &RA = RC->getValue()->getValue(); +    const APInt &RA = RC->getAPInt();      switch (Pred) {      default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");      case ICmpInst::ICMP_EQ: @@ -6515,16 +7027,14 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,        Pred = ICmpInst::ICMP_ULT;        Changed = true;      } else if (!getUnsignedRange(LHS).getUnsignedMin().isMinValue()) { -      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS, -                       SCEV::FlagNUW); +      LHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), LHS);        Pred = ICmpInst::ICMP_ULT;        Changed = true;      }      break;    case ICmpInst::ICMP_UGE:      if (!getUnsignedRange(RHS).getUnsignedMin().isMinValue()) { -      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS, -                       SCEV::FlagNUW); +      RHS = getAddExpr(getConstant(RHS->getType(), (uint64_t)-1, true), RHS);        Pred = ICmpInst::ICMP_UGT;        Changed = true;      } else if (!getUnsignedRange(LHS).getUnsignedMax().isMaxValue()) { @@ -6612,10 +7122,140 @@ bool ScalarEvolution::isKnownPredicate(ICmpInst::Predicate Pred,    if (LeftGuarded && RightGuarded)      return true; +  if (isKnownPredicateViaSplitting(Pred, LHS, RHS)) +    return true; +    // Otherwise see what can be done with known constant ranges.    return isKnownPredicateWithRanges(Pred, LHS, RHS);  } +bool ScalarEvolution::isMonotonicPredicate(const SCEVAddRecExpr *LHS, +                                           ICmpInst::Predicate Pred, +                                           bool &Increasing) { +  bool Result = isMonotonicPredicateImpl(LHS, Pred, Increasing); + +#ifndef NDEBUG +  // Verify an invariant: inverting the predicate should turn a monotonically +  // increasing change to a monotonically decreasing one, and vice versa. 
+  bool IncreasingSwapped; +  bool ResultSwapped = isMonotonicPredicateImpl( +      LHS, ICmpInst::getSwappedPredicate(Pred), IncreasingSwapped); + +  assert(Result == ResultSwapped && "should be able to analyze both!"); +  if (ResultSwapped) +    assert(Increasing == !IncreasingSwapped && +           "monotonicity should flip as we flip the predicate"); +#endif + +  return Result; +} + +bool ScalarEvolution::isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS, +                                               ICmpInst::Predicate Pred, +                                               bool &Increasing) { + +  // A zero step value for LHS means the induction variable is essentially a +  // loop invariant value. We don't really depend on the predicate actually +  // flipping from false to true (for increasing predicates, and the other way +  // around for decreasing predicates), all we care about is that *if* the +  // predicate changes then it only changes from false to true. +  // +  // A zero step value in itself is not very useful, but there may be places +  // where SCEV can prove X >= 0 but not prove X > 0, so it is helpful to be +  // as general as possible. + +  switch (Pred) { +  default: +    return false; // Conservative answer + +  case ICmpInst::ICMP_UGT: +  case ICmpInst::ICMP_UGE: +  case ICmpInst::ICMP_ULT: +  case ICmpInst::ICMP_ULE: +    if (!LHS->getNoWrapFlags(SCEV::FlagNUW)) +      return false; + +    Increasing = Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_UGE; +    return true; + +  case ICmpInst::ICMP_SGT: +  case ICmpInst::ICMP_SGE: +  case ICmpInst::ICMP_SLT: +  case ICmpInst::ICMP_SLE: { +    if (!LHS->getNoWrapFlags(SCEV::FlagNSW)) +      return false; + +    const SCEV *Step = LHS->getStepRecurrence(*this); + +    if (isKnownNonNegative(Step)) { +      Increasing = Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE; +      return true; +    } + +    if (isKnownNonPositive(Step)) { +      Increasing = Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE; +      return true; +    } + +    return false; +  } + +  } + +  llvm_unreachable("switch has default clause!"); +} + +bool ScalarEvolution::isLoopInvariantPredicate( +    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, const Loop *L, +    ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS, +    const SCEV *&InvariantRHS) { + +  // If there is a loop-invariant, force it into the RHS, otherwise bail out. +  if (!isLoopInvariant(RHS, L)) { +    if (!isLoopInvariant(LHS, L)) +      return false; + +    std::swap(LHS, RHS); +    Pred = ICmpInst::getSwappedPredicate(Pred); +  } + +  const SCEVAddRecExpr *ArLHS = dyn_cast<SCEVAddRecExpr>(LHS); +  if (!ArLHS || ArLHS->getLoop() != L) +    return false; + +  bool Increasing; +  if (!isMonotonicPredicate(ArLHS, Pred, Increasing)) +    return false; + +  // If the predicate "ArLHS `Pred` RHS" monotonically increases from false to +  // true as the loop iterates, and the backedge is control dependent on +  // "ArLHS `Pred` RHS" == true then we can reason as follows: +  // +  //   * if the predicate was false in the first iteration then the predicate +  //     is never evaluated again, since the loop exits without taking the +  //     backedge. +  //   * if the predicate was true in the first iteration then it will +  //     continue to be true for all future iterations since it is +  //     monotonically increasing. 
+  // +  // For both the above possibilities, we can replace the loop varying +  // predicate with its value on the first iteration of the loop (which is +  // loop invariant). +  // +  // A similar reasoning applies for a monotonically decreasing predicate, by +  // replacing true with false and false with true in the above two bullets. + +  auto P = Increasing ? Pred : ICmpInst::getInversePredicate(Pred); + +  if (!isLoopBackedgeGuardedByCond(L, P, LHS, RHS)) +    return false; + +  InvariantPred = Pred; +  InvariantLHS = ArLHS->getStart(); +  InvariantRHS = RHS; +  return true; +} +  bool  ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,                                              const SCEV *LHS, const SCEV *RHS) { @@ -6690,6 +7330,84 @@ ScalarEvolution::isKnownPredicateWithRanges(ICmpInst::Predicate Pred,    return false;  } +bool ScalarEvolution::isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, +                                                    const SCEV *LHS, +                                                    const SCEV *RHS) { + +  // Match Result to (X + Y)<ExpectedFlags> where Y is a constant integer. +  // Return Y via OutY. +  auto MatchBinaryAddToConst = +      [this](const SCEV *Result, const SCEV *X, APInt &OutY, +             SCEV::NoWrapFlags ExpectedFlags) { +    const SCEV *NonConstOp, *ConstOp; +    SCEV::NoWrapFlags FlagsPresent; + +    if (!splitBinaryAdd(Result, ConstOp, NonConstOp, FlagsPresent) || +        !isa<SCEVConstant>(ConstOp) || NonConstOp != X) +      return false; + +    OutY = cast<SCEVConstant>(ConstOp)->getAPInt(); +    return (FlagsPresent & ExpectedFlags) == ExpectedFlags; +  }; + +  APInt C; + +  switch (Pred) { +  default: +    break; + +  case ICmpInst::ICMP_SGE: +    std::swap(LHS, RHS); +  case ICmpInst::ICMP_SLE: +    // X s<= (X + C)<nsw> if C >= 0 +    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && C.isNonNegative()) +      return true; + +    // (X + C)<nsw> s<= X if C <= 0 +    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && +        !C.isStrictlyPositive()) +      return true; +    break; + +  case ICmpInst::ICMP_SGT: +    std::swap(LHS, RHS); +  case ICmpInst::ICMP_SLT: +    // X s< (X + C)<nsw> if C > 0 +    if (MatchBinaryAddToConst(RHS, LHS, C, SCEV::FlagNSW) && +        C.isStrictlyPositive()) +      return true; + +    // (X + C)<nsw> s< X if C < 0 +    if (MatchBinaryAddToConst(LHS, RHS, C, SCEV::FlagNSW) && C.isNegative()) +      return true; +    break; +  } + +  return false; +} + +bool ScalarEvolution::isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, +                                                   const SCEV *LHS, +                                                   const SCEV *RHS) { +  if (Pred != ICmpInst::ICMP_ULT || ProvingSplitPredicate) +    return false; + +  // Allowing arbitrary number of activations of isKnownPredicateViaSplitting on +  // the stack can result in exponential time complexity. +  SaveAndRestore<bool> Restore(ProvingSplitPredicate, true); + +  // If L >= 0 then I `ult` L <=> I >= 0 && I `slt` L +  // +  // To prove L >= 0 we use isKnownNonNegative whereas to prove I >= 0 we use +  // isKnownPredicate.  isKnownPredicate is more powerful, but also more +  // expensive; and using isKnownNonNegative(RHS) is sufficient for most of the +  // interesting cases seen in practice.  We can consider "upgrading" L >= 0 to +  // use isKnownPredicate later if needed. 
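The splitting fact quoted above, I u< L <=> I >= 0 && I s< L whenever L >= 0, is easy to confirm exhaustively at a small bit width; a minimal standalone sketch (not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  // Exhaustively verify: if L >=s 0 then (I u< L) <-> (I >=s 0 && I s< L).
  for (int L = 0; L <= 127; ++L)    // every non-negative i8 value of L
    for (int I = -128; I <= 127; ++I) {
      bool Unsigned = (uint8_t)I < (uint8_t)L;
      bool Split = I >= 0 && I < L;
      if (Unsigned != Split) {
        std::printf("counterexample: I=%d L=%d\n", I, L);
        return 1;
      }
    }
  std::puts("I u< L  <=>  I >= 0 && I s< L, for every non-negative i8 L");
  return 0;
}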
+  return isKnownNonNegative(RHS) && +         isKnownPredicate(CmpInst::ICMP_SGE, LHS, getZero(LHS->getType())) && +         isKnownPredicate(CmpInst::ICMP_SLT, LHS, RHS); +} +  /// isLoopBackedgeGuardedByCond - Test whether the backedge of the loop is  /// protected by a conditional between LHS and RHS.  This is used to  /// eliminate casts. @@ -6715,46 +7433,49 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,                      LoopContinuePredicate->getSuccessor(0) != L->getHeader()))      return true; +  // We don't want more than one activation of the following loops on the stack +  // -- that can lead to O(n!) time complexity. +  if (WalkingBEDominatingConds) +    return false; + +  SaveAndRestore<bool> ClearOnExit(WalkingBEDominatingConds, true); + +  // See if we can exploit a trip count to prove the predicate. +  const auto &BETakenInfo = getBackedgeTakenInfo(L); +  const SCEV *LatchBECount = BETakenInfo.getExact(Latch, this); +  if (LatchBECount != getCouldNotCompute()) { +    // We know that Latch branches back to the loop header exactly +    // LatchBECount times.  This means the backedge condition at Latch is +    // equivalent to  "{0,+,1} u< LatchBECount". +    Type *Ty = LatchBECount->getType(); +    auto NoWrapFlags = SCEV::NoWrapFlags(SCEV::FlagNUW | SCEV::FlagNW); +    const SCEV *LoopCounter = +      getAddRecExpr(getZero(Ty), getOne(Ty), L, NoWrapFlags); +    if (isImpliedCond(Pred, LHS, RHS, ICmpInst::ICMP_ULT, LoopCounter, +                      LatchBECount)) +      return true; +  } +    // Check conditions due to any @llvm.assume intrinsics. -  for (auto &AssumeVH : AC->assumptions()) { +  for (auto &AssumeVH : AC.assumptions()) {      if (!AssumeVH)        continue;      auto *CI = cast<CallInst>(AssumeVH); -    if (!DT->dominates(CI, Latch->getTerminator())) +    if (!DT.dominates(CI, Latch->getTerminator()))        continue;      if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false))        return true;    } -  struct ClearWalkingBEDominatingCondsOnExit { -    ScalarEvolution &SE; - -    explicit ClearWalkingBEDominatingCondsOnExit(ScalarEvolution &SE) -        : SE(SE){}; - -    ~ClearWalkingBEDominatingCondsOnExit() { -      SE.WalkingBEDominatingConds = false; -    } -  }; - -  // We don't want more than one activation of the following loop on the stack -  // -- that can lead to O(n!) time complexity. -  if (WalkingBEDominatingConds) -    return false; - -  WalkingBEDominatingConds = true; -  ClearWalkingBEDominatingCondsOnExit ClearOnExit(*this); -    // If the loop is not reachable from the entry block, we risk running into an    // infinite loop as we walk up into the dom tree.  These loops do not matter    // anyway, so we just return a conservative answer when we see them. -  if (!DT->isReachableFromEntry(L->getHeader())) +  if (!DT.isReachableFromEntry(L->getHeader()))      return false; -  for (DomTreeNode *DTN = (*DT)[Latch], *HeaderDTN = (*DT)[L->getHeader()]; -       DTN != HeaderDTN; -       DTN = DTN->getIDom()) { +  for (DomTreeNode *DTN = DT[Latch], *HeaderDTN = DT[L->getHeader()]; +       DTN != HeaderDTN; DTN = DTN->getIDom()) {      assert(DTN && "should reach the loop header before reaching the root!"); @@ -6778,7 +7499,7 @@ ScalarEvolution::isLoopBackedgeGuardedByCond(const Loop *L,        // We're constructively (and conservatively) enumerating edges within the        // loop body that dominate the latch.  
The dominator tree better agree        // with us on this: -      assert(DT->dominates(DominatingEdge, Latch) && "should be!"); +      assert(DT.dominates(DominatingEdge, Latch) && "should be!");        if (isImpliedCond(Pred, LHS, RHS, Condition,                          BB != ContinuePredicate->getSuccessor(0))) @@ -6823,11 +7544,11 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,    }    // Check conditions due to any @llvm.assume intrinsics. -  for (auto &AssumeVH : AC->assumptions()) { +  for (auto &AssumeVH : AC.assumptions()) {      if (!AssumeVH)        continue;      auto *CI = cast<CallInst>(AssumeVH); -    if (!DT->dominates(CI, L->getHeader())) +    if (!DT.dominates(CI, L->getHeader()))        continue;      if (isImpliedCond(Pred, LHS, RHS, CI->getArgOperand(0), false)) @@ -6837,6 +7558,7 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,    return false;  } +namespace {  /// RAII wrapper to prevent recursive application of isImpliedCond.  /// ScalarEvolution's PendingLoopPredicates set must be empty unless we are  /// currently evaluating isImpliedCond. @@ -6854,6 +7576,7 @@ struct MarkPendingLoopPredicate {        LoopPreds.erase(Cond);    }  }; +} // end anonymous namespace  /// isImpliedCond - Test whether the condition described by Pred, LHS,  /// and RHS is true whenever the given Cond value evaluates to true. @@ -6892,6 +7615,14 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,    const SCEV *FoundLHS = getSCEV(ICI->getOperand(0));    const SCEV *FoundRHS = getSCEV(ICI->getOperand(1)); +  return isImpliedCond(Pred, LHS, RHS, FoundPred, FoundLHS, FoundRHS); +} + +bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, +                                    const SCEV *RHS, +                                    ICmpInst::Predicate FoundPred, +                                    const SCEV *FoundLHS, +                                    const SCEV *FoundRHS) {    // Balance the types.    if (getTypeSizeInBits(LHS->getType()) <        getTypeSizeInBits(FoundLHS->getType())) { @@ -6947,6 +7678,13 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,                                     RHS, LHS, FoundLHS, FoundRHS);    } +  // Unsigned comparison is the same as signed comparison when both the operands +  // are non-negative. +  if (CmpInst::isUnsigned(FoundPred) && +      CmpInst::getSignedPredicate(FoundPred) == Pred && +      isKnownNonNegative(FoundLHS) && isKnownNonNegative(FoundRHS)) +    return isImpliedCondOperands(Pred, LHS, RHS, FoundLHS, FoundRHS); +    // Check if we can make progress by sharpening ranges.    if (FoundPred == ICmpInst::ICMP_NE &&        (isa<SCEVConstant>(FoundLHS) || isa<SCEVConstant>(FoundRHS))) { @@ -6970,7 +7708,7 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,      APInt Min = ICmpInst::isSigned(Pred) ?          getSignedRange(V).getSignedMin() : getUnsignedRange(V).getUnsignedMin(); -    if (Min == C->getValue()->getValue()) { +    if (Min == C->getAPInt()) {        // Given (V >= Min && V != Min) we conclude V >= (Min + 1).        // This is true even if (Min + 1) wraps around -- in case of        // wraparound, (Min + 1) < Min, so (V >= Min => V >= (Min + 1)). 
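The new early exit in isImpliedCond above leans on the fact that an unsigned predicate agrees with its signed counterpart once both operands are known non-negative; a standalone i8 check of the u< / s< case (not from the patch):

#include <cstdint>
#include <cstdio>

int main() {
  // For non-negative A and B, A u< B agrees with A s< B; the other unsigned
  // predicates reduce to this one by swapping or negating.
  for (int A = 0; A <= 127; ++A)
    for (int B = 0; B <= 127; ++B)
      if (((uint8_t)A < (uint8_t)B) != (A < B)) {
        std::printf("counterexample: A=%d B=%d\n", A, B);
        return 1;
      }
  std::puts("u< and s< agree on non-negative i8 operands");
  return 0;
}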
@@ -7021,6 +7759,149 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,    return false;  } +bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr, +                                     const SCEV *&L, const SCEV *&R, +                                     SCEV::NoWrapFlags &Flags) { +  const auto *AE = dyn_cast<SCEVAddExpr>(Expr); +  if (!AE || AE->getNumOperands() != 2) +    return false; + +  L = AE->getOperand(0); +  R = AE->getOperand(1); +  Flags = AE->getNoWrapFlags(); +  return true; +} + +bool ScalarEvolution::computeConstantDifference(const SCEV *Less, +                                                const SCEV *More, +                                                APInt &C) { +  // We avoid subtracting expressions here because this function is usually +  // fairly deep in the call stack (i.e. is called many times). + +  if (isa<SCEVAddRecExpr>(Less) && isa<SCEVAddRecExpr>(More)) { +    const auto *LAR = cast<SCEVAddRecExpr>(Less); +    const auto *MAR = cast<SCEVAddRecExpr>(More); + +    if (LAR->getLoop() != MAR->getLoop()) +      return false; + +    // We look at affine expressions only; not for correctness but to keep +    // getStepRecurrence cheap. +    if (!LAR->isAffine() || !MAR->isAffine()) +      return false; + +    if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this)) +      return false; + +    Less = LAR->getStart(); +    More = MAR->getStart(); + +    // fall through +  } + +  if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) { +    const auto &M = cast<SCEVConstant>(More)->getAPInt(); +    const auto &L = cast<SCEVConstant>(Less)->getAPInt(); +    C = M - L; +    return true; +  } + +  const SCEV *L, *R; +  SCEV::NoWrapFlags Flags; +  if (splitBinaryAdd(Less, L, R, Flags)) +    if (const auto *LC = dyn_cast<SCEVConstant>(L)) +      if (R == More) { +        C = -(LC->getAPInt()); +        return true; +      } + +  if (splitBinaryAdd(More, L, R, Flags)) +    if (const auto *LC = dyn_cast<SCEVConstant>(L)) +      if (R == Less) { +        C = LC->getAPInt(); +        return true; +      } + +  return false; +} + +bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow( +    ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS, +    const SCEV *FoundLHS, const SCEV *FoundRHS) { +  if (Pred != CmpInst::ICMP_SLT && Pred != CmpInst::ICMP_ULT) +    return false; + +  const auto *AddRecLHS = dyn_cast<SCEVAddRecExpr>(LHS); +  if (!AddRecLHS) +    return false; + +  const auto *AddRecFoundLHS = dyn_cast<SCEVAddRecExpr>(FoundLHS); +  if (!AddRecFoundLHS) +    return false; + +  // We'd like to let SCEV reason about control dependencies, so we constrain +  // both the inequalities to be about add recurrences on the same loop.  This +  // way we can use isLoopEntryGuardedByCond later. + +  const Loop *L = AddRecFoundLHS->getLoop(); +  if (L != AddRecLHS->getLoop()) +    return false; + +  //  FoundLHS u< FoundRHS u< -C =>  (FoundLHS + C) u< (FoundRHS + C) ... (1) +  // +  //  FoundLHS s< FoundRHS s< INT_MIN - C => (FoundLHS + C) s< (FoundRHS + C) +  //                                                                  ... (2) +  // +  // Informal proof for (2), assuming (1) [*]: +  // +  // We'll also assume (A s< B) <=> ((A + INT_MIN) u< (B + INT_MIN)) ... 
(3)[**] +  // +  // Then +  // +  //       FoundLHS s< FoundRHS s< INT_MIN - C +  // <=>  (FoundLHS + INT_MIN) u< (FoundRHS + INT_MIN) u< -C   [ using (3) ] +  // <=>  (FoundLHS + INT_MIN + C) u< (FoundRHS + INT_MIN + C) [ using (1) ] +  // <=>  (FoundLHS + INT_MIN + C + INT_MIN) s< +  //                        (FoundRHS + INT_MIN + C + INT_MIN) [ using (3) ] +  // <=>  FoundLHS + C s< FoundRHS + C +  // +  // [*]: (1) can be proved by ruling out overflow. +  // +  // [**]: This can be proved by analyzing all the four possibilities: +  //    (A s< 0, B s< 0), (A s< 0, B s>= 0), (A s>= 0, B s< 0) and +  //    (A s>= 0, B s>= 0). +  // +  // Note: +  // Despite (2), "FoundRHS s< INT_MIN - C" does not mean that "FoundRHS + C" +  // will not sign underflow.  For instance, say FoundLHS = (i8 -128), FoundRHS +  // = (i8 -127) and C = (i8 -100).  Then INT_MIN - C = (i8 -28), and FoundRHS +  // s< (INT_MIN - C).  Lack of sign overflow / underflow in "FoundRHS + C" is +  // neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS + +  // C)". + +  APInt LDiff, RDiff; +  if (!computeConstantDifference(FoundLHS, LHS, LDiff) || +      !computeConstantDifference(FoundRHS, RHS, RDiff) || +      LDiff != RDiff) +    return false; + +  if (LDiff == 0) +    return true; + +  APInt FoundRHSLimit; + +  if (Pred == CmpInst::ICMP_ULT) { +    FoundRHSLimit = -RDiff; +  } else { +    assert(Pred == CmpInst::ICMP_SLT && "Checked above!"); +    FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff; +  } + +  // Try to prove (1) or (2), as needed. +  return isLoopEntryGuardedByCond(L, Pred, FoundRHS, +                                  getConstant(FoundRHSLimit)); +} +  /// isImpliedCondOperands - Test whether the condition described by Pred,  /// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,  /// and FoundRHS is true. 
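isImpliedCondOperandsViaNoOverflow leans on implications (1) and (2) from the comment above: once LHS and RHS sit the same constant C away from FoundLHS and FoundRHS, it is enough to show that FoundRHS stays below -C (unsigned) or below INT_MIN - C computed modularly (signed). A standalone brute-force check of both implications at 8 bits (illustration only; the modular casts stand in for APInt arithmetic):

// Standalone brute-force check (illustration only) of implications (1) and (2)
// at 8 bits. All additions and the INT_MIN - C limit are modular, like APInt.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  for (int xi = 0; xi < 256; ++xi)
    for (int yi = 0; yi < 256; ++yi)
      for (int ci = 0; ci < 256; ++ci) {
        uint8_t X = xi, Y = yi, C = ci;
        // (1)  X u< Y u< -C  =>  (X + C) u< (Y + C)
        if (X < Y && Y < static_cast<uint8_t>(-C))
          assert(static_cast<uint8_t>(X + C) < static_cast<uint8_t>(Y + C));
        // (2)  X s< Y s< INT_MIN - C  =>  (X + C) s< (Y + C)
        int8_t SX = static_cast<int8_t>(X), SY = static_cast<int8_t>(Y),
               SC = static_cast<int8_t>(C);
        int8_t Limit = static_cast<int8_t>(INT8_MIN - SC); // modular limit
        if (SX < SY && SY < Limit)
          assert(static_cast<int8_t>(SX + SC) < static_cast<int8_t>(SY + SC));
      }
  std::puts("implications (1) and (2) hold for all 8-bit values");
  return 0;
}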
@@ -7031,6 +7912,9 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,    if (isImpliedCondOperandsViaRanges(Pred, LHS, RHS, FoundLHS, FoundRHS))      return true; +  if (isImpliedCondOperandsViaNoOverflow(Pred, LHS, RHS, FoundLHS, FoundRHS)) +    return true; +    return isImpliedCondOperandsHelper(Pred, LHS, RHS,                                       FoundLHS, FoundRHS) ||           // ~x < ~y --> x > y @@ -7043,17 +7927,13 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,  /// If Expr computes ~A, return A else return nullptr  static const SCEV *MatchNotExpr(const SCEV *Expr) {    const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Expr); -  if (!Add || Add->getNumOperands() != 2) return nullptr; - -  const SCEVConstant *AddLHS = dyn_cast<SCEVConstant>(Add->getOperand(0)); -  if (!(AddLHS && AddLHS->getValue()->getValue().isAllOnesValue())) +  if (!Add || Add->getNumOperands() != 2 || +      !Add->getOperand(0)->isAllOnesValue())      return nullptr;    const SCEVMulExpr *AddRHS = dyn_cast<SCEVMulExpr>(Add->getOperand(1)); -  if (!AddRHS || AddRHS->getNumOperands() != 2) return nullptr; - -  const SCEVConstant *MulLHS = dyn_cast<SCEVConstant>(AddRHS->getOperand(0)); -  if (!(MulLHS && MulLHS->getValue()->getValue().isAllOnesValue())) +  if (!AddRHS || AddRHS->getNumOperands() != 2 || +      !AddRHS->getOperand(0)->isAllOnesValue())      return nullptr;    return AddRHS->getOperand(1); @@ -7067,8 +7947,7 @@ static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,    const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);    if (!MaxExpr) return false; -  auto It = std::find(MaxExpr->op_begin(), MaxExpr->op_end(), Candidate); -  return It != MaxExpr->op_end(); +  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();  } @@ -7084,6 +7963,38 @@ static bool IsMinConsistingOf(ScalarEvolution &SE,    return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));  } +static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, +                                           ICmpInst::Predicate Pred, +                                           const SCEV *LHS, const SCEV *RHS) { + +  // If both sides are affine addrecs for the same loop, with equal +  // steps, and we know the recurrences don't wrap, then we only +  // need to check the predicate on the starting values. + +  if (!ICmpInst::isRelational(Pred)) +    return false; + +  const SCEVAddRecExpr *LAR = dyn_cast<SCEVAddRecExpr>(LHS); +  if (!LAR) +    return false; +  const SCEVAddRecExpr *RAR = dyn_cast<SCEVAddRecExpr>(RHS); +  if (!RAR) +    return false; +  if (LAR->getLoop() != RAR->getLoop()) +    return false; +  if (!LAR->isAffine() || !RAR->isAffine()) +    return false; + +  if (LAR->getStepRecurrence(SE) != RAR->getStepRecurrence(SE)) +    return false; + +  SCEV::NoWrapFlags NW = ICmpInst::isSigned(Pred) ? +                         SCEV::FlagNSW : SCEV::FlagNUW; +  if (!LAR->getNoWrapFlags(NW) || !RAR->getNoWrapFlags(NW)) +    return false; + +  return SE.isKnownPredicate(Pred, LAR->getStart(), RAR->getStart()); +}  /// Is LHS `Pred` RHS true on the virtue of LHS or RHS being a Min or Max  /// expression? 
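IsKnownPredicateViaAddRecStart reduces a relational predicate between two add recurrences to the same predicate on their start values, provided both are affine in the same loop, share the step, and carry the matching no-wrap flag. Equal, non-wrapping steps keep the distance between the two sequences constant, so the start ordering carries over to every iteration. A standalone sketch of the unsigned case (illustration only; the start and step values are hypothetical):

// Illustration only: models {A0,+,S}<nuw> and {B0,+,S}<nuw> at 32 bits and
// checks that "A0 u< B0" carries over to every iteration while neither wraps.
#include <cassert>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t A = 3, B = 10, Step = 4;            // hypothetical starts and step
  assert(A < B);                               // the fact checked on the starts
  for (int I = 0; I < 1000000; ++I) {
    uint32_t NextA = A + Step, NextB = B + Step;
    if (NextA < A || NextB < B)                // a wrap would violate <nuw>
      break;
    A = NextA;
    B = NextB;
    assert(A < B);                             // ordering preserved each step
  }
  std::puts("start ordering carried to every iteration");
  return 0;
}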
@@ -7129,7 +8040,9 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,    auto IsKnownPredicateFull =        [this](ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS) {      return isKnownPredicateWithRanges(Pred, LHS, RHS) || -        IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS); +           IsKnownPredicateViaMinOrMax(*this, Pred, LHS, RHS) || +           IsKnownPredicateViaAddRecStart(*this, Pred, LHS, RHS) || +           isKnownPredicateViaNoOverflow(Pred, LHS, RHS);    };    switch (Pred) { @@ -7185,7 +8098,7 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,        !isa<SCEVConstant>(AddLHS->getOperand(0)))      return false; -  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getValue()->getValue(); +  APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();    // `FoundLHSRange` is the range we know `FoundLHS` to be in by virtue of the    // antecedent "`FoundLHS` `Pred` `FoundRHS`". @@ -7194,13 +8107,12 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,    // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range    // for `LHS`: -  APInt Addend = -      cast<SCEVConstant>(AddLHS->getOperand(0))->getValue()->getValue(); +  APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();    ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));    // We can also compute the range of values for `LHS` that satisfy the    // consequent, "`LHS` `Pred` `RHS`": -  APInt ConstRHS = cast<SCEVConstant>(RHS)->getValue()->getValue(); +  APInt ConstRHS = cast<SCEVConstant>(RHS)->getAPInt();    ConstantRange SatisfyingLHSRange =        ConstantRange::makeSatisfyingICmpRegion(Pred, ConstRHS); @@ -7217,7 +8129,7 @@ bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,    if (NoWrap) return false;    unsigned BitWidth = getTypeSizeInBits(RHS->getType()); -  const SCEV *One = getConstant(Stride->getType(), 1); +  const SCEV *One = getOne(Stride->getType());    if (IsSigned) {      APInt MaxRHS = getSignedRange(RHS).getSignedMax(); @@ -7246,7 +8158,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,    if (NoWrap) return false;    unsigned BitWidth = getTypeSizeInBits(RHS->getType()); -  const SCEV *One = getConstant(Stride->getType(), 1); +  const SCEV *One = getOne(Stride->getType());    if (IsSigned) {      APInt MinRHS = getSignedRange(RHS).getSignedMin(); @@ -7271,7 +8183,7 @@ bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,  // stride and presence of the equality in the comparison.  const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step,                                              bool Equality) { -  const SCEV *One = getConstant(Step->getType(), 1); +  const SCEV *One = getOne(Step->getType());    Delta = Equality ? 
getAddExpr(Delta, Step)                     : getAddExpr(Delta, getMinusSCEV(Step, One));    return getUDivExpr(Delta, Step); @@ -7324,7 +8236,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,      // overflow, in which case if RHS - Start is a constant, we don't need to      // do a max operation since we can just figure it out statically      if (NoWrap && isa<SCEVConstant>(Diff)) { -      APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue(); +      APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt();        if (D.isNegative())          End = Start;      } else @@ -7405,7 +8317,7 @@ ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS,      // overflow, in which case if RHS - Start is a constant, we don't need to      // do a max operation since we can just figure it out statically      if (NoWrap && isa<SCEVConstant>(Diff)) { -      APInt D = dyn_cast<const SCEVConstant>(Diff)->getValue()->getValue(); +      APInt D = dyn_cast<const SCEVConstant>(Diff)->getAPInt();        if (!D.isNegative())          End = Start;      } else @@ -7460,23 +8372,20 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(getStart()))      if (!SC->getValue()->isZero()) {        SmallVector<const SCEV *, 4> Operands(op_begin(), op_end()); -      Operands[0] = SE.getConstant(SC->getType(), 0); +      Operands[0] = SE.getZero(SC->getType());        const SCEV *Shifted = SE.getAddRecExpr(Operands, getLoop(),                                               getNoWrapFlags(FlagNW)); -      if (const SCEVAddRecExpr *ShiftedAddRec = -            dyn_cast<SCEVAddRecExpr>(Shifted)) +      if (const auto *ShiftedAddRec = dyn_cast<SCEVAddRecExpr>(Shifted))          return ShiftedAddRec->getNumIterationsInRange( -                           Range.subtract(SC->getValue()->getValue()), SE); +            Range.subtract(SC->getAPInt()), SE);        // This is strange and shouldn't happen.        return SE.getCouldNotCompute();      }    // The only time we can solve this is when we have all constant indices.    // Otherwise, we cannot determine the overflow conditions. -  for (unsigned i = 0, e = getNumOperands(); i != e; ++i) -    if (!isa<SCEVConstant>(getOperand(i))) -      return SE.getCouldNotCompute(); - +  if (any_of(operands(), [](const SCEV *Op) { return !isa<SCEVConstant>(Op); })) +    return SE.getCouldNotCompute();    // Okay at this point we know that all elements of the chrec are constants and    // that the start element is zero. @@ -7485,7 +8394,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,    // iteration exits.    unsigned BitWidth = SE.getTypeSizeInBits(getType());    if (!Range.contains(APInt(BitWidth, 0))) -    return SE.getConstant(getType(), 0); +    return SE.getZero(getType());    if (isAffine()) {      // If this is an affine expression then we have this situation: @@ -7496,7 +8405,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,      // If A is negative then the lower of the range is the last possible loop      // value.  Also note that we already checked for a full range.      APInt One(BitWidth,1); -    APInt A     = cast<SCEVConstant>(getOperand(1))->getValue()->getValue(); +    APInt A = cast<SCEVConstant>(getOperand(1))->getAPInt();      APInt End = A.sge(One) ? (Range.getUpper() - One) : Range.getLower();      // The exit value should be (End+A)/A. 
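For the affine case the exit iteration comes out to (End + A) / A, where End is the last value the range still admits when the step A is positive. A quick standalone check of that formula for a hypothetical {0,+,3} recurrence leaving the range [0, 10) (illustration only):

// Illustration only: compare the "(End + A) / A" exit count against a brute
// force search for an affine {0,+,A} recurrence and a half-open range.
#include <cassert>
#include <cstdio>

int main() {
  const long Lower = 0, Upper = 10, A = 3;     // hypothetical range and step
  // Brute force: first iteration I whose value A*I falls outside [Lower, Upper).
  long BruteForce = 0;
  while (A * BruteForce >= Lower && A * BruteForce < Upper)
    ++BruteForce;
  // Formula used above: End is the last value still allowed by the range.
  long End = Upper - 1;
  long Formula = (End + A) / A;
  assert(BruteForce == Formula && Formula == 4);
  std::printf("exit iteration: %ld\n", Formula);
  return 0;
}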
@@ -7528,15 +8437,13 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,                                               FlagAnyWrap);      // Next, solve the constructed addrec -    std::pair<const SCEV *,const SCEV *> Roots = -      SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE); +    auto Roots = SolveQuadraticEquation(cast<SCEVAddRecExpr>(NewAddRec), SE);      const SCEVConstant *R1 = dyn_cast<SCEVConstant>(Roots.first);      const SCEVConstant *R2 = dyn_cast<SCEVConstant>(Roots.second);      if (R1) {        // Pick the smallest positive root value. -      if (ConstantInt *CB = -          dyn_cast<ConstantInt>(ConstantExpr::getICmp(ICmpInst::ICMP_ULT, -                         R1->getValue(), R2->getValue()))) { +      if (ConstantInt *CB = dyn_cast<ConstantInt>(ConstantExpr::getICmp( +              ICmpInst::ICMP_ULT, R1->getValue(), R2->getValue()))) {          if (!CB->getZExtValue())            std::swap(R1, R2);   // R1 is the minimum root now. @@ -7549,7 +8456,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,          if (Range.contains(R1Val->getValue())) {            // The next iteration must be out of the range...            ConstantInt *NextVal = -                ConstantInt::get(SE.getContext(), R1->getValue()->getValue()+1); +              ConstantInt::get(SE.getContext(), R1->getAPInt() + 1);            R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);            if (!Range.contains(R1Val->getValue())) @@ -7560,7 +8467,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range,          // If R1 was not in the range, then it is a good return value.  Make          // sure that R1-1 WAS in the range though, just in case.          ConstantInt *NextVal = -               ConstantInt::get(SE.getContext(), R1->getValue()->getValue()-1); +            ConstantInt::get(SE.getContext(), R1->getAPInt() - 1);          R1Val = EvaluateConstantChrecAtConstant(this, NextVal, SE);          if (Range.contains(R1Val->getValue()))            return R1; @@ -7644,9 +8551,84 @@ struct SCEVCollectTerms {    }    bool isDone() const { return false; }  }; + +// Check if a SCEV contains an AddRecExpr. +struct SCEVHasAddRec { +  bool &ContainsAddRec; + +  SCEVHasAddRec(bool &ContainsAddRec) : ContainsAddRec(ContainsAddRec) { +   ContainsAddRec = false; +  } + +  bool follow(const SCEV *S) { +    if (isa<SCEVAddRecExpr>(S)) { +      ContainsAddRec = true; + +      // Stop recursion: once we collected a term, do not walk its operands. +      return false; +    } + +    // Keep looking. +    return true; +  } +  bool isDone() const { return false; } +}; + +// Find factors that are multiplied with an expression that (possibly as a +// subexpression) contains an AddRecExpr. In the expression: +// +//  8 * (100 +  %p * %q * (%a + {0, +, 1}_loop)) +// +// "%p * %q" are factors multiplied by the expression "(%a + {0, +, 1}_loop)" +// that contains the AddRec {0, +, 1}_loop. %p * %q are likely to be array size +// parameters as they form a product with an induction variable. +// +// This collector expects all array size parameters to be in the same MulExpr. +// It might be necessary to later add support for collecting parameters that are +// spread over different nested MulExpr. 
+struct SCEVCollectAddRecMultiplies {
+  SmallVectorImpl<const SCEV *> &Terms;
+  ScalarEvolution &SE;
+
+  SCEVCollectAddRecMultiplies(SmallVectorImpl<const SCEV *> &T, ScalarEvolution &SE)
+      : Terms(T), SE(SE) {}
+
+  bool follow(const SCEV *S) {
+    if (auto *Mul = dyn_cast<SCEVMulExpr>(S)) {
+      bool HasAddRec = false;
+      SmallVector<const SCEV *, 0> Operands;
+      for (auto Op : Mul->operands()) {
+        if (isa<SCEVUnknown>(Op)) {
+          Operands.push_back(Op);
+        } else {
+          bool ContainsAddRec;
+          SCEVHasAddRec ContainsAddRecVisitor(ContainsAddRec);
+          visitAll(Op, ContainsAddRecVisitor);
+          HasAddRec |= ContainsAddRec;
+        }
+      }
+      if (Operands.size() == 0)
+        return true;
+
+      if (!HasAddRec)
+        return false;
+
+      Terms.push_back(SE.getMulExpr(Operands));
+      // Stop recursion: once we collected a term, do not walk its operands.
+      return false;
+    }
+
+    // Keep looking.
+    return true;
+  }
+  bool isDone() const { return false; }
+};
 }
 
-/// Find parametric terms in this SCEVAddRecExpr.
+/// Find parametric terms in this SCEVAddRecExpr. We look for parameters in
+/// two places:
+///   1) The strides of AddRec expressions.
+///   2) Unknowns that are multiplied with AddRec expressions.
 void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
     SmallVectorImpl<const SCEV *> &Terms) {
   SmallVector<const SCEV *, 4> Strides;
@@ -7669,6 +8651,9 @@ void ScalarEvolution::collectParametricTerms(const SCEV *Expr,
       for (const SCEV *T : Terms)
         dbgs() << *T << "\n";
     });
+
+  SCEVCollectAddRecMultiplies MulCollector(Terms, *this);
+  visitAll(Expr, MulCollector);
 }
 
 static bool findArrayDimensionsRec(ScalarEvolution &SE,
@@ -7718,30 +8703,28 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
   return true;
 }
 
-namespace {
-struct FindParameter {
-  bool FoundParameter;
-  FindParameter() : FoundParameter(false) {}
-
-  bool follow(const SCEV *S) {
-    if (isa<SCEVUnknown>(S)) {
-      FoundParameter = true;
-      // Stop recursion: we found a parameter.
-      return false;
-    }
-    // Keep looking.
-    return true;
-  }
-  bool isDone() const {
-    // Stop recursion if we have found a parameter.
-    return FoundParameter;
-  }
-};
-}
-
 // Returns true when S contains at least a SCEVUnknown parameter.
 static inline bool
 containsParameters(const SCEV *S) {
+  struct FindParameter {
+    bool FoundParameter;
+    FindParameter() : FoundParameter(false) {}
+
+    bool follow(const SCEV *S) {
+      if (isa<SCEVUnknown>(S)) {
+        FoundParameter = true;
+        // Stop recursion: we found a parameter.
+        return false;
+      }
+      // Keep looking.
+      return true;
+    }
+    bool isDone() const {
+      // Stop recursion if we have found a parameter.
+      return FoundParameter;
+    }
+  };
+
   FindParameter F;
   SCEVTraversal<FindParameter> ST(F);
   ST.visitAll(S);
@@ -7829,11 +8812,13 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
   ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);
 
-  // Divide all terms by the element size.
+  // Try to divide all terms by the element size. If a term is not divisible by
+  // the element size, proceed with the original term.
for (const SCEV *&Term : Terms) {      const SCEV *Q, *R;      SCEVDivision::divide(SE, Term, ElementSize, &Q, &R); -    Term = Q; +    if (!Q->isZero()) +      Term = Q;    }    SmallVector<const SCEV *, 4> NewTerms; @@ -7875,7 +8860,7 @@ void ScalarEvolution::computeAccessFunctions(    if (Sizes.empty())      return; -  if (auto AR = dyn_cast<SCEVAddRecExpr>(Expr)) +  if (auto *AR = dyn_cast<SCEVAddRecExpr>(Expr))      if (!AR->isAffine())        return; @@ -8059,58 +9044,55 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)  //                   ScalarEvolution Class Implementation  //===----------------------------------------------------------------------===// -ScalarEvolution::ScalarEvolution() -    : FunctionPass(ID), WalkingBEDominatingConds(false), ValuesAtScopes(64), -      LoopDispositions(64), BlockDispositions(64), FirstUnknown(nullptr) { -  initializeScalarEvolutionPass(*PassRegistry::getPassRegistry()); -} - -bool ScalarEvolution::runOnFunction(Function &F) { -  this->F = &F; -  AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); -  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); -  TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); -  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); -  return false; -} - -void ScalarEvolution::releaseMemory() { +ScalarEvolution::ScalarEvolution(Function &F, TargetLibraryInfo &TLI, +                                 AssumptionCache &AC, DominatorTree &DT, +                                 LoopInfo &LI) +    : F(F), TLI(TLI), AC(AC), DT(DT), LI(LI), +      CouldNotCompute(new SCEVCouldNotCompute()), +      WalkingBEDominatingConds(false), ProvingSplitPredicate(false), +      ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), +      FirstUnknown(nullptr) {} + +ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg) +    : F(Arg.F), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT), LI(Arg.LI), +      CouldNotCompute(std::move(Arg.CouldNotCompute)), +      ValueExprMap(std::move(Arg.ValueExprMap)), +      WalkingBEDominatingConds(false), ProvingSplitPredicate(false), +      BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)), +      ConstantEvolutionLoopExitValue( +          std::move(Arg.ConstantEvolutionLoopExitValue)), +      ValuesAtScopes(std::move(Arg.ValuesAtScopes)), +      LoopDispositions(std::move(Arg.LoopDispositions)), +      BlockDispositions(std::move(Arg.BlockDispositions)), +      UnsignedRanges(std::move(Arg.UnsignedRanges)), +      SignedRanges(std::move(Arg.SignedRanges)), +      UniqueSCEVs(std::move(Arg.UniqueSCEVs)), +      UniquePreds(std::move(Arg.UniquePreds)), +      SCEVAllocator(std::move(Arg.SCEVAllocator)), +      FirstUnknown(Arg.FirstUnknown) { +  Arg.FirstUnknown = nullptr; +} + +ScalarEvolution::~ScalarEvolution() {    // Iterate through all the SCEVUnknown instances and call their    // destructors, so that they release their references to their values. -  for (SCEVUnknown *U = FirstUnknown; U; U = U->Next) -    U->~SCEVUnknown(); +  for (SCEVUnknown *U = FirstUnknown; U;) { +    SCEVUnknown *Tmp = U; +    U = U->Next; +    Tmp->~SCEVUnknown(); +  }    FirstUnknown = nullptr;    ValueExprMap.clear();    // Free any extra memory created for ExitNotTakenInfo in the unlikely event    // that a loop had multiple computable exits. 
-  for (DenseMap<const Loop*, BackedgeTakenInfo>::iterator I = -         BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); -       I != E; ++I) { -    I->second.clear(); -  } +  for (auto &BTCI : BackedgeTakenCounts) +    BTCI.second.clear();    assert(PendingLoopPredicates.empty() && "isImpliedCond garbage");    assert(!WalkingBEDominatingConds && "isLoopBackedgeGuardedByCond garbage!"); - -  BackedgeTakenCounts.clear(); -  ConstantEvolutionLoopExitValue.clear(); -  ValuesAtScopes.clear(); -  LoopDispositions.clear(); -  BlockDispositions.clear(); -  UnsignedRanges.clear(); -  SignedRanges.clear(); -  UniqueSCEVs.clear(); -  SCEVAllocator.Reset(); -} - -void ScalarEvolution::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.setPreservesAll(); -  AU.addRequired<AssumptionCacheTracker>(); -  AU.addRequiredTransitive<LoopInfoWrapperPass>(); -  AU.addRequiredTransitive<DominatorTreeWrapperPass>(); -  AU.addRequired<TargetLibraryInfoWrapperPass>(); +  assert(!ProvingSplitPredicate && "ProvingSplitPredicate garbage!");  }  bool ScalarEvolution::hasLoopInvariantBackedgeTakenCount(const Loop *L) { @@ -8152,7 +9134,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,    OS << "\n";  } -void ScalarEvolution::print(raw_ostream &OS, const Module *) const { +void ScalarEvolution::print(raw_ostream &OS) const {    // ScalarEvolution's implementation of the print method is to print    // out SCEV values of all instructions that are interesting. Doing    // this potentially causes it to create new SCEV objects though, @@ -8162,13 +9144,13 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {    ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);    OS << "Classifying expressions for: "; -  F->printAsOperand(OS, /*PrintType=*/false); +  F.printAsOperand(OS, /*PrintType=*/false);    OS << "\n"; -  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) -    if (isSCEVable(I->getType()) && !isa<CmpInst>(*I)) { -      OS << *I << '\n'; +  for (Instruction &I : instructions(F)) +    if (isSCEVable(I.getType()) && !isa<CmpInst>(I)) { +      OS << I << '\n';        OS << "  -->  "; -      const SCEV *SV = SE.getSCEV(&*I); +      const SCEV *SV = SE.getSCEV(&I);        SV->print(OS);        if (!isa<SCEVCouldNotCompute>(SV)) {          OS << " U: "; @@ -8177,7 +9159,7 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {          SE.getSignedRange(SV).print(OS);        } -      const Loop *L = LI->getLoopFor((*I).getParent()); +      const Loop *L = LI.getLoopFor(I.getParent());        const SCEV *AtUse = SE.getSCEVAtScope(SV, L);        if (AtUse != SV) { @@ -8205,9 +9187,9 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {      }    OS << "Determining loop execution counts for: "; -  F->printAsOperand(OS, /*PrintType=*/false); +  F.printAsOperand(OS, /*PrintType=*/false);    OS << "\n"; -  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) +  for (LoopInfo::iterator I = LI.begin(), E = LI.end(); I != E; ++I)      PrintLoopInfo(OS, &SE, *I);  } @@ -8260,9 +9242,8 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {      // This recurrence is variant w.r.t. L if any of its operands      // are variant. 
-    for (SCEVAddRecExpr::op_iterator I = AR->op_begin(), E = AR->op_end(); -         I != E; ++I) -      if (!isLoopInvariant(*I, L)) +    for (auto *Op : AR->operands()) +      if (!isLoopInvariant(Op, L))          return LoopVariant;      // Otherwise it's loop-invariant. @@ -8272,11 +9253,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {    case scMulExpr:    case scUMaxExpr:    case scSMaxExpr: { -    const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);      bool HasVarying = false; -    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); -         I != E; ++I) { -      LoopDisposition D = getLoopDisposition(*I, L); +    for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) { +      LoopDisposition D = getLoopDisposition(Op, L);        if (D == LoopVariant)          return LoopVariant;        if (D == LoopComputable) @@ -8300,7 +9279,7 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {      // invariant if they are not contained in the specified loop.      // Instructions are never considered invariant in the function body      // (null loop) because they are defined within the "loop". -    if (Instruction *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) +    if (auto *I = dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue()))        return (L && !L->contains(I)) ? LoopInvariant : LoopVariant;      return LoopInvariant;    case scCouldNotCompute: @@ -8351,7 +9330,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {      // produces the addrec's value is a PHI, and a PHI effectively properly      // dominates its entire containing block.      const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S); -    if (!DT->dominates(AR->getLoop()->getHeader(), BB)) +    if (!DT.dominates(AR->getLoop()->getHeader(), BB))        return DoesNotDominateBlock;    }    // FALL THROUGH into SCEVNAryExpr handling. @@ -8361,9 +9340,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {    case scSMaxExpr: {      const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);      bool Proper = true; -    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); -         I != E; ++I) { -      BlockDisposition D = getBlockDisposition(*I, BB); +    for (const SCEV *NAryOp : NAry->operands()) { +      BlockDisposition D = getBlockDisposition(NAryOp, BB);        if (D == DoesNotDominateBlock)          return DoesNotDominateBlock;        if (D == DominatesBlock) @@ -8388,7 +9366,7 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {            dyn_cast<Instruction>(cast<SCEVUnknown>(S)->getValue())) {        if (I->getParent() == BB)          return DominatesBlock; -      if (DT->properlyDominates(I->getParent(), BB)) +      if (DT.properlyDominates(I->getParent(), BB))          return ProperlyDominatesBlock;        return DoesNotDominateBlock;      } @@ -8407,24 +9385,22 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {    return getBlockDisposition(S, BB) == ProperlyDominatesBlock;  } -namespace { -// Search for a SCEV expression node within an expression tree. -// Implements SCEVTraversal::Visitor. -struct SCEVSearch { -  const SCEV *Node; -  bool IsFound; +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { +  // Search for a SCEV expression node within an expression tree. +  // Implements SCEVTraversal::Visitor. 
+  struct SCEVSearch { +    const SCEV *Node; +    bool IsFound; -  SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} +    SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} -  bool follow(const SCEV *S) { -    IsFound |= (S == Node); -    return !IsFound; -  } -  bool isDone() const { return IsFound; } -}; -} +    bool follow(const SCEV *S) { +      IsFound |= (S == Node); +      return !IsFound; +    } +    bool isDone() const { return IsFound; } +  }; -bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {    SCEVSearch Search(Op);    visitAll(S, Search);    return Search.IsFound; @@ -8463,43 +9439,39 @@ static void replaceSubString(std::string &Str, StringRef From, StringRef To) {  /// getLoopBackedgeTakenCounts - Helper method for verifyAnalysis.  static void  getLoopBackedgeTakenCounts(Loop *L, VerifyMap &Map, ScalarEvolution &SE) { -  for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I) { -    getLoopBackedgeTakenCounts(*I, Map, SE); // recurse. - -    std::string &S = Map[L]; -    if (S.empty()) { -      raw_string_ostream OS(S); -      SE.getBackedgeTakenCount(L)->print(OS); +  std::string &S = Map[L]; +  if (S.empty()) { +    raw_string_ostream OS(S); +    SE.getBackedgeTakenCount(L)->print(OS); -      // false and 0 are semantically equivalent. This can happen in dead loops. -      replaceSubString(OS.str(), "false", "0"); -      // Remove wrap flags, their use in SCEV is highly fragile. -      // FIXME: Remove this when SCEV gets smarter about them. -      replaceSubString(OS.str(), "<nw>", ""); -      replaceSubString(OS.str(), "<nsw>", ""); -      replaceSubString(OS.str(), "<nuw>", ""); -    } +    // false and 0 are semantically equivalent. This can happen in dead loops. +    replaceSubString(OS.str(), "false", "0"); +    // Remove wrap flags, their use in SCEV is highly fragile. +    // FIXME: Remove this when SCEV gets smarter about them. +    replaceSubString(OS.str(), "<nw>", ""); +    replaceSubString(OS.str(), "<nsw>", ""); +    replaceSubString(OS.str(), "<nuw>", "");    } -} -void ScalarEvolution::verifyAnalysis() const { -  if (!VerifySCEV) -    return; +  for (auto *R : reverse(*L)) +    getLoopBackedgeTakenCounts(R, Map, SE); // recurse. +} +void ScalarEvolution::verify() const {    ScalarEvolution &SE = *const_cast<ScalarEvolution *>(this);    // Gather stringified backedge taken counts for all loops using SCEV's caches.    // FIXME: It would be much better to store actual values instead of strings,    //        but SCEV pointers will change if we drop the caches.    VerifyMap BackedgeDumpsOld, BackedgeDumpsNew; -  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) +  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I)      getLoopBackedgeTakenCounts(*I, BackedgeDumpsOld, SE); -  // Gather stringified backedge taken counts for all loops without using -  // SCEV's caches. -  SE.releaseMemory(); -  for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) -    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE); +  // Gather stringified backedge taken counts for all loops using a fresh +  // ScalarEvolution object. +  ScalarEvolution SE2(F, TLI, AC, DT, LI); +  for (LoopInfo::reverse_iterator I = LI.rbegin(), E = LI.rend(); I != E; ++I) +    getLoopBackedgeTakenCounts(*I, BackedgeDumpsNew, SE2);    // Now compare whether they're the same with and without caches. This allows    // verifying that no pass changed the cache. 
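The verification strategy is a general cross-checking pattern: dump the cached results as normalized strings, recompute the analysis from scratch, and diff the two dumps. A minimal standalone sketch of that pattern with hypothetical loop names and dumps (not the LLVM API):

// Minimal sketch (illustration only) of the verify() pattern: compare the
// string dump of cached results against a freshly recomputed dump and report
// every key whose two dumps disagree. Names and values are hypothetical.
#include <cstdio>
#include <map>
#include <string>

using DumpMap = std::map<std::string, std::string>;

static bool compareDumps(const DumpMap &Cached, const DumpMap &Fresh) {
  bool OK = true;
  for (const auto &Entry : Cached) {
    auto It = Fresh.find(Entry.first);
    if (It == Fresh.end() || It->second != Entry.second) {
      std::fprintf(stderr, "mismatch for %s: cached '%s' vs fresh '%s'\n",
                   Entry.first.c_str(), Entry.second.c_str(),
                   It == Fresh.end() ? "<missing>" : It->second.c_str());
      OK = false;
    }
  }
  return OK;
}

int main() {
  DumpMap Cached = {{"loop1", "10"}, {"loop2", "n + 1"}};
  DumpMap Fresh = {{"loop1", "10"}, {"loop2", "n + 2"}}; // a stale cache entry
  return compareDumps(Cached, Fresh) ? 0 : 1;
}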
@@ -8532,3 +9504,238 @@ void ScalarEvolution::verifyAnalysis() const {    // TODO: Verify more things.  } + +char ScalarEvolutionAnalysis::PassID; + +ScalarEvolution ScalarEvolutionAnalysis::run(Function &F, +                                             AnalysisManager<Function> *AM) { +  return ScalarEvolution(F, AM->getResult<TargetLibraryAnalysis>(F), +                         AM->getResult<AssumptionAnalysis>(F), +                         AM->getResult<DominatorTreeAnalysis>(F), +                         AM->getResult<LoopAnalysis>(F)); +} + +PreservedAnalyses +ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> *AM) { +  AM->getResult<ScalarEvolutionAnalysis>(F).print(OS); +  return PreservedAnalyses::all(); +} + +INITIALIZE_PASS_BEGIN(ScalarEvolutionWrapperPass, "scalar-evolution", +                      "Scalar Evolution Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ScalarEvolutionWrapperPass, "scalar-evolution", +                    "Scalar Evolution Analysis", false, true) +char ScalarEvolutionWrapperPass::ID = 0; + +ScalarEvolutionWrapperPass::ScalarEvolutionWrapperPass() : FunctionPass(ID) { +  initializeScalarEvolutionWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool ScalarEvolutionWrapperPass::runOnFunction(Function &F) { +  SE.reset(new ScalarEvolution( +      F, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), +      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F), +      getAnalysis<DominatorTreeWrapperPass>().getDomTree(), +      getAnalysis<LoopInfoWrapperPass>().getLoopInfo())); +  return false; +} + +void ScalarEvolutionWrapperPass::releaseMemory() { SE.reset(); } + +void ScalarEvolutionWrapperPass::print(raw_ostream &OS, const Module *) const { +  SE->print(OS); +} + +void ScalarEvolutionWrapperPass::verifyAnalysis() const { +  if (!VerifySCEV) +    return; + +  SE->verify(); +} + +void ScalarEvolutionWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequiredTransitive<AssumptionCacheTracker>(); +  AU.addRequiredTransitive<LoopInfoWrapperPass>(); +  AU.addRequiredTransitive<DominatorTreeWrapperPass>(); +  AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>(); +} + +const SCEVPredicate * +ScalarEvolution::getEqualPredicate(const SCEVUnknown *LHS, +                                   const SCEVConstant *RHS) { +  FoldingSetNodeID ID; +  // Unique this node based on the arguments +  ID.AddInteger(SCEVPredicate::P_Equal); +  ID.AddPointer(LHS); +  ID.AddPointer(RHS); +  void *IP = nullptr; +  if (const auto *S = UniquePreds.FindNodeOrInsertPos(ID, IP)) +    return S; +  SCEVEqualPredicate *Eq = new (SCEVAllocator) +      SCEVEqualPredicate(ID.Intern(SCEVAllocator), LHS, RHS); +  UniquePreds.InsertNode(Eq, IP); +  return Eq; +} + +namespace { +class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> { +public: +  static const SCEV *rewrite(const SCEV *Scev, ScalarEvolution &SE, +                             SCEVUnionPredicate &A) { +    SCEVPredicateRewriter Rewriter(SE, A); +    return Rewriter.visit(Scev); +  } + +  SCEVPredicateRewriter(ScalarEvolution &SE, SCEVUnionPredicate &P) +      : SCEVRewriteVisitor(SE), P(P) {} + +  const SCEV *visitUnknown(const SCEVUnknown *Expr) { +    auto ExprPreds = P.getPredicatesForExpr(Expr); +    for (auto *Pred : 
ExprPreds) +      if (const auto *IPred = dyn_cast<const SCEVEqualPredicate>(Pred)) +        if (IPred->getLHS() == Expr) +          return IPred->getRHS(); + +    return Expr; +  } + +private: +  SCEVUnionPredicate &P; +}; +} // end anonymous namespace + +const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *Scev, +                                                   SCEVUnionPredicate &Preds) { +  return SCEVPredicateRewriter::rewrite(Scev, *this, Preds); +} + +/// SCEV predicates +SCEVPredicate::SCEVPredicate(const FoldingSetNodeIDRef ID, +                             SCEVPredicateKind Kind) +    : FastID(ID), Kind(Kind) {} + +SCEVEqualPredicate::SCEVEqualPredicate(const FoldingSetNodeIDRef ID, +                                       const SCEVUnknown *LHS, +                                       const SCEVConstant *RHS) +    : SCEVPredicate(ID, P_Equal), LHS(LHS), RHS(RHS) {} + +bool SCEVEqualPredicate::implies(const SCEVPredicate *N) const { +  const auto *Op = dyn_cast<const SCEVEqualPredicate>(N); + +  if (!Op) +    return false; + +  return Op->LHS == LHS && Op->RHS == RHS; +} + +bool SCEVEqualPredicate::isAlwaysTrue() const { return false; } + +const SCEV *SCEVEqualPredicate::getExpr() const { return LHS; } + +void SCEVEqualPredicate::print(raw_ostream &OS, unsigned Depth) const { +  OS.indent(Depth) << "Equal predicate: " << *LHS << " == " << *RHS << "\n"; +} + +/// Union predicates don't get cached so create a dummy set ID for it. +SCEVUnionPredicate::SCEVUnionPredicate() +    : SCEVPredicate(FoldingSetNodeIDRef(nullptr, 0), P_Union) {} + +bool SCEVUnionPredicate::isAlwaysTrue() const { +  return all_of(Preds, +                [](const SCEVPredicate *I) { return I->isAlwaysTrue(); }); +} + +ArrayRef<const SCEVPredicate *> +SCEVUnionPredicate::getPredicatesForExpr(const SCEV *Expr) { +  auto I = SCEVToPreds.find(Expr); +  if (I == SCEVToPreds.end()) +    return ArrayRef<const SCEVPredicate *>(); +  return I->second; +} + +bool SCEVUnionPredicate::implies(const SCEVPredicate *N) const { +  if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) +    return all_of(Set->Preds, +                  [this](const SCEVPredicate *I) { return this->implies(I); }); + +  auto ScevPredsIt = SCEVToPreds.find(N->getExpr()); +  if (ScevPredsIt == SCEVToPreds.end()) +    return false; +  auto &SCEVPreds = ScevPredsIt->second; + +  return any_of(SCEVPreds, +                [N](const SCEVPredicate *I) { return I->implies(N); }); +} + +const SCEV *SCEVUnionPredicate::getExpr() const { return nullptr; } + +void SCEVUnionPredicate::print(raw_ostream &OS, unsigned Depth) const { +  for (auto Pred : Preds) +    Pred->print(OS, Depth); +} + +void SCEVUnionPredicate::add(const SCEVPredicate *N) { +  if (const auto *Set = dyn_cast<const SCEVUnionPredicate>(N)) { +    for (auto Pred : Set->Preds) +      add(Pred); +    return; +  } + +  if (implies(N)) +    return; + +  const SCEV *Key = N->getExpr(); +  assert(Key && "Only SCEVUnionPredicate doesn't have an " +                " associated expression!"); + +  SCEVToPreds[Key].push_back(N); +  Preds.push_back(N); +} + +PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE) +    : SE(SE), Generation(0) {} + +const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { +  const SCEV *Expr = SE.getSCEV(V); +  RewriteEntry &Entry = RewriteMap[Expr]; + +  // If we already have an entry and the version matches, return it. 
+  if (Entry.second && Generation == Entry.first) +    return Entry.second; + +  // We found an entry but it's stale. Rewrite the stale entry +  // acording to the current predicate. +  if (Entry.second) +    Expr = Entry.second; + +  const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds); +  Entry = {Generation, NewSCEV}; + +  return NewSCEV; +} + +void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { +  if (Preds.implies(&Pred)) +    return; +  Preds.add(&Pred); +  updateGeneration(); +} + +const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { +  return Preds; +} + +void PredicatedScalarEvolution::updateGeneration() { +  // If the generation number wrapped recompute everything. +  if (++Generation == 0) { +    for (auto &II : RewriteMap) { +      const SCEV *Rewritten = II.second.second; +      II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)}; +    } +  } +} diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index 6bc0d85a61f9..2e50c80c4e73 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -19,125 +19,42 @@  //  //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h"  using namespace llvm; -namespace { -  /// ScalarEvolutionAliasAnalysis - This is a simple alias analysis -  /// implementation that uses ScalarEvolution to answer queries. -  class ScalarEvolutionAliasAnalysis : public FunctionPass, -                                       public AliasAnalysis { -    ScalarEvolution *SE; - -  public: -    static char ID; // Class identification, replacement for typeinfo -    ScalarEvolutionAliasAnalysis() : FunctionPass(ID), SE(nullptr) { -      initializeScalarEvolutionAliasAnalysisPass( -        *PassRegistry::getPassRegistry()); -    } - -    /// getAdjustedAnalysisPointer - This method is used when a pass implements -    /// an analysis interface through multiple inheritance.  If needed, it -    /// should override this to adjust the this pointer as needed for the -    /// specified pass info. -    void *getAdjustedAnalysisPointer(AnalysisID PI) override { -      if (PI == &AliasAnalysis::ID) -        return (AliasAnalysis*)this; -      return this; -    } - -  private: -    void getAnalysisUsage(AnalysisUsage &AU) const override; -    bool runOnFunction(Function &F) override; -    AliasResult alias(const MemoryLocation &LocA, -                      const MemoryLocation &LocB) override; - -    Value *GetBaseValue(const SCEV *S); -  }; -}  // End of anonymous namespace - -// Register this pass... 
-char ScalarEvolutionAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS_BEGIN(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", -                   "ScalarEvolution-based Alias Analysis", false, true, false) -INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) -INITIALIZE_AG_PASS_END(ScalarEvolutionAliasAnalysis, AliasAnalysis, "scev-aa", -                    "ScalarEvolution-based Alias Analysis", false, true, false) - -FunctionPass *llvm::createScalarEvolutionAliasAnalysisPass() { -  return new ScalarEvolutionAliasAnalysis(); -} - -void -ScalarEvolutionAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.addRequiredTransitive<ScalarEvolution>(); -  AU.setPreservesAll(); -  AliasAnalysis::getAnalysisUsage(AU); -} - -bool -ScalarEvolutionAliasAnalysis::runOnFunction(Function &F) { -  InitializeAliasAnalysis(this, &F.getParent()->getDataLayout()); -  SE = &getAnalysis<ScalarEvolution>(); -  return false; -} - -/// GetBaseValue - Given an expression, try to find a -/// base value. Return null is none was found. -Value * -ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) { -  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { -    // In an addrec, assume that the base will be in the start, rather -    // than the step. -    return GetBaseValue(AR->getStart()); -  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { -    // If there's a pointer operand, it'll be sorted at the end of the list. -    const SCEV *Last = A->getOperand(A->getNumOperands()-1); -    if (Last->getType()->isPointerTy()) -      return GetBaseValue(Last); -  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { -    // This is a leaf node. -    return U->getValue(); -  } -  // No Identified object found. -  return nullptr; -} - -AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA, -                                                const MemoryLocation &LocB) { +AliasResult SCEVAAResult::alias(const MemoryLocation &LocA, +                                const MemoryLocation &LocB) {    // If either of the memory references is empty, it doesn't matter what the    // pointer values are. This allows the code below to ignore this special    // case.    if (LocA.Size == 0 || LocB.Size == 0)      return NoAlias; -  // This is ScalarEvolutionAliasAnalysis. Get the SCEVs! -  const SCEV *AS = SE->getSCEV(const_cast<Value *>(LocA.Ptr)); -  const SCEV *BS = SE->getSCEV(const_cast<Value *>(LocB.Ptr)); +  // This is SCEVAAResult. Get the SCEVs! +  const SCEV *AS = SE.getSCEV(const_cast<Value *>(LocA.Ptr)); +  const SCEV *BS = SE.getSCEV(const_cast<Value *>(LocB.Ptr));    // If they evaluate to the same expression, it's a MustAlias. -  if (AS == BS) return MustAlias; +  if (AS == BS) +    return MustAlias;    // If something is known about the difference between the two addresses,    // see if it's enough to prove a NoAlias. -  if (SE->getEffectiveSCEVType(AS->getType()) == -      SE->getEffectiveSCEVType(BS->getType())) { -    unsigned BitWidth = SE->getTypeSizeInBits(AS->getType()); +  if (SE.getEffectiveSCEVType(AS->getType()) == +      SE.getEffectiveSCEVType(BS->getType())) { +    unsigned BitWidth = SE.getTypeSizeInBits(AS->getType());      APInt ASizeInt(BitWidth, LocA.Size);      APInt BSizeInt(BitWidth, LocB.Size);      // Compute the difference between the two pointers. 
-    const SCEV *BA = SE->getMinusSCEV(BS, AS); +    const SCEV *BA = SE.getMinusSCEV(BS, AS);      // Test whether the difference is known to be great enough that memory of      // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt      // are non-zero, which is special-cased above. -    if (ASizeInt.ule(SE->getUnsignedRange(BA).getUnsignedMin()) && -        (-BSizeInt).uge(SE->getUnsignedRange(BA).getUnsignedMax())) +    if (ASizeInt.ule(SE.getUnsignedRange(BA).getUnsignedMin()) && +        (-BSizeInt).uge(SE.getUnsignedRange(BA).getUnsignedMax()))        return NoAlias;      // Folding the subtraction while preserving range information can be tricky @@ -145,13 +62,13 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,      // and try again to see if things fold better that way.      // Compute the difference between the two pointers. -    const SCEV *AB = SE->getMinusSCEV(AS, BS); +    const SCEV *AB = SE.getMinusSCEV(AS, BS);      // Test whether the difference is known to be great enough that memory of      // the given sizes don't overlap. This assumes that ASizeInt and BSizeInt      // are non-zero, which is special-cased above. -    if (BSizeInt.ule(SE->getUnsignedRange(AB).getUnsignedMin()) && -        (-ASizeInt).uge(SE->getUnsignedRange(AB).getUnsignedMax())) +    if (BSizeInt.ule(SE.getUnsignedRange(AB).getUnsignedMin()) && +        (-ASizeInt).uge(SE.getUnsignedRange(AB).getUnsignedMax()))        return NoAlias;    } @@ -170,5 +87,62 @@ AliasResult ScalarEvolutionAliasAnalysis::alias(const MemoryLocation &LocA,        return NoAlias;    // Forward the query to the next analysis. -  return AliasAnalysis::alias(LocA, LocB); +  return AAResultBase::alias(LocA, LocB); +} + +/// Given an expression, try to find a base value. +/// +/// Returns null if none was found. +Value *SCEVAAResult::GetBaseValue(const SCEV *S) { +  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { +    // In an addrec, assume that the base will be in the start, rather +    // than the step. +    return GetBaseValue(AR->getStart()); +  } else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) { +    // If there's a pointer operand, it'll be sorted at the end of the list. +    const SCEV *Last = A->getOperand(A->getNumOperands() - 1); +    if (Last->getType()->isPointerTy()) +      return GetBaseValue(Last); +  } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { +    // This is a leaf node. +    return U->getValue(); +  } +  // No Identified object found. 
+  return nullptr; +} + +SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> *AM) { +  return SCEVAAResult(AM->getResult<TargetLibraryAnalysis>(F), +                      AM->getResult<ScalarEvolutionAnalysis>(F)); +} + +char SCEVAA::PassID; + +char SCEVAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(SCEVAAWrapperPass, "scev-aa", +                      "ScalarEvolution-based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(SCEVAAWrapperPass, "scev-aa", +                    "ScalarEvolution-based Alias Analysis", false, true) + +FunctionPass *llvm::createSCEVAAWrapperPass() { +  return new SCEVAAWrapperPass(); +} + +SCEVAAWrapperPass::SCEVAAWrapperPass() : FunctionPass(ID) { +  initializeSCEVAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool SCEVAAWrapperPass::runOnFunction(Function &F) { +  Result.reset( +      new SCEVAAResult(getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(), +                       getAnalysis<ScalarEvolutionWrapperPass>().getSE())); +  return false; +} + +void SCEVAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<ScalarEvolutionWrapperPass>(); +  AU.addRequired<TargetLibraryInfoWrapperPass>();  } diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index fee2a2d0d183..921403ddc0fd 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -63,7 +63,7 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,              // Create a new cast, and leave the old cast in place in case              // it is being used as an insert point. Clear its operand              // so that it doesn't hold anything live. -            Ret = CastInst::Create(Op, V, Ty, "", IP); +            Ret = CastInst::Create(Op, V, Ty, "", &*IP);              Ret->takeName(CI);              CI->replaceAllUsesWith(Ret);              CI->setOperand(0, UndefValue::get(V->getType())); @@ -75,17 +75,39 @@ Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,    // Create a new cast.    if (!Ret) -    Ret = CastInst::Create(Op, V, Ty, V->getName(), IP); +    Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);    // We assert at the end of the function since IP might point to an    // instruction with different dominance properties than a cast    // (an invoke for example) and not dominate BIP (but the cast does). -  assert(SE.DT->dominates(Ret, BIP)); +  assert(SE.DT.dominates(Ret, &*BIP));    rememberInstruction(Ret);    return Ret;  } +static BasicBlock::iterator findInsertPointAfter(Instruction *I, +                                                 BasicBlock *MustDominate) { +  BasicBlock::iterator IP = ++I->getIterator(); +  if (auto *II = dyn_cast<InvokeInst>(I)) +    IP = II->getNormalDest()->begin(); + +  while (isa<PHINode>(IP)) +    ++IP; + +  while (IP->isEHPad()) { +    if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) { +      ++IP; +    } else if (isa<CatchSwitchInst>(IP)) { +      IP = MustDominate->getFirstInsertionPt(); +    } else { +      llvm_unreachable("unexpected eh pad!"); +    } +  } + +  return IP; +} +  /// InsertNoopCastOfTo - Insert a cast of V to the specified type,  /// which must be possible with a noop cast, doing what we can to share  /// the casts. 
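findInsertPointAfter above encodes the insertion-point rules used throughout the expander: after an invoke the point moves to the normal destination, PHI nodes are skipped, funclet and landing pads are stepped over, and a catchswitch forces a fallback to the first insertion point of the block that must be dominated. A simplified standalone model of just the skipping rules (illustration only; the Kind enum and block layouts are made up and do not mirror real IR ordering):

// Simplified standalone model (illustration only, not the LLVM API) of the
// skipping logic: starting at a candidate position, step over PHI nodes, step
// past pads, and fall back when a catchswitch is reached.
#include <cassert>
#include <cstddef>
#include <vector>

enum class Kind { Normal, Phi, LandingPad, CatchSwitch };

static std::size_t findInsertIndexFrom(const std::vector<Kind> &Block,
                                       std::size_t IP, std::size_t Fallback) {
  while (IP < Block.size() && Block[IP] == Kind::Phi)
    ++IP;                                   // skip PHI nodes
  while (IP < Block.size()) {
    if (Block[IP] == Kind::LandingPad)
      ++IP;                                 // step past pads we may insert after
    else if (Block[IP] == Kind::CatchSwitch)
      return Fallback;                      // no usable point; use the fallback
    else
      break;                                // found a regular instruction
  }
  return IP;
}

int main() {
  // Hypothetical block: PHIs first, then a pad, then ordinary instructions.
  std::vector<Kind> Block = {Kind::Phi, Kind::Phi, Kind::LandingPad, Kind::Normal};
  assert(findInsertIndexFrom(Block, 0, 0) == 3);
  // A block headed by a catchswitch has no insertion point of its own.
  std::vector<Kind> CSBlock = {Kind::CatchSwitch, Kind::Normal};
  assert(findInsertIndexFrom(CSBlock, 0, 1) == 1);
  return 0;
}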
@@ -135,19 +157,14 @@ Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {      while ((isa<BitCastInst>(IP) &&              isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&              cast<BitCastInst>(IP)->getOperand(0) != A) || -           isa<DbgInfoIntrinsic>(IP) || -           isa<LandingPadInst>(IP)) +           isa<DbgInfoIntrinsic>(IP))        ++IP;      return ReuseOrCreateCast(A, Ty, Op, IP);    }    // Cast the instruction immediately after the instruction.    Instruction *I = cast<Instruction>(V); -  BasicBlock::iterator IP = I; ++IP; -  if (InvokeInst *II = dyn_cast<InvokeInst>(I)) -    IP = II->getNormalDest()->begin(); -  while (isa<PHINode>(IP) || isa<LandingPadInst>(IP)) -    ++IP; +  BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());    return ReuseOrCreateCast(I, Ty, Op, IP);  } @@ -174,7 +191,7 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,          ScanLimit++;        if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&            IP->getOperand(1) == RHS) -        return IP; +        return &*IP;        if (IP == BlockBegin) break;      }    } @@ -184,13 +201,13 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,    BuilderType::InsertPointGuard Guard(Builder);    // Move the insertion point out of as many loops as we can. -  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { +  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {      if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;      BasicBlock *Preheader = L->getLoopPreheader();      if (!Preheader) break;      // Ok, move up a level. -    Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); +    Builder.SetInsertPoint(Preheader->getTerminator());    }    // If we haven't found this binop, insert it. @@ -229,19 +246,15 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,      // Check for divisibility.      if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {        ConstantInt *CI = -        ConstantInt::get(SE.getContext(), -                         C->getValue()->getValue().sdiv( -                                                   FC->getValue()->getValue())); +          ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));        // If the quotient is zero and the remainder is non-zero, reject        // the value at this scale. It will be considered for subsequent        // smaller scales.        if (!CI->isZero()) {          const SCEV *Div = SE.getConstant(CI);          S = Div; -        Remainder = -          SE.getAddExpr(Remainder, -                        SE.getConstant(C->getValue()->getValue().srem( -                                                  FC->getValue()->getValue()))); +        Remainder = SE.getAddExpr( +            Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));          return true;        }      } @@ -254,10 +267,9 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,      // of the given factor. If so, we can factor it.      
const SCEVConstant *FC = cast<SCEVConstant>(Factor);      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) -      if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) { +      if (!C->getAPInt().srem(FC->getAPInt())) {          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end()); -        NewMulOps[0] = SE.getConstant( -            C->getValue()->getValue().sdiv(FC->getValue()->getValue())); +        NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));          S = SE.getMulExpr(NewMulOps);          return true;        } @@ -402,8 +414,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,        const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);        if (!ElSize->isZero()) {          SmallVector<const SCEV *, 8> NewOps; -        for (unsigned i = 0, e = Ops.size(); i != e; ++i) { -          const SCEV *Op = Ops[i]; +        for (const SCEV *Op : Ops) {            const SCEV *Remainder = SE.getConstant(Ty, 0);            if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {              // Op now has ElSize factored out. @@ -414,7 +425,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,            } else {              // The operand was not divisible, so add it to the list of operands              // we'll scan next iteration. -            NewOps.push_back(Ops[i]); +            NewOps.push_back(Op);            }          }          // If we made any changes, update Ops. @@ -483,7 +494,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,         Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));      assert(!isa<Instruction>(V) || -           SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint())); +           SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));      // Expand the operands for a plain byte offset.      Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty); @@ -508,7 +519,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,            ScanLimit++;          if (IP->getOpcode() == Instruction::GetElementPtr &&              IP->getOperand(0) == V && IP->getOperand(1) == Idx) -          return IP; +          return &*IP;          if (IP == BlockBegin) break;        }      } @@ -517,13 +528,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,      BuilderType::InsertPointGuard Guard(Builder);      // Move the insertion point out of as many loops as we can. -    while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { +    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {        if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;        BasicBlock *Preheader = L->getLoopPreheader();        if (!Preheader) break;        // Ok, move up a level. -      Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); +      Builder.SetInsertPoint(Preheader->getTerminator());      }      // Emit a GEP. @@ -537,16 +548,13 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,    BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();    // Move the insertion point out of as many loops as we can. 
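The FactorOutConstant hunks above replace C->getValue()->getValue() with the shorter SCEVConstant::getAPInt() accessor and do the divisibility test directly on APInt values. A minimal sketch of that arithmetic, for illustration only (both operands are assumed to have the same bit width, as APInt requires):

#include "llvm/ADT/APInt.h"
using namespace llvm;

// Signed divisibility check of the kind FactorOutConstant performs: the factor
// divides the constant exactly when the signed remainder is zero.
bool dividesExactly(const APInt &C, const APInt &Factor) {
  return C.srem(Factor) == 0;
}

// Signed quotient used as the new leading multiply operand when it divides.
APInt exactQuotient(const APInt &C, const APInt &Factor) {
  return C.sdiv(Factor);
}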
-  while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { +  while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {      if (!L->isLoopInvariant(V)) break; -    bool AnyIndexNotLoopInvariant = false; -    for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(), -         E = GepIndices.end(); I != E; ++I) -      if (!L->isLoopInvariant(*I)) { -        AnyIndexNotLoopInvariant = true; -        break; -      } +    bool AnyIndexNotLoopInvariant = +        std::any_of(GepIndices.begin(), GepIndices.end(), +                    [L](Value *Op) { return !L->isLoopInvariant(Op); }); +      if (AnyIndexNotLoopInvariant)        break; @@ -554,7 +562,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,      if (!Preheader) break;      // Ok, move up a level. -    Builder.SetInsertPoint(Preheader, Preheader->getTerminator()); +    Builder.SetInsertPoint(Preheader->getTerminator());    }    // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, @@ -563,9 +571,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,    Value *Casted = V;    if (V->getType() != PTy)      Casted = InsertNoopCastOfTo(Casted, PTy); -  Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, -                                 GepIndices, -                                 "scevgep"); +  Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");    Ops.push_back(SE.getUnknown(GEP));    rememberInstruction(GEP); @@ -593,8 +599,7 @@ static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,  /// expression, according to PickMostRelevantLoop.  const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {    // Test whether we've already computed the most relevant loop for this SCEV. -  std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair = -    RelevantLoops.insert(std::make_pair(S, nullptr)); +  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));    if (!Pair.second)      return Pair.first->second; @@ -603,7 +608,7 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {      return nullptr;    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {      if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) -      return Pair.first->second = SE.LI->getLoopFor(I->getParent()); +      return Pair.first->second = SE.LI.getLoopFor(I->getParent());      // A non-instruction has no relevant loops.      
return nullptr;    } @@ -611,9 +616,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {      const Loop *L = nullptr;      if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))        L = AR->getLoop(); -    for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end(); -         I != E; ++I) -      L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT); +    for (const SCEV *Op : N->operands()) +      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);      return RelevantLoops[N] = L;    }    if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) { @@ -621,10 +625,8 @@ const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {      return RelevantLoops[C] = Result;    }    if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { -    const Loop *Result = -      PickMostRelevantLoop(getRelevantLoop(D->getLHS()), -                           getRelevantLoop(D->getRHS()), -                           *SE.DT); +    const Loop *Result = PickMostRelevantLoop( +        getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);      return RelevantLoops[D] = Result;    }    llvm_unreachable("Unexpected SCEV type!"); @@ -679,13 +681,12 @@ Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {    // Sort by loop. Use a stable sort so that constants follow non-constants and    // pointer operands precede non-pointer operands. -  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); +  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));    // Emit instructions to add all the operands. Hoist as much as possible    // out of loops, and form meaningful getelementptrs where possible.    Value *Sum = nullptr; -  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator -       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) { +  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {      const Loop *CurLoop = I->first;      const SCEV *Op = I->second;      if (!Sum) { @@ -747,14 +748,13 @@ Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {      OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));    // Sort by loop. Use a stable sort so that constants follow non-constants. -  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT)); +  std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(SE.DT));    // Emit instructions to mul all the operands. Hoist as much as possible    // out of loops.    Value *Prod = nullptr; -  for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator -       I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ++I) { -    const SCEV *Op = I->second; +  for (const auto &I : OpsAndLoops) { +    const SCEV *Op = I.second;      if (!Prod) {        // This is the first operand. Just expand it.        
Prod = expand(Op); @@ -788,7 +788,7 @@ Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {    Value *LHS = expandCodeFor(S->getLHS(), Ty);    if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { -    const APInt &RHS = SC->getValue()->getValue(); +    const APInt &RHS = SC->getAPInt();      if (RHS.isPowerOf2())        return InsertBinop(Instruction::LShr, LHS,                           ConstantInt::get(Ty, RHS.logBase2())); @@ -834,7 +834,7 @@ bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,      for (User::op_iterator OI = IncV->op_begin()+1,             OE = IncV->op_end(); OI != OE; ++OI)        if (Instruction *OInst = dyn_cast<Instruction>(OI)) -        if (!SE.DT->dominates(OInst, IVIncInsertPos)) +        if (!SE.DT.dominates(OInst, IVIncInsertPos))            return false;    }    // Advance to the next instruction. @@ -873,19 +873,18 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,    case Instruction::Add:    case Instruction::Sub: {      Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); -    if (!OInst || SE.DT->dominates(OInst, InsertPos)) +    if (!OInst || SE.DT.dominates(OInst, InsertPos))        return dyn_cast<Instruction>(IncV->getOperand(0));      return nullptr;    }    case Instruction::BitCast:      return dyn_cast<Instruction>(IncV->getOperand(0));    case Instruction::GetElementPtr: -    for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end(); -         I != E; ++I) { +    for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {        if (isa<Constant>(*I))          continue;        if (Instruction *OInst = dyn_cast<Instruction>(*I)) { -        if (!SE.DT->dominates(OInst, InsertPos)) +        if (!SE.DT.dominates(OInst, InsertPos))            return nullptr;        }        if (allowScale) { @@ -912,13 +911,16 @@ Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,  /// it available to other uses in this loop. Recursively hoist any operands,  /// until we reach a value that dominates InsertPos.  bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { -  if (SE.DT->dominates(IncV, InsertPos)) +  if (SE.DT.dominates(IncV, InsertPos))        return true;    // InsertPos must itself dominate IncV so that IncV's new position satisfies    // its existing users. -  if (isa<PHINode>(InsertPos) -      || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent())) +  if (isa<PHINode>(InsertPos) || +      !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) +    return false; + +  if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))      return false;    // Check that the chain of IV operands leading back to Phi can be hoisted. @@ -930,11 +932,10 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {      // IncV is safe to hoist.      IVIncs.push_back(IncV);      IncV = Oper; -    if (SE.DT->dominates(IncV, InsertPos)) +    if (SE.DT.dominates(IncV, InsertPos))        break;    } -  for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(), -         E = IVIncs.rend(); I != E; ++I) { +  for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {      (*I)->moveBefore(InsertPos);    }    return true; @@ -1002,7 +1003,7 @@ static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,  }  /// \brief Check whether we can cheaply express the requested SCEV in terms of -/// the available PHI SCEV by truncation and/or invertion of the step. 
+/// the available PHI SCEV by truncation and/or inversion of the step.  static bool canBeCheaplyTransformed(ScalarEvolution &SE,                                      const SCEVAddRecExpr *Phi,                                      const SCEVAddRecExpr *Requested, @@ -1084,12 +1085,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,      // Only try partially matching scevs that need truncation and/or      // step-inversion if we know this loop is outside the current loop. -    bool TryNonMatchingSCEV = IVIncInsertLoop && -      SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); +    bool TryNonMatchingSCEV = +        IVIncInsertLoop && +        SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); -    for (BasicBlock::iterator I = L->getHeader()->begin(); -         PHINode *PN = dyn_cast<PHINode>(I); ++I) { -      if (!SE.isSCEVable(PN->getType())) +    for (auto &I : *L->getHeader()) { +      auto *PN = dyn_cast<PHINode>(&I); +      if (!PN || !SE.isSCEVable(PN->getType()))          continue;        const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN)); @@ -1142,7 +1144,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,        // Potentially, move the increment. We have made sure in        // isExpandedAddRecExprPHI or hoistIVInc that this is possible.        if (L == IVIncInsertLoop) -        hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch); +        hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);        // Ok, the add recurrence looks usable.        // Remember this PHI, even in post-inc mode. @@ -1167,13 +1169,13 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,    PostIncLoops.clear();    // Expand code for the start value. -  Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy, -                                L->getHeader()->begin()); +  Value *StartV = +      expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front());    // StartV must be hoisted into L's preheader to dominate the new phi.    assert(!isa<Instruction>(StartV) || -         SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(), -                                  L->getHeader())); +         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(), +                                 L->getHeader()));    // Expand code for the step value. Do this before creating the PHI so that PHI    // reuse code doesn't see an incomplete PHI. @@ -1185,7 +1187,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,    if (useSubtract)      Step = SE.getNegativeSCEV(Step);    // Expand the step somewhere that dominates the loop header. -  Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); +  Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());    // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if    // we actually do emit an addition.  
It does not apply if we emit a @@ -1249,9 +1251,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {    if (PostIncLoops.count(L)) {      PostIncLoopSet Loops;      Loops.insert(L); -    Normalized = -      cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, nullptr, -                                                  nullptr, Loops, SE, *SE.DT)); +    Normalized = cast<SCEVAddRecExpr>(TransformForPostIncUse( +        Normalize, S, nullptr, nullptr, Loops, SE, SE.DT));    }    // Strip off any non-loop-dominating component from the addrec start. @@ -1301,9 +1302,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {      // For an expansion to use the postinc form, the client must call      // expandCodeFor with an InsertPoint that is either outside the PostIncLoop      // or dominated by IVIncInsertPos. -    if (isa<Instruction>(Result) -        && !SE.DT->dominates(cast<Instruction>(Result), -                             Builder.GetInsertPoint())) { +    if (isa<Instruction>(Result) && +        !SE.DT.dominates(cast<Instruction>(Result), +                         &*Builder.GetInsertPoint())) {        // The induction variable's postinc expansion does not dominate this use.        // IVUsers tries to prevent this case, so it is rare. However, it can        // happen when an IVUser outside the loop is not dominated by the latch @@ -1321,7 +1322,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {        {          // Expand the step somewhere that dominates the loop header.          BuilderType::InsertPointGuard Guard(Builder); -        StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); +        StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());        }        Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);      } @@ -1395,13 +1396,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {      Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),                                         S->getNoWrapFlags(SCEV::FlagNW)));      BasicBlock::iterator NewInsertPt = -      std::next(BasicBlock::iterator(cast<Instruction>(V))); -    BuilderType::InsertPointGuard Guard(Builder); -    while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) || -           isa<LandingPadInst>(NewInsertPt)) -      ++NewInsertPt; +        findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());      V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, -                      NewInsertPt); +                      &*NewInsertPt);      return V;    } @@ -1442,7 +1439,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {      BasicBlock *Header = L->getHeader();      pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);      CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", -                                  Header->begin()); +                                  &Header->front());      rememberInstruction(CanonicalIV);      SmallSet<BasicBlock *, 4> PredSeen; @@ -1587,7 +1584,8 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {  Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,                                     Instruction *IP) { -  Builder.SetInsertPoint(IP->getParent(), IP); +  assert(IP); +  Builder.SetInsertPoint(IP);    return expandCodeFor(SH, Ty);  } @@ -1605,8 +1603,8 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {  Value *SCEVExpander::expand(const SCEV *S) 
{    // Compute an insertion point for this SCEV object. Hoist the instructions    // as far out in the loop nest as possible. -  Instruction *InsertPt = Builder.GetInsertPoint(); -  for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ; +  Instruction *InsertPt = &*Builder.GetInsertPoint(); +  for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;         L = L->getParentLoop())      if (SE.isLoopInvariant(S, L)) {        if (!L) break; @@ -1616,30 +1614,29 @@ Value *SCEVExpander::expand(const SCEV *S) {          // LSR sets the insertion point for AddRec start/step values to the          // block start to simplify value reuse, even though it's an invalid          // position. SCEVExpander must correct for this in all cases. -        InsertPt = L->getHeader()->getFirstInsertionPt(); +        InsertPt = &*L->getHeader()->getFirstInsertionPt();        }      } else {        // If the SCEV is computable at this level, insert it into the header        // after the PHIs (and after any other instructions that we've inserted        // there) so that it is guaranteed to dominate any user inside the loop.        if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L)) -        InsertPt = L->getHeader()->getFirstInsertionPt(); +        InsertPt = &*L->getHeader()->getFirstInsertionPt();        while (InsertPt != Builder.GetInsertPoint()               && (isInsertedInstruction(InsertPt)                   || isa<DbgInfoIntrinsic>(InsertPt))) { -        InsertPt = std::next(BasicBlock::iterator(InsertPt)); +        InsertPt = &*std::next(InsertPt->getIterator());        }        break;      }    // Check to see if we already expanded this here. -  std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator -    I = InsertedExpressions.find(std::make_pair(S, InsertPt)); +  auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));    if (I != InsertedExpressions.end())      return I->second;    BuilderType::InsertPointGuard Guard(Builder); -  Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); +  Builder.SetInsertPoint(InsertPt);    // Expand the expression into instructions.    Value *V = visit(S); @@ -1677,8 +1674,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,    // Emit code for it.    BuilderType::InsertPointGuard Guard(Builder); -  PHINode *V = cast<PHINode>(expandCodeFor(H, nullptr, -                                           L->getHeader()->begin())); +  PHINode *V = +      cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));    return V;  } @@ -1694,10 +1691,13 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,                                             const TargetTransformInfo *TTI) {    // Find integer phis in order of increasing width.    SmallVector<PHINode*, 8> Phis; -  for (BasicBlock::iterator I = L->getHeader()->begin(); -       PHINode *Phi = dyn_cast<PHINode>(I); ++I) { -    Phis.push_back(Phi); +  for (auto &I : *L->getHeader()) { +    if (auto *PN = dyn_cast<PHINode>(&I)) +      Phis.push_back(PN); +    else +      break;    } +    if (TTI)      std::sort(Phis.begin(), Phis.end(), [](Value *LHS, Value *RHS) {        // Put pointers at the back and make sure pointer < pointer = false. @@ -1711,13 +1711,23 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,    DenseMap<const SCEV *, PHINode *> ExprToIVMap;    // Process phis from wide to narrow. Map wide phis to their truncation    // so narrow phis can reuse them. 
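The expandCodeFor()/expand() changes above now take the insertion point as a non-null Instruction* and still hoist the actual emission point out of loops where possible. A hypothetical usage sketch, with ScalarEvolution, the DataLayout, the loop, and its preheader assumed to be available and the backedge-taken count assumed computable:

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

// Expand a loop's backedge-taken count right before the preheader terminator.
Value *emitBackedgeTakenCount(ScalarEvolution &SE, const DataLayout &DL,
                              Loop *L, BasicBlock *Preheader) {
  SCEVExpander Expander(SE, DL, "expanded");
  const SCEV *BTC = SE.getBackedgeTakenCount(L);
  return Expander.expandCodeFor(BTC, BTC->getType(),
                                Preheader->getTerminator());
}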
-  for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(), -         PEnd = Phis.end(); PIter != PEnd; ++PIter) { -    PHINode *Phi = *PIter; +  for (PHINode *Phi : Phis) { +    auto SimplifyPHINode = [&](PHINode *PN) -> Value * { +      if (Value *V = SimplifyInstruction(PN, DL, &SE.TLI, &SE.DT, &SE.AC)) +        return V; +      if (!SE.isSCEVable(PN->getType())) +        return nullptr; +      auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN)); +      if (!Const) +        return nullptr; +      return Const->getValue(); +    };      // Fold constant phis. They may be congruent to other constant phis and      // would confuse the logic below that expects proper IVs. -    if (Value *V = SimplifyInstruction(Phi, DL, SE.TLI, SE.DT, SE.AC)) { +    if (Value *V = SimplifyPHINode(Phi)) { +      if (V->getType() != Phi->getType()) +        continue;        Phi->replaceAllUsesWith(V);        DeadInsts.emplace_back(Phi);        ++NumElim; @@ -1784,7 +1794,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,          if (OrigInc->getType() != IsomorphicInc->getType()) {            Instruction *IP = nullptr;            if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) -            IP = PN->getParent()->getFirstInsertionPt(); +            IP = &*PN->getParent()->getFirstInsertionPt();            else              IP = OrigInc->getNextNode(); @@ -1802,7 +1812,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,      ++NumElim;      Value *NewIV = OrigPhiRef;      if (OrigPhiRef->getType() != Phi->getType()) { -      IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt()); +      IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());        Builder.SetCurrentDebugLocation(Phi->getDebugLoc());        NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);      } @@ -1812,8 +1822,46 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,    return NumElim;  } +Value *SCEVExpander::findExistingExpansion(const SCEV *S, +                                           const Instruction *At, Loop *L) { +  using namespace llvm::PatternMatch; + +  SmallVector<BasicBlock *, 4> ExitingBlocks; +  L->getExitingBlocks(ExitingBlocks); + +  // Look for suitable value in simple conditions at the loop exits. +  for (BasicBlock *BB : ExitingBlocks) { +    ICmpInst::Predicate Pred; +    Instruction *LHS, *RHS; +    BasicBlock *TrueBB, *FalseBB; + +    if (!match(BB->getTerminator(), +               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), +                    TrueBB, FalseBB))) +      continue; + +    if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) +      return LHS; + +    if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) +      return RHS; +  } + +  // There is potential to make this significantly smarter, but this simple +  // heuristic already gets some interesting cases. + +  // Can not find suitable value. +  return nullptr; +} +  bool SCEVExpander::isHighCostExpansionHelper( -    const SCEV *S, Loop *L, SmallPtrSetImpl<const SCEV *> &Processed) { +    const SCEV *S, Loop *L, const Instruction *At, +    SmallPtrSetImpl<const SCEV *> &Processed) { + +  // If we can find an existing value for this scev avaliable at the point "At" +  // then consider the expression cheap. 
+  if (At && findExistingExpansion(S, At, L) != nullptr) +    return false;    // Zero/One operand expressions    switch (S->getSCEVType()) { @@ -1821,14 +1869,14 @@ bool SCEVExpander::isHighCostExpansionHelper(    case scConstant:      return false;    case scTruncate: -    return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), L, -                                     Processed); +    return isHighCostExpansionHelper(cast<SCEVTruncateExpr>(S)->getOperand(), +                                     L, At, Processed);    case scZeroExtend:      return isHighCostExpansionHelper(cast<SCEVZeroExtendExpr>(S)->getOperand(), -                                     L, Processed); +                                     L, At, Processed);    case scSignExtend:      return isHighCostExpansionHelper(cast<SCEVSignExtendExpr>(S)->getOperand(), -                                     L, Processed); +                                     L, At, Processed);    }    if (!Processed.insert(S).second) @@ -1836,10 +1884,10 @@ bool SCEVExpander::isHighCostExpansionHelper(    if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {      // If the divisor is a power of two and the SCEV type fits in a native -    // integer, consider the divison cheap irrespective of whether it occurs in +    // integer, consider the division cheap irrespective of whether it occurs in      // the user code since it can be lowered into a right shift.      if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) -      if (SC->getValue()->getValue().isPowerOf2()) { +      if (SC->getAPInt().isPowerOf2()) {          const DataLayout &DL =              L->getHeader()->getParent()->getParent()->getDataLayout();          unsigned Width = cast<IntegerType>(UDivExpr->getType())->getBitWidth(); @@ -1855,22 +1903,14 @@ bool SCEVExpander::isHighCostExpansionHelper(      if (!ExitingBB)        return true; -    BranchInst *ExitingBI = dyn_cast<BranchInst>(ExitingBB->getTerminator()); -    if (!ExitingBI || !ExitingBI->isConditional()) +    // At the beginning of this function we already tried to find existing value +    // for plain 'S'. Now try to lookup 'S + 1' since it is common pattern +    // involving division. This is just a simple search heuristic. +    if (!At) +      At = &ExitingBB->back(); +    if (!findExistingExpansion( +            SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))        return true; - -    ICmpInst *OrigCond = dyn_cast<ICmpInst>(ExitingBI->getCondition()); -    if (!OrigCond) -      return true; - -    const SCEV *RHS = SE.getSCEV(OrigCond->getOperand(1)); -    RHS = SE.getMinusSCEV(RHS, SE.getConstant(RHS->getType(), 1)); -    if (RHS != S) { -      const SCEV *LHS = SE.getSCEV(OrigCond->getOperand(0)); -      LHS = SE.getMinusSCEV(LHS, SE.getConstant(LHS->getType(), 1)); -      if (LHS != S) -        return true; -    }    }    // HowManyLessThans uses a Max expression whenever the loop is not guarded by @@ -1882,11 +1922,9 @@ bool SCEVExpander::isHighCostExpansionHelper(    // BackedgeTakenCount. They may already exist in program code, and if not,    // they are not too expensive rematerialize.    
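findExistingExpansion, added above, relies on the PatternMatch library to recognize loop exits guarded by a branch on an icmp between two instructions. A minimal stand-alone illustration of that idiom (not part of the change):

#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Return true and bind LHS/RHS when BB ends in: br (icmp pred LHS, RHS), T, F.
bool exitGuardedByICmp(BasicBlock *BB, Instruction *&LHS, Instruction *&RHS) {
  ICmpInst::Predicate Pred;
  BasicBlock *TrueBB, *FalseBB;
  return match(BB->getTerminator(),
               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
                    TrueBB, FalseBB));
}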
if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) { -    for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); -         I != E; ++I) { -      if (isHighCostExpansionHelper(*I, L, Processed)) +    for (auto *Op : NAry->operands()) +      if (isHighCostExpansionHelper(Op, L, At, Processed))          return true; -    }    }    // If we haven't recognized an expensive SCEV pattern, assume it's an @@ -1894,6 +1932,43 @@ bool SCEVExpander::isHighCostExpansionHelper(    return false;  } +Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, +                                            Instruction *IP) { +  assert(IP); +  switch (Pred->getKind()) { +  case SCEVPredicate::P_Union: +    return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP); +  case SCEVPredicate::P_Equal: +    return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP); +  } +  llvm_unreachable("Unknown SCEV predicate type"); +} + +Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred, +                                          Instruction *IP) { +  Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP); +  Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP); + +  Builder.SetInsertPoint(IP); +  auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check"); +  return I; +} + +Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, +                                          Instruction *IP) { +  auto *BoolType = IntegerType::get(IP->getContext(), 1); +  Value *Check = ConstantInt::getNullValue(BoolType); + +  // Loop over all checks in this set. +  for (auto Pred : Union->getPredicates()) { +    auto *NextCheck = expandCodeForPredicate(Pred, IP); +    Builder.SetInsertPoint(IP); +    Check = Builder.CreateOr(Check, NextCheck); +  } + +  return Check; +} +  namespace {  // Search for a SCEV subexpression that is not safe to expand.  Any expression  // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp index b238fe43cc60..b7fd5d506175 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -109,7 +109,7 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) {      SmallVector<const SCEV *, 8> Operands;      const Loop *L = AR->getLoop();      // The addrec conceptually uses its operands at loop entry. -    Instruction *LUser = L->getHeader()->begin(); +    Instruction *LUser = &L->getHeader()->front();      // Transform each operand.      
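The expandEqualPredicate and expandUnionPredicate routines added above emit a boolean that is true when any predicate fails: each equality predicate becomes an ICmpNE ("ident.check") and the union ORs the individual checks together. A hedged consumer sketch, with the fallback code a real caller would populate elided and all names assumed:

#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;

// Guard the code after InsertPt so it may assume every predicate in Union
// holds; SplitBlockAndInsertIfThen is just one plausible way to emit the guard.
void guardWithPredicates(SCEVExpander &Expander,
                         const SCEVUnionPredicate &Union,
                         Instruction *InsertPt) {
  Value *AnyFailed = Expander.expandCodeForPredicate(&Union, InsertPt);
  // The "then" block produced here is where a caller would place its fallback.
  SplitBlockAndInsertIfThen(AnyFailed, InsertPt, /*Unreachable=*/false);
}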
for (SCEVNAryExpr::op_iterator I = AR->op_begin(), E = AR->op_end();           I != E; ++I) { diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp index a5fca3e79b37..029997adab9e 100644 --- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp +++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp @@ -32,22 +32,23 @@  //  //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ScopedNoAliasAA.h"  #include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/TargetLibraryInfo.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/LLVMContext.h"  #include "llvm/IR/Metadata.h"  #include "llvm/IR/Module.h"  #include "llvm/Pass.h"  #include "llvm/Support/CommandLine.h" +  using namespace llvm;  // A handy option for disabling scoped no-alias functionality. The same effect  // can also be achieved by stripping the associated metadata tags from IR, but  // this option is sometimes more convenient. -static cl::opt<bool> -EnableScopedNoAlias("enable-scoped-noalias", cl::init(true)); +static cl::opt<bool> EnableScopedNoAlias("enable-scoped-noalias", +                                         cl::init(true));  namespace {  /// AliasScopeNode - This is a simple wrapper around an MDNode which provides @@ -57,7 +58,7 @@ class AliasScopeNode {    const MDNode *Node;  public: -  AliasScopeNode() : Node(0) {} +  AliasScopeNode() : Node(nullptr) {}    explicit AliasScopeNode(const MDNode *N) : Node(N) {}    /// getNode - Get the MDNode for this AliasScopeNode. @@ -70,79 +71,74 @@ public:      return dyn_cast_or_null<MDNode>(Node->getOperand(1));    }  }; +} // end of anonymous namespace -/// ScopedNoAliasAA - This is a simple alias analysis -/// implementation that uses scoped-noalias metadata to answer queries. -class ScopedNoAliasAA : public ImmutablePass, public AliasAnalysis { -public: -  static char ID; // Class identification, replacement for typeinfo -  ScopedNoAliasAA() : ImmutablePass(ID) { -    initializeScopedNoAliasAAPass(*PassRegistry::getPassRegistry()); -  } +AliasResult ScopedNoAliasAAResult::alias(const MemoryLocation &LocA, +                                         const MemoryLocation &LocB) { +  if (!EnableScopedNoAlias) +    return AAResultBase::alias(LocA, LocB); -  bool doInitialization(Module &M) override; +  // Get the attached MDNodes. +  const MDNode *AScopes = LocA.AATags.Scope, *BScopes = LocB.AATags.Scope; -  /// getAdjustedAnalysisPointer - This method is used when a pass implements -  /// an analysis interface through multiple inheritance.  If needed, it -  /// should override this to adjust the this pointer as needed for the -  /// specified pass info. 
-  void *getAdjustedAnalysisPointer(const void *PI) override { -    if (PI == &AliasAnalysis::ID) -      return (AliasAnalysis*)this; -    return this; -  } +  const MDNode *ANoAlias = LocA.AATags.NoAlias, *BNoAlias = LocB.AATags.NoAlias; -protected: -  bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const; -  void collectMDInDomain(const MDNode *List, const MDNode *Domain, -                         SmallPtrSetImpl<const MDNode *> &Nodes) const; - -private: -  void getAnalysisUsage(AnalysisUsage &AU) const override; -  AliasResult alias(const MemoryLocation &LocA, -                    const MemoryLocation &LocB) override; -  bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) override; -  ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; -  ModRefBehavior getModRefBehavior(const Function *F) override; -  ModRefResult getModRefInfo(ImmutableCallSite CS, -                             const MemoryLocation &Loc) override; -  ModRefResult getModRefInfo(ImmutableCallSite CS1, -                             ImmutableCallSite CS2) override; -}; -}  // End of anonymous namespace +  if (!mayAliasInScopes(AScopes, BNoAlias)) +    return NoAlias; -// Register this pass... -char ScopedNoAliasAA::ID = 0; -INITIALIZE_AG_PASS(ScopedNoAliasAA, AliasAnalysis, "scoped-noalias", -                   "Scoped NoAlias Alias Analysis", false, true, false) +  if (!mayAliasInScopes(BScopes, ANoAlias)) +    return NoAlias; -ImmutablePass *llvm::createScopedNoAliasAAPass() { -  return new ScopedNoAliasAA(); +  // If they may alias, chain to the next AliasAnalysis. +  return AAResultBase::alias(LocA, LocB);  } -bool ScopedNoAliasAA::doInitialization(Module &M) { -  InitializeAliasAnalysis(this, &M.getDataLayout()); -  return true; +ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS, +                                                const MemoryLocation &Loc) { +  if (!EnableScopedNoAlias) +    return AAResultBase::getModRefInfo(CS, Loc); + +  if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( +                                              LLVMContext::MD_noalias))) +    return MRI_NoModRef; + +  if (!mayAliasInScopes( +          CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), +          Loc.AATags.NoAlias)) +    return MRI_NoModRef; + +  return AAResultBase::getModRefInfo(CS, Loc);  } -void -ScopedNoAliasAA::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.setPreservesAll(); -  AliasAnalysis::getAnalysisUsage(AU); +ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1, +                                                ImmutableCallSite CS2) { +  if (!EnableScopedNoAlias) +    return AAResultBase::getModRefInfo(CS1, CS2); + +  if (!mayAliasInScopes( +          CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), +          CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) +    return MRI_NoModRef; + +  if (!mayAliasInScopes( +          CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), +          CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) +    return MRI_NoModRef; + +  return AAResultBase::getModRefInfo(CS1, CS2);  } -void -ScopedNoAliasAA::collectMDInDomain(const MDNode *List, const MDNode *Domain, -                   SmallPtrSetImpl<const MDNode *> &Nodes) const { +void ScopedNoAliasAAResult::collectMDInDomain( +    const MDNode *List, const MDNode *Domain, +    SmallPtrSetImpl<const MDNode *> &Nodes) const {    for (unsigned i = 0, ie = 
List->getNumOperands(); i != ie; ++i)      if (const MDNode *MD = dyn_cast<MDNode>(List->getOperand(i)))        if (AliasScopeNode(MD).getDomain() == Domain)          Nodes.insert(MD);  } -bool -ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes, -                                  const MDNode *NoAlias) const { +bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes, +                                             const MDNode *NoAlias) const {    if (!Scopes || !NoAlias)      return true; @@ -177,76 +173,40 @@ ScopedNoAliasAA::mayAliasInScopes(const MDNode *Scopes,    return true;  } -AliasResult ScopedNoAliasAA::alias(const MemoryLocation &LocA, -                                   const MemoryLocation &LocB) { -  if (!EnableScopedNoAlias) -    return AliasAnalysis::alias(LocA, LocB); - -  // Get the attached MDNodes. -  const MDNode *AScopes = LocA.AATags.Scope, -               *BScopes = LocB.AATags.Scope; +ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F, +                                           AnalysisManager<Function> *AM) { +  return ScopedNoAliasAAResult(AM->getResult<TargetLibraryAnalysis>(F)); +} -  const MDNode *ANoAlias = LocA.AATags.NoAlias, -               *BNoAlias = LocB.AATags.NoAlias; +char ScopedNoAliasAA::PassID; -  if (!mayAliasInScopes(AScopes, BNoAlias)) -    return NoAlias; - -  if (!mayAliasInScopes(BScopes, ANoAlias)) -    return NoAlias; +char ScopedNoAliasAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(ScopedNoAliasAAWrapperPass, "scoped-noalias", +                      "Scoped NoAlias Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(ScopedNoAliasAAWrapperPass, "scoped-noalias", +                    "Scoped NoAlias Alias Analysis", false, true) -  // If they may alias, chain to the next AliasAnalysis. 
-  return AliasAnalysis::alias(LocA, LocB); +ImmutablePass *llvm::createScopedNoAliasAAWrapperPass() { +  return new ScopedNoAliasAAWrapperPass();  } -bool ScopedNoAliasAA::pointsToConstantMemory(const MemoryLocation &Loc, -                                             bool OrLocal) { -  return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +ScopedNoAliasAAWrapperPass::ScopedNoAliasAAWrapperPass() : ImmutablePass(ID) { +  initializeScopedNoAliasAAWrapperPassPass(*PassRegistry::getPassRegistry());  } -AliasAnalysis::ModRefBehavior -ScopedNoAliasAA::getModRefBehavior(ImmutableCallSite CS) { -  return AliasAnalysis::getModRefBehavior(CS); +bool ScopedNoAliasAAWrapperPass::doInitialization(Module &M) { +  Result.reset(new ScopedNoAliasAAResult( +      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); +  return false;  } -AliasAnalysis::ModRefBehavior -ScopedNoAliasAA::getModRefBehavior(const Function *F) { -  return AliasAnalysis::getModRefBehavior(F); +bool ScopedNoAliasAAWrapperPass::doFinalization(Module &M) { +  Result.reset(); +  return false;  } -AliasAnalysis::ModRefResult -ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS, -                               const MemoryLocation &Loc) { -  if (!EnableScopedNoAlias) -    return AliasAnalysis::getModRefInfo(CS, Loc); - -  if (!mayAliasInScopes(Loc.AATags.Scope, CS.getInstruction()->getMetadata( -                                              LLVMContext::MD_noalias))) -    return NoModRef; - -  if (!mayAliasInScopes( -          CS.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), -          Loc.AATags.NoAlias)) -    return NoModRef; - -  return AliasAnalysis::getModRefInfo(CS, Loc); -} - -AliasAnalysis::ModRefResult -ScopedNoAliasAA::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) { -  if (!EnableScopedNoAlias) -    return AliasAnalysis::getModRefInfo(CS1, CS2); - -  if (!mayAliasInScopes( -          CS1.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), -          CS2.getInstruction()->getMetadata(LLVMContext::MD_noalias))) -    return NoModRef; - -  if (!mayAliasInScopes( -          CS2.getInstruction()->getMetadata(LLVMContext::MD_alias_scope), -          CS1.getInstruction()->getMetadata(LLVMContext::MD_noalias))) -    return NoModRef; - -  return AliasAnalysis::getModRefInfo(CS1, CS2); +void ScopedNoAliasAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<TargetLibraryInfoWrapperPass>();  } - diff --git a/contrib/llvm/lib/Analysis/SparsePropagation.cpp b/contrib/llvm/lib/Analysis/SparsePropagation.cpp index edd82f5fe296..f5a927b80525 100644 --- a/contrib/llvm/lib/Analysis/SparsePropagation.cpp +++ b/contrib/llvm/lib/Analysis/SparsePropagation.cpp @@ -328,17 +328,17 @@ void SparseSolver::Solve(Function &F) {  void SparseSolver::Print(Function &F, raw_ostream &OS) const {    OS << "\nFUNCTION: " << F.getName() << "\n"; -  for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { -    if (!BBExecutable.count(BB)) +  for (auto &BB : F) { +    if (!BBExecutable.count(&BB))        OS << "INFEASIBLE: ";      OS << "\t"; -    if (BB->hasName()) -      OS << BB->getName() << ":\n"; +    if (BB.hasName()) +      OS << BB.getName() << ":\n";      else        OS << "; anon bb\n"; -    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { -      LatticeFunc->PrintValue(getLatticeState(I), OS); -      OS << *I << "\n"; +    for (auto &I : BB) { +      LatticeFunc->PrintValue(getLatticeState(&I), OS); +      OS << I << "\n";      
}      OS << "\n"; diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp index 635c50ca6e53..e00f4aed07fc 100644 --- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -61,10 +61,19 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,    }  #endif // !NDEBUG +  if (T.getArch() == Triple::r600 || +      T.getArch() == Triple::amdgcn) { +    TLI.setUnavailable(LibFunc::ldexp); +    TLI.setUnavailable(LibFunc::ldexpf); +    TLI.setUnavailable(LibFunc::ldexpl); +  } +    // There are no library implementations of mempcy and memset for AMD gpus and    // these can be difficult to lower in the backend.    if (T.getArch() == Triple::r600 || -      T.getArch() == Triple::amdgcn) { +      T.getArch() == Triple::amdgcn || +      T.getArch() == Triple::wasm32 || +      T.getArch() == Triple::wasm64) {      TLI.setUnavailable(LibFunc::memcpy);      TLI.setUnavailable(LibFunc::memset);      TLI.setUnavailable(LibFunc::memset_pattern16); @@ -72,13 +81,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,    }    // memset_pattern16 is only available on iOS 3.0 and Mac OS X 10.5 and later. +  // All versions of watchOS support it.    if (T.isMacOSX()) {      if (T.isMacOSXVersionLT(10, 5))        TLI.setUnavailable(LibFunc::memset_pattern16);    } else if (T.isiOS()) {      if (T.isOSVersionLT(3, 0))        TLI.setUnavailable(LibFunc::memset_pattern16); -  } else { +  } else if (!T.isWatchOS()) {      TLI.setUnavailable(LibFunc::memset_pattern16);    } @@ -286,8 +296,13 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,      }      break;    case Triple::IOS: +  case Triple::TvOS: +  case Triple::WatchOS:      TLI.setUnavailable(LibFunc::exp10l); -    if (T.isOSVersionLT(7, 0)) { +    if (!T.isWatchOS() && (T.isOSVersionLT(7, 0) || +                           (T.isOSVersionLT(9, 0) && +                            (T.getArch() == Triple::x86 || +                             T.getArch() == Triple::x86_64)))) {        TLI.setUnavailable(LibFunc::exp10);        TLI.setUnavailable(LibFunc::exp10f);      } else { @@ -311,12 +326,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,    // ffsl is available on at least Darwin, Mac OS X, iOS, FreeBSD, and    // Linux (GLIBC):    // http://developer.apple.com/library/mac/#documentation/Darwin/Reference/ManPages/man3/ffsl.3.html -  // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsl.c +  // http://svn.freebsd.org/base/head/lib/libc/string/ffsl.c    // http://www.gnu.org/software/gnulib/manual/html_node/ffsl.html    switch (T.getOS()) {    case Triple::Darwin:    case Triple::MacOSX:    case Triple::IOS: +  case Triple::TvOS: +  case Triple::WatchOS:    case Triple::FreeBSD:    case Triple::Linux:      break; @@ -325,9 +342,14 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,    }    // ffsll is available on at least FreeBSD and Linux (GLIBC): -  // http://svn.freebsd.org/base/user/eri/pf45/head/lib/libc/string/ffsll.c +  // http://svn.freebsd.org/base/head/lib/libc/string/ffsll.c    // http://www.gnu.org/software/gnulib/manual/html_node/ffsll.html    switch (T.getOS()) { +  case Triple::Darwin: +  case Triple::MacOSX: +  case Triple::IOS: +  case Triple::TvOS: +  case Triple::WatchOS:    case Triple::FreeBSD:    case Triple::Linux:      break; @@ -335,6 +357,16 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,      
TLI.setUnavailable(LibFunc::ffsll);    } +  // The following functions are available on at least FreeBSD: +  // http://svn.freebsd.org/base/head/lib/libc/string/fls.c +  // http://svn.freebsd.org/base/head/lib/libc/string/flsl.c +  // http://svn.freebsd.org/base/head/lib/libc/string/flsll.c +  if (!T.isOSFreeBSD()) { +    TLI.setUnavailable(LibFunc::fls); +    TLI.setUnavailable(LibFunc::flsl); +    TLI.setUnavailable(LibFunc::flsll); +  } +    // The following functions are available on at least Linux:    if (!T.isOSLinux()) {      TLI.setUnavailable(LibFunc::dunder_strdup); diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 7d1c3fbef68a..9c1d3fd4f582 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -46,30 +46,37 @@ TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {    return *this;  } -unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, -                                               Type *OpTy) const { -  return TTIImpl->getOperationCost(Opcode, Ty, OpTy); +int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, +                                          Type *OpTy) const { +  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, -                                          int NumArgs) const { -  return TTIImpl->getCallCost(FTy, NumArgs); +int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs) const { +  int Cost = TTIImpl->getCallCost(FTy, NumArgs); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned -TargetTransformInfo::getCallCost(const Function *F, -                                 ArrayRef<const Value *> Arguments) const { -  return TTIImpl->getCallCost(F, Arguments); +int TargetTransformInfo::getCallCost(const Function *F, +                                     ArrayRef<const Value *> Arguments) const { +  int Cost = TTIImpl->getCallCost(F, Arguments); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned -TargetTransformInfo::getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, -                                      ArrayRef<const Value *> Arguments) const { -  return TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); +int TargetTransformInfo::getIntrinsicCost( +    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments) const { +  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getUserCost(const User *U) const { -  return TTIImpl->getUserCost(U); +int TargetTransformInfo::getUserCost(const User *U) const { +  int Cost = TTIImpl->getUserCost(U); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  }  bool TargetTransformInfo::hasBranchDivergence() const { @@ -106,14 +113,20 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,                                          Scale, AddrSpace);  } -bool TargetTransformInfo::isLegalMaskedStore(Type *DataType, -                                             int Consecutive) const { -  return TTIImpl->isLegalMaskedStore(DataType, Consecutive); +bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) 
const { +  return TTIImpl->isLegalMaskedStore(DataType); +} + +bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const { +  return TTIImpl->isLegalMaskedLoad(DataType);  } -bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType, -                                            int Consecutive) const { -  return TTIImpl->isLegalMaskedLoad(DataType, Consecutive); +bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const { +  return TTIImpl->isLegalMaskedGather(DataType); +} + +bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const { +  return TTIImpl->isLegalMaskedGather(DataType);  }  int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, @@ -121,8 +134,10 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,                                                bool HasBaseReg,                                                int64_t Scale,                                                unsigned AddrSpace) const { -  return TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, -                                       Scale, AddrSpace); +  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, +                                           Scale, AddrSpace); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  }  bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { @@ -153,6 +168,10 @@ bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) c    return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);  } +bool TargetTransformInfo::enableInterleavedAccessVectorization() const { +  return TTIImpl->enableInterleavedAccessVectorization(); +} +  TargetTransformInfo::PopcntSupportKind  TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {    return TTIImpl->getPopcntSupport(IntTyWidthInBit); @@ -162,22 +181,30 @@ bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {    return TTIImpl->haveFastSqrt(Ty);  } -unsigned TargetTransformInfo::getFPOpCost(Type *Ty) const { -  return TTIImpl->getFPOpCost(Ty); +int TargetTransformInfo::getFPOpCost(Type *Ty) const { +  int Cost = TTIImpl->getFPOpCost(Ty); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { -  return TTIImpl->getIntImmCost(Imm, Ty); +int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { +  int Cost = TTIImpl->getIntImmCost(Imm, Ty); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, -                                            const APInt &Imm, Type *Ty) const { -  return TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx, +                                       const APInt &Imm, Type *Ty) const { +  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, -                                            const APInt &Imm, Type *Ty) const { -  return TTIImpl->getIntImmCost(IID, Idx, Imm, Ty); +int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx, +                                       const APInt &Imm, Type *Ty) const { +  int Cost = TTIImpl->getIntImmCost(IID, 
Idx, Imm, Ty); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  }  unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { @@ -192,81 +219,122 @@ unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {    return TTIImpl->getMaxInterleaveFactor(VF);  } -unsigned TargetTransformInfo::getArithmeticInstrCost( +int TargetTransformInfo::getArithmeticInstrCost(      unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,      OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,      OperandValueProperties Opd2PropInfo) const { -  return TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, -                                         Opd1PropInfo, Opd2PropInfo); +  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info, +                                             Opd1PropInfo, Opd2PropInfo); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, -                                             int Index, Type *SubTp) const { -  return TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); +int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index, +                                        Type *SubTp) const { +  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, -                                               Type *Src) const { -  return TTIImpl->getCastInstrCost(Opcode, Dst, Src); +int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, +                                          Type *Src) const { +  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { -  return TTIImpl->getCFInstrCost(Opcode); +int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { +  int Cost = TTIImpl->getCFInstrCost(Opcode); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, -                                                 Type *CondTy) const { -  return TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); +int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, +                                            Type *CondTy) const { +  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, -                                                 unsigned Index) const { -  return TTIImpl->getVectorInstrCost(Opcode, Val, Index); +int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, +                                            unsigned Index) const { +  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, -                                              unsigned Alignment, -                                              unsigned AddressSpace) const { -  return TTIImpl->getMemoryOpCost(Opcode, Src, 
Alignment, AddressSpace); +int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, +                                         unsigned Alignment, +                                         unsigned AddressSpace) const { +  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned -TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, -                                           unsigned Alignment, -                                           unsigned AddressSpace) const { -  return TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, +                                               unsigned Alignment, +                                               unsigned AddressSpace) const { +  int Cost = +      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost; +} + +int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy, +                                                Value *Ptr, bool VariableMask, +                                                unsigned Alignment) const { +  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, +                                             Alignment); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getInterleavedMemoryOpCost( +int TargetTransformInfo::getInterleavedMemoryOpCost(      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,      unsigned Alignment, unsigned AddressSpace) const { -  return TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, -                                             Alignment, AddressSpace); +  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, +                                                 Alignment, AddressSpace); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned -TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, -                                           ArrayRef<Type *> Tys) const { -  return TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, +                                               ArrayRef<Type *> Tys) const { +  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, -                                               ArrayRef<Type *> Tys) const { -  return TTIImpl->getCallInstrCost(F, RetTy, Tys); +int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, +                                               ArrayRef<Value *> Args) const { +  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost; +} + +int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy, +                                          ArrayRef<Type *> Tys) const { +  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  }  unsigned 
TargetTransformInfo::getNumberOfParts(Type *Tp) const {    return TTIImpl->getNumberOfParts(Tp);  } -unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp, -                                                        bool IsComplex) const { -  return TTIImpl->getAddressComputationCost(Tp, IsComplex); +int TargetTransformInfo::getAddressComputationCost(Type *Tp, +                                                   bool IsComplex) const { +  int Cost = TTIImpl->getAddressComputationCost(Tp, IsComplex); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  } -unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, -                                               bool IsPairwiseForm) const { -  return TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); +int TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, +                                          bool IsPairwiseForm) const { +  int Cost = TTIImpl->getReductionCost(Opcode, Ty, IsPairwiseForm); +  assert(Cost >= 0 && "TTI should not produce negative costs!"); +  return Cost;  }  unsigned @@ -284,9 +352,9 @@ Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(    return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);  } -bool TargetTransformInfo::hasCompatibleFunctionAttributes( -    const Function *Caller, const Function *Callee) const { -  return TTIImpl->hasCompatibleFunctionAttributes(Caller, Callee); +bool TargetTransformInfo::areInlineCompatible(const Function *Caller, +                                              const Function *Callee) const { +  return TTIImpl->areInlineCompatible(Caller, Callee);  }  TargetTransformInfo::Concept::~Concept() {} @@ -294,16 +362,16 @@ TargetTransformInfo::Concept::~Concept() {}  TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}  TargetIRAnalysis::TargetIRAnalysis( -    std::function<Result(Function &)> TTICallback) +    std::function<Result(const Function &)> TTICallback)      : TTICallback(TTICallback) {} -TargetIRAnalysis::Result TargetIRAnalysis::run(Function &F) { +TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F) {    return TTICallback(F);  }  char TargetIRAnalysis::PassID; -TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(Function &F) { +TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {    return Result(F.getParent()->getDataLayout());  } @@ -327,7 +395,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(        *PassRegistry::getPassRegistry());  } -TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(Function &F) { +TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {    TTI = TIRA.run(F);    return *TTI;  } diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index 4e9c6f678ebd..805f3efb0814 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -121,15 +121,13 @@  //  //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/ADT/SetVector.h"  #include "llvm/IR/Constants.h"  #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Metadata.h"  #include "llvm/IR/Module.h" -#include "llvm/Pass.h"  #include "llvm/Support/CommandLine.h" 
-#include "llvm/ADT/SetVector.h"  using namespace llvm;  // A handy option for disabling TBAA functionality. The same effect can also be @@ -138,199 +136,138 @@ using namespace llvm;  static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));  namespace { -  /// TBAANode - This is a simple wrapper around an MDNode which provides a -  /// higher-level interface by hiding the details of how alias analysis -  /// information is encoded in its operands. -  class TBAANode { -    const MDNode *Node; - -  public: -    TBAANode() : Node(nullptr) {} -    explicit TBAANode(const MDNode *N) : Node(N) {} - -    /// getNode - Get the MDNode for this TBAANode. -    const MDNode *getNode() const { return Node; } - -    /// getParent - Get this TBAANode's Alias tree parent. -    TBAANode getParent() const { -      if (Node->getNumOperands() < 2) -        return TBAANode(); -      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); -      if (!P) -        return TBAANode(); -      // Ok, this node has a valid parent. Return it. -      return TBAANode(P); -    } - -    /// TypeIsImmutable - Test if this TBAANode represents a type for objects -    /// which are not modified (by any means) in the context where this -    /// AliasAnalysis is relevant. -    bool TypeIsImmutable() const { -      if (Node->getNumOperands() < 3) -        return false; -      ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); -      if (!CI) -        return false; -      return CI->getValue()[0]; -    } -  }; - -  /// This is a simple wrapper around an MDNode which provides a -  /// higher-level interface by hiding the details of how alias analysis -  /// information is encoded in its operands. -  class TBAAStructTagNode { -    /// This node should be created with createTBAAStructTagNode. -    const MDNode *Node; +/// TBAANode - This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAANode { +  const MDNode *Node; + +public: +  TBAANode() : Node(nullptr) {} +  explicit TBAANode(const MDNode *N) : Node(N) {} + +  /// getNode - Get the MDNode for this TBAANode. +  const MDNode *getNode() const { return Node; } + +  /// getParent - Get this TBAANode's Alias tree parent. +  TBAANode getParent() const { +    if (Node->getNumOperands() < 2) +      return TBAANode(); +    MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); +    if (!P) +      return TBAANode(); +    // Ok, this node has a valid parent. Return it. +    return TBAANode(P); +  } -  public: -    explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} +  /// TypeIsImmutable - Test if this TBAANode represents a type for objects +  /// which are not modified (by any means) in the context where this +  /// AliasAnalysis is relevant. +  bool TypeIsImmutable() const { +    if (Node->getNumOperands() < 3) +      return false; +    ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2)); +    if (!CI) +      return false; +    return CI->getValue()[0]; +  } +}; -    /// Get the MDNode for this TBAAStructTagNode. -    const MDNode *getNode() const { return Node; } +/// This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAAStructTagNode { +  /// This node should be created with createTBAAStructTagNode. 
+  const MDNode *Node; -    const MDNode *getBaseType() const { -      return dyn_cast_or_null<MDNode>(Node->getOperand(0)); -    } -    const MDNode *getAccessType() const { -      return dyn_cast_or_null<MDNode>(Node->getOperand(1)); -    } -    uint64_t getOffset() const { -      return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); -    } -    /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for -    /// objects which are not modified (by any means) in the context where this -    /// AliasAnalysis is relevant. -    bool TypeIsImmutable() const { -      if (Node->getNumOperands() < 4) -        return false; -      ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); -      if (!CI) -        return false; -      return CI->getValue()[0]; -    } -  }; - -  /// This is a simple wrapper around an MDNode which provides a -  /// higher-level interface by hiding the details of how alias analysis -  /// information is encoded in its operands. -  class TBAAStructTypeNode { -    /// This node should be created with createTBAAStructTypeNode. -    const MDNode *Node; - -  public: -    TBAAStructTypeNode() : Node(nullptr) {} -    explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} - -    /// Get the MDNode for this TBAAStructTypeNode. -    const MDNode *getNode() const { return Node; } - -    /// Get this TBAAStructTypeNode's field in the type DAG with -    /// given offset. Update the offset to be relative to the field type. -    TBAAStructTypeNode getParent(uint64_t &Offset) const { -      // Parent can be omitted for the root node. -      if (Node->getNumOperands() < 2) -        return TBAAStructTypeNode(); +public: +  explicit TBAAStructTagNode(const MDNode *N) : Node(N) {} -      // Fast path for a scalar type node and a struct type node with a single -      // field. -      if (Node->getNumOperands() <= 3) { -        uint64_t Cur = Node->getNumOperands() == 2 -                           ? 0 -                           : mdconst::extract<ConstantInt>(Node->getOperand(2)) -                                 ->getZExtValue(); -        Offset -= Cur; -        MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1)); -        if (!P) -          return TBAAStructTypeNode(); -        return TBAAStructTypeNode(P); -      } +  /// Get the MDNode for this TBAAStructTagNode. +  const MDNode *getNode() const { return Node; } -      // Assume the offsets are in order. We return the previous field if -      // the current offset is bigger than the given offset. -      unsigned TheIdx = 0; -      for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { -        uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) -                           ->getZExtValue(); -        if (Cur > Offset) { -          assert(Idx >= 3 && -                 "TBAAStructTypeNode::getParent should have an offset match!"); -          TheIdx = Idx - 2; -          break; -        } -      } -      // Move along the last field. 
-      if (TheIdx == 0) -        TheIdx = Node->getNumOperands() - 2; -      uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) -                         ->getZExtValue(); +  const MDNode *getBaseType() const { +    return dyn_cast_or_null<MDNode>(Node->getOperand(0)); +  } +  const MDNode *getAccessType() const { +    return dyn_cast_or_null<MDNode>(Node->getOperand(1)); +  } +  uint64_t getOffset() const { +    return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue(); +  } +  /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for +  /// objects which are not modified (by any means) in the context where this +  /// AliasAnalysis is relevant. +  bool TypeIsImmutable() const { +    if (Node->getNumOperands() < 4) +      return false; +    ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3)); +    if (!CI) +      return false; +    return CI->getValue()[0]; +  } +}; + +/// This is a simple wrapper around an MDNode which provides a +/// higher-level interface by hiding the details of how alias analysis +/// information is encoded in its operands. +class TBAAStructTypeNode { +  /// This node should be created with createTBAAStructTypeNode. +  const MDNode *Node; + +public: +  TBAAStructTypeNode() : Node(nullptr) {} +  explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {} + +  /// Get the MDNode for this TBAAStructTypeNode. +  const MDNode *getNode() const { return Node; } + +  /// Get this TBAAStructTypeNode's field in the type DAG with +  /// given offset. Update the offset to be relative to the field type. +  TBAAStructTypeNode getParent(uint64_t &Offset) const { +    // Parent can be omitted for the root node. +    if (Node->getNumOperands() < 2) +      return TBAAStructTypeNode(); + +    // Fast path for a scalar type node and a struct type node with a single +    // field. +    if (Node->getNumOperands() <= 3) { +      uint64_t Cur = Node->getNumOperands() == 2 +                         ? 0 +                         : mdconst::extract<ConstantInt>(Node->getOperand(2)) +                               ->getZExtValue();        Offset -= Cur; -      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); +      MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));        if (!P)          return TBAAStructTypeNode();        return TBAAStructTypeNode(P);      } -  }; -} - -namespace { -  /// TypeBasedAliasAnalysis - This is a simple alias analysis -  /// implementation that uses TypeBased to answer queries. -  class TypeBasedAliasAnalysis : public ImmutablePass, -                                 public AliasAnalysis { -  public: -    static char ID; // Class identification, replacement for typeinfo -    TypeBasedAliasAnalysis() : ImmutablePass(ID) { -      initializeTypeBasedAliasAnalysisPass(*PassRegistry::getPassRegistry()); -    } -    bool doInitialization(Module &M) override; - -    /// getAdjustedAnalysisPointer - This method is used when a pass implements -    /// an analysis interface through multiple inheritance.  If needed, it -    /// should override this to adjust the this pointer as needed for the -    /// specified pass info. -    void *getAdjustedAnalysisPointer(const void *PI) override { -      if (PI == &AliasAnalysis::ID) -        return (AliasAnalysis*)this; -      return this; +    // Assume the offsets are in order. We return the previous field if +    // the current offset is bigger than the given offset. 
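// Illustrative walk-through of the loop below (hypothetical type, not part of the committed change):
//   for a struct-path type node describing  struct S { int a; double b; }  the operands are
//   !"S", (!int, 0), (!double, 8). A query getParent(Offset = 8) scans the (field, offset)
//   pairs, settles on the last field whose offset does not exceed 8 (the double at offset 8),
//   rewrites Offset to 8 - 8 = 0, and returns the double's type node.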
+    unsigned TheIdx = 0; +    for (unsigned Idx = 1; Idx < Node->getNumOperands(); Idx += 2) { +      uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(Idx + 1)) +                         ->getZExtValue(); +      if (Cur > Offset) { +        assert(Idx >= 3 && +               "TBAAStructTypeNode::getParent should have an offset match!"); +        TheIdx = Idx - 2; +        break; +      }      } - -    bool Aliases(const MDNode *A, const MDNode *B) const; -    bool PathAliases(const MDNode *A, const MDNode *B) const; - -  private: -    void getAnalysisUsage(AnalysisUsage &AU) const override; -    AliasResult alias(const MemoryLocation &LocA, -                      const MemoryLocation &LocB) override; -    bool pointsToConstantMemory(const MemoryLocation &Loc, -                                bool OrLocal) override; -    ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override; -    ModRefBehavior getModRefBehavior(const Function *F) override; -    ModRefResult getModRefInfo(ImmutableCallSite CS, -                               const MemoryLocation &Loc) override; -    ModRefResult getModRefInfo(ImmutableCallSite CS1, -                               ImmutableCallSite CS2) override; -  }; -}  // End of anonymous namespace - -// Register this pass... -char TypeBasedAliasAnalysis::ID = 0; -INITIALIZE_AG_PASS(TypeBasedAliasAnalysis, AliasAnalysis, "tbaa", -                   "Type-Based Alias Analysis", false, true, false) - -ImmutablePass *llvm::createTypeBasedAliasAnalysisPass() { -  return new TypeBasedAliasAnalysis(); -} - -bool TypeBasedAliasAnalysis::doInitialization(Module &M) { -  InitializeAliasAnalysis(this, &M.getDataLayout()); -  return true; -} - -void -TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { -  AU.setPreservesAll(); -  AliasAnalysis::getAnalysisUsage(AU); +    // Move along the last field. +    if (TheIdx == 0) +      TheIdx = Node->getNumOperands() - 2; +    uint64_t Cur = mdconst::extract<ConstantInt>(Node->getOperand(TheIdx + 1)) +                       ->getZExtValue(); +    Offset -= Cur; +    MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(TheIdx)); +    if (!P) +      return TBAAStructTypeNode(); +    return TBAAStructTypeNode(P); +  } +};  }  /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat @@ -342,145 +279,36 @@ static bool isStructPathTBAA(const MDNode *MD) {    return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;  } -/// Aliases - Test whether the type represented by A may alias the -/// type represented by B. -bool -TypeBasedAliasAnalysis::Aliases(const MDNode *A, -                                const MDNode *B) const { -  // Make sure that both MDNodes are struct-path aware. -  if (isStructPathTBAA(A) && isStructPathTBAA(B)) -    return PathAliases(A, B); - -  // Keep track of the root node for A and B. -  TBAANode RootA, RootB; - -  // Climb the tree from A to see if we reach B. -  for (TBAANode T(A); ; ) { -    if (T.getNode() == B) -      // B is an ancestor of A. -      return true; - -    RootA = T; -    T = T.getParent(); -    if (!T.getNode()) -      break; -  } - -  // Climb the tree from B to see if we reach A. -  for (TBAANode T(B); ; ) { -    if (T.getNode() == A) -      // A is an ancestor of B. -      return true; - -    RootB = T; -    T = T.getParent(); -    if (!T.getNode()) -      break; -  } - -  // Neither node is an ancestor of the other. 
-   -  // If they have different roots, they're part of different potentially -  // unrelated type systems, so we must be conservative. -  if (RootA.getNode() != RootB.getNode()) -    return true; - -  // If they have the same root, then we've proved there's no alias. -  return false; -} - -/// Test whether the struct-path tag represented by A may alias the -/// struct-path tag represented by B. -bool -TypeBasedAliasAnalysis::PathAliases(const MDNode *A, -                                    const MDNode *B) const { -  // Verify that both input nodes are struct-path aware. -  assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); -  assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); - -  // Keep track of the root node for A and B. -  TBAAStructTypeNode RootA, RootB; -  TBAAStructTagNode TagA(A), TagB(B); - -  // TODO: We need to check if AccessType of TagA encloses AccessType of -  // TagB to support aggregate AccessType. If yes, return true. - -  // Start from the base type of A, follow the edge with the correct offset in -  // the type DAG and adjust the offset until we reach the base type of B or -  // until we reach the Root node. -  // Compare the adjusted offset once we have the same base. - -  // Climb the type DAG from base type of A to see if we reach base type of B. -  const MDNode *BaseA = TagA.getBaseType(); -  const MDNode *BaseB = TagB.getBaseType(); -  uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); -  for (TBAAStructTypeNode T(BaseA); ; ) { -    if (T.getNode() == BaseB) -      // Base type of A encloses base type of B, check if the offsets match. -      return OffsetA == OffsetB; - -    RootA = T; -    // Follow the edge with the correct offset, OffsetA will be adjusted to -    // be relative to the field type. -    T = T.getParent(OffsetA); -    if (!T.getNode()) -      break; -  } - -  // Reset OffsetA and climb the type DAG from base type of B to see if we reach -  // base type of A. -  OffsetA = TagA.getOffset(); -  for (TBAAStructTypeNode T(BaseB); ; ) { -    if (T.getNode() == BaseA) -      // Base type of B encloses base type of A, check if the offsets match. -      return OffsetA == OffsetB; - -    RootB = T; -    // Follow the edge with the correct offset, OffsetB will be adjusted to -    // be relative to the field type. -    T = T.getParent(OffsetB); -    if (!T.getNode()) -      break; -  } - -  // Neither node is an ancestor of the other. - -  // If they have different roots, they're part of different potentially -  // unrelated type systems, so we must be conservative. -  if (RootA.getNode() != RootB.getNode()) -    return true; - -  // If they have the same root, then we've proved there's no alias. -  return false; -} - -AliasResult TypeBasedAliasAnalysis::alias(const MemoryLocation &LocA, -                                          const MemoryLocation &LocB) { +AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA, +                                     const MemoryLocation &LocB) {    if (!EnableTBAA) -    return AliasAnalysis::alias(LocA, LocB); +    return AAResultBase::alias(LocA, LocB);    // Get the attached MDNodes. If either value lacks a tbaa MDNode, we must    // be conservative.    
const MDNode *AM = LocA.AATags.TBAA; -  if (!AM) return AliasAnalysis::alias(LocA, LocB); +  if (!AM) +    return AAResultBase::alias(LocA, LocB);    const MDNode *BM = LocB.AATags.TBAA; -  if (!BM) return AliasAnalysis::alias(LocA, LocB); +  if (!BM) +    return AAResultBase::alias(LocA, LocB);    // If they may alias, chain to the next AliasAnalysis.    if (Aliases(AM, BM)) -    return AliasAnalysis::alias(LocA, LocB); +    return AAResultBase::alias(LocA, LocB);    // Otherwise return a definitive result.    return NoAlias;  } -bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc, -                                                    bool OrLocal) { +bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc, +                                               bool OrLocal) {    if (!EnableTBAA) -    return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);    const MDNode *M = Loc.AATags.TBAA; -  if (!M) return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +  if (!M) +    return AAResultBase::pointsToConstantMemory(Loc, OrLocal);    // If this is an "immutable" type, we can assume the pointer is pointing    // to constant memory. @@ -488,80 +316,82 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const MemoryLocation &Loc,        (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))      return true; -  return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); +  return AAResultBase::pointsToConstantMemory(Loc, OrLocal);  } -AliasAnalysis::ModRefBehavior -TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { +FunctionModRefBehavior +TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {    if (!EnableTBAA) -    return AliasAnalysis::getModRefBehavior(CS); +    return AAResultBase::getModRefBehavior(CS); -  ModRefBehavior Min = UnknownModRefBehavior; +  FunctionModRefBehavior Min = FMRB_UnknownModRefBehavior;    // If this is an "immutable" type, we can assume the call doesn't write    // to memory.    if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))      if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||          (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) -      Min = OnlyReadsMemory; +      Min = FMRB_OnlyReadsMemory; -  return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); +  return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);  } -AliasAnalysis::ModRefBehavior -TypeBasedAliasAnalysis::getModRefBehavior(const Function *F) { +FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {    // Functions don't have metadata. Just chain to the next implementation. 
-  return AliasAnalysis::getModRefBehavior(F); +  return AAResultBase::getModRefBehavior(F);  } -AliasAnalysis::ModRefResult -TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS, -                                      const MemoryLocation &Loc) { +ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS, +                                            const MemoryLocation &Loc) {    if (!EnableTBAA) -    return AliasAnalysis::getModRefInfo(CS, Loc); +    return AAResultBase::getModRefInfo(CS, Loc);    if (const MDNode *L = Loc.AATags.TBAA)      if (const MDNode *M =              CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))        if (!Aliases(L, M)) -        return NoModRef; +        return MRI_NoModRef; -  return AliasAnalysis::getModRefInfo(CS, Loc); +  return AAResultBase::getModRefInfo(CS, Loc);  } -AliasAnalysis::ModRefResult -TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, -                                      ImmutableCallSite CS2) { +ModRefInfo TypeBasedAAResult::getModRefInfo(ImmutableCallSite CS1, +                                            ImmutableCallSite CS2) {    if (!EnableTBAA) -    return AliasAnalysis::getModRefInfo(CS1, CS2); +    return AAResultBase::getModRefInfo(CS1, CS2);    if (const MDNode *M1 =            CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa))      if (const MDNode *M2 =              CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa))        if (!Aliases(M1, M2)) -        return NoModRef; +        return MRI_NoModRef; -  return AliasAnalysis::getModRefInfo(CS1, CS2); +  return AAResultBase::getModRefInfo(CS1, CS2);  }  bool MDNode::isTBAAVtableAccess() const {    if (!isStructPathTBAA(this)) { -    if (getNumOperands() < 1) return false; +    if (getNumOperands() < 1) +      return false;      if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) { -      if (Tag1->getString() == "vtable pointer") return true; +      if (Tag1->getString() == "vtable pointer") +        return true;      }      return false;    }    // For struct-path aware TBAA, we use the access type of the tag. -  if (getNumOperands() < 2) return false; +  if (getNumOperands() < 2) +    return false;    MDNode *Tag = cast_or_null<MDNode>(getOperand(1)); -  if (!Tag) return false; +  if (!Tag) +    return false;    if (MDString *Tag1 = dyn_cast<MDString>(Tag->getOperand(0))) { -    if (Tag1->getString() == "vtable pointer") return true; +    if (Tag1->getString() == "vtable pointer") +      return true;    } -  return false;   +  return false;  }  MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { @@ -575,9 +405,11 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {    bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);    if (StructPath) {      A = cast_or_null<MDNode>(A->getOperand(1)); -    if (!A) return nullptr; +    if (!A) +      return nullptr;      B = cast_or_null<MDNode>(B->getOperand(1)); -    if (!B) return nullptr; +    if (!B) +      return nullptr;    }    SmallSetVector<MDNode *, 4> PathA; @@ -604,7 +436,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {    int IB = PathB.size() - 1;    MDNode *Ret = nullptr; -  while (IA >= 0 && IB >=0) { +  while (IA >= 0 && IB >= 0) {      if (PathA[IA] == PathB[IB])        Ret = PathA[IA];      else @@ -644,3 +476,147 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {      N.NoAlias = getMetadata(LLVMContext::MD_noalias);  } +/// Aliases - Test whether the type represented by A may alias the +/// type represented by B. 
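// Illustrative example (hypothetical tags, not part of the committed change):
//   for accesses to  s.a  and  s.b  of  struct S { int a; int b; }  the two struct-path
//   access tags share the base type node for S but carry offsets 0 and 4; PathAliases
//   compares the offsets, reports that the tags do not alias, and the caller then
//   returns NoAlias.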
+bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const { +  // Make sure that both MDNodes are struct-path aware. +  if (isStructPathTBAA(A) && isStructPathTBAA(B)) +    return PathAliases(A, B); + +  // Keep track of the root node for A and B. +  TBAANode RootA, RootB; + +  // Climb the tree from A to see if we reach B. +  for (TBAANode T(A);;) { +    if (T.getNode() == B) +      // B is an ancestor of A. +      return true; + +    RootA = T; +    T = T.getParent(); +    if (!T.getNode()) +      break; +  } + +  // Climb the tree from B to see if we reach A. +  for (TBAANode T(B);;) { +    if (T.getNode() == A) +      // A is an ancestor of B. +      return true; + +    RootB = T; +    T = T.getParent(); +    if (!T.getNode()) +      break; +  } + +  // Neither node is an ancestor of the other. + +  // If they have different roots, they're part of different potentially +  // unrelated type systems, so we must be conservative. +  if (RootA.getNode() != RootB.getNode()) +    return true; + +  // If they have the same root, then we've proved there's no alias. +  return false; +} + +/// Test whether the struct-path tag represented by A may alias the +/// struct-path tag represented by B. +bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const { +  // Verify that both input nodes are struct-path aware. +  assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware."); +  assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware."); + +  // Keep track of the root node for A and B. +  TBAAStructTypeNode RootA, RootB; +  TBAAStructTagNode TagA(A), TagB(B); + +  // TODO: We need to check if AccessType of TagA encloses AccessType of +  // TagB to support aggregate AccessType. If yes, return true. + +  // Start from the base type of A, follow the edge with the correct offset in +  // the type DAG and adjust the offset until we reach the base type of B or +  // until we reach the Root node. +  // Compare the adjusted offset once we have the same base. + +  // Climb the type DAG from base type of A to see if we reach base type of B. +  const MDNode *BaseA = TagA.getBaseType(); +  const MDNode *BaseB = TagB.getBaseType(); +  uint64_t OffsetA = TagA.getOffset(), OffsetB = TagB.getOffset(); +  for (TBAAStructTypeNode T(BaseA);;) { +    if (T.getNode() == BaseB) +      // Base type of A encloses base type of B, check if the offsets match. +      return OffsetA == OffsetB; + +    RootA = T; +    // Follow the edge with the correct offset, OffsetA will be adjusted to +    // be relative to the field type. +    T = T.getParent(OffsetA); +    if (!T.getNode()) +      break; +  } + +  // Reset OffsetA and climb the type DAG from base type of B to see if we reach +  // base type of A. +  OffsetA = TagA.getOffset(); +  for (TBAAStructTypeNode T(BaseB);;) { +    if (T.getNode() == BaseA) +      // Base type of B encloses base type of A, check if the offsets match. +      return OffsetA == OffsetB; + +    RootB = T; +    // Follow the edge with the correct offset, OffsetB will be adjusted to +    // be relative to the field type. +    T = T.getParent(OffsetB); +    if (!T.getNode()) +      break; +  } + +  // Neither node is an ancestor of the other. + +  // If they have different roots, they're part of different potentially +  // unrelated type systems, so we must be conservative. +  if (RootA.getNode() != RootB.getNode()) +    return true; + +  // If they have the same root, then we've proved there's no alias. 
+  return false; +} + +TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> *AM) { +  return TypeBasedAAResult(AM->getResult<TargetLibraryAnalysis>(F)); +} + +char TypeBasedAA::PassID; + +char TypeBasedAAWrapperPass::ID = 0; +INITIALIZE_PASS_BEGIN(TypeBasedAAWrapperPass, "tbaa", +                      "Type-Based Alias Analysis", false, true) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis", +                    false, true) + +ImmutablePass *llvm::createTypeBasedAAWrapperPass() { +  return new TypeBasedAAWrapperPass(); +} + +TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) { +  initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry()); +} + +bool TypeBasedAAWrapperPass::doInitialization(Module &M) { +  Result.reset(new TypeBasedAAResult( +      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI())); +  return false; +} + +bool TypeBasedAAWrapperPass::doFinalization(Module &M) { +  Result.reset(); +  return false; +} + +void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { +  AU.setPreservesAll(); +  AU.addRequired<TargetLibraryInfoWrapperPass>(); +} diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index fa0d7798cae9..314ec9c1886e 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -13,6 +13,7 @@  //===----------------------------------------------------------------------===//  #include "llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/Optional.h"  #include "llvm/ADT/SmallPtrSet.h"  #include "llvm/Analysis/AssumptionCache.h"  #include "llvm/Analysis/InstructionSimplify.h" @@ -43,7 +44,7 @@ const unsigned MaxDepth = 6;  /// Enable an experimental feature to leverage information about dominating  /// conditions to compute known bits.  The individual options below control how -/// hard we search.  The defaults are choosen to be fairly aggressive.  If you +/// hard we search.  The defaults are chosen to be fairly aggressive.  If you  /// run into compile time problems when testing, scale them back and report  /// your findings.  static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions", @@ -58,12 +59,12 @@ static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",  /// conditions?  static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",                                                     cl::Hidden, -                                                   cl::init(20000)); +                                                   cl::init(20));  // Controls the number of uses of the value searched for possible  // dominating comparisons.  static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses", -                                              cl::Hidden, cl::init(2000)); +                                              cl::Hidden, cl::init(20));  // If true, don't consider only compares whose only use is a branch.  
static cl::opt<bool> DomConditionsSingleCmpUse("dom-conditions-single-cmp-use", @@ -185,6 +186,25 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,    return ::isKnownNonZero(V, DL, Depth, Query(AC, safeCxtI(V, CxtI), DT));  } +bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth, +                              AssumptionCache *AC, const Instruction *CxtI, +                              const DominatorTree *DT) { +  bool NonNegative, Negative; +  ComputeSignBit(V, NonNegative, Negative, DL, Depth, AC, CxtI, DT); +  return NonNegative; +} + +static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, +                           const Query &Q); + +bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, +                          AssumptionCache *AC, const Instruction *CxtI, +                          const DominatorTree *DT) { +  return ::isKnownNonEqual(V1, V2, DL, Query(AC, +                                             safeCxtI(V1, safeCxtI(V2, CxtI)), +                                             DT)); +} +  static bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,                                unsigned Depth, const Query &Q); @@ -320,7 +340,7 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,    }    // If low bits are zero in either operand, output low known-0 bits. -  // Also compute a conserative estimate for high known-0 bits. +  // Also compute a conservative estimate for high known-0 bits.    // More trickiness is possible, but this is sufficient for the    // interesting case of alignment computation.    KnownOne.clearAllBits(); @@ -347,26 +367,30 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,  }  void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges, -                                             APInt &KnownZero) { +                                             APInt &KnownZero, +                                             APInt &KnownOne) {    unsigned BitWidth = KnownZero.getBitWidth();    unsigned NumRanges = Ranges.getNumOperands() / 2;    assert(NumRanges >= 1); -  // Use the high end of the ranges to find leading zeros. -  unsigned MinLeadingZeros = BitWidth; +  KnownZero.setAllBits(); +  KnownOne.setAllBits(); +    for (unsigned i = 0; i < NumRanges; ++i) {      ConstantInt *Lower =          mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));      ConstantInt *Upper =          mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));      ConstantRange Range(Lower->getValue(), Upper->getValue()); -    if (Range.isWrappedSet()) -      MinLeadingZeros = 0; // -1 has no zeros -    unsigned LeadingZeros = (Upper->getValue() - 1).countLeadingZeros(); -    MinLeadingZeros = std::min(LeadingZeros, MinLeadingZeros); -  } -  KnownZero = APInt::getHighBitsSet(BitWidth, MinLeadingZeros); +    // The first CommonPrefixBits of all values in Range are equal. 
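// Worked example with assumed values (not part of the committed change):
//   !range !{i8 8, i8 12} covers the values 8..11, so Min = 0b00001000 and
//   Max = 0b00001011; Min ^ Max = 0b00000011 gives CommonPrefixBits = 6 and
//   Mask = 0b11111100. KnownOne keeps bit 3 (set in every value in the range)
//   and KnownZero keeps bits 7..4 and bit 2 (clear in every value).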
+    unsigned CommonPrefixBits = +        (Range.getUnsignedMax() ^ Range.getUnsignedMin()).countLeadingZeros(); + +    APInt Mask = APInt::getHighBitsSet(BitWidth, CommonPrefixBits); +    KnownOne &= Range.getUnsignedMax() & Mask; +    KnownZero &= ~Range.getUnsignedMax() & Mask; +  }  }  static bool isEphemeralValueOf(Instruction *I, const Value *E) { @@ -374,20 +398,20 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {    SmallPtrSet<const Value *, 32> Visited;    SmallPtrSet<const Value *, 16> EphValues; +  // The instruction defining an assumption's condition itself is always +  // considered ephemeral to that assumption (even if it has other +  // non-ephemeral users). See r246696's test case for an example. +  if (std::find(I->op_begin(), I->op_end(), E) != I->op_end()) +    return true; +    while (!WorkSet.empty()) {      const Value *V = WorkSet.pop_back_val();      if (!Visited.insert(V).second)        continue;      // If all uses of this value are ephemeral, then so is this value. -    bool FoundNEUse = false; -    for (const User *I : V->users()) -      if (!EphValues.count(I)) { -        FoundNEUse = true; -        break; -      } - -    if (!FoundNEUse) { +    if (std::all_of(V->user_begin(), V->user_end(), +                    [&](const User *U) { return EphValues.count(U); })) {        if (V == E)          return true; @@ -447,7 +471,7 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {        for (BasicBlock::const_iterator I =               std::next(BasicBlock::const_iterator(Q.CxtI)),                                        IE(Inv); I != IE; ++I) -        if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) +        if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))            return false;        return !isEphemeralValueOf(Inv, Q.CxtI); @@ -464,14 +488,14 @@ static bool isValidAssumeForContext(Value *V, const Query &Q) {      // of the block); the common case is that the assume will come first.      for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),           IE = Inv->getParent()->end(); I != IE; ++I) -      if (I == Q.CxtI) +      if (&*I == Q.CxtI)          return true;      // The context must come first...      for (BasicBlock::const_iterator I =             std::next(BasicBlock::const_iterator(Q.CxtI)),                                      IE(Inv); I != IE; ++I) -      if (!isSafeToSpeculativelyExecute(I) && !isAssumeLikeIntrinsic(I)) +      if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))          return false;      return !isEphemeralValueOf(Inv, Q.CxtI); @@ -601,6 +625,11 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,    if (!Q.DT || !Q.CxtI)      return;    Instruction *Cxt = const_cast<Instruction *>(Q.CxtI); +  // The context instruction might be in a statically unreachable block.  If +  // so, asking dominator queries may yield suprising results.  (e.g. the block +  // may not have a dom tree node) +  if (!Q.DT->isReachableFromEntry(Cxt->getParent())) +    return;    // Avoid useless work    if (auto VI = dyn_cast<Instruction>(V)) @@ -647,7 +676,9 @@ static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,      // instruction.  Finding a condition where one path dominates the context      // isn't enough because both the true and false cases could merge before      // the context instruction we're actually interested in.  
Instead, we need -    // to ensure that the taken *edge* dominates the context instruction. +    // to ensure that the taken *edge* dominates the context instruction.  We +    // know that the edge must be reachable since we started from a reachable +    // block.      BasicBlock *BB0 = BI->getSuccessor(0);      BasicBlockEdge Edge(BI->getParent(), BB0);      if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent())) @@ -941,6 +972,90 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,    }  } +// Compute known bits from a shift operator, including those with a +// non-constant shift amount. KnownZero and KnownOne are the outputs of this +// function. KnownZero2 and KnownOne2 are pre-allocated temporaries with the +// same bit width as KnownZero and KnownOne. KZF and KOF are operator-specific +// functors that, given the known-zero or known-one bits respectively, and a +// shift amount, compute the implied known-zero or known-one bits of the shift +// operator's result respectively for that shift amount. The results from calling +// KZF and KOF are conservatively combined for all permitted shift amounts. +template <typename KZFunctor, typename KOFunctor> +static void computeKnownBitsFromShiftOperator(Operator *I, +              APInt &KnownZero, APInt &KnownOne, +              APInt &KnownZero2, APInt &KnownOne2, +              const DataLayout &DL, unsigned Depth, const Query &Q, +              KZFunctor KZF, KOFunctor KOF) { +  unsigned BitWidth = KnownZero.getBitWidth(); + +  if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { +    unsigned ShiftAmt = SA->getLimitedValue(BitWidth-1); + +    computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); +    KnownZero = KZF(KnownZero, ShiftAmt); +    KnownOne  = KOF(KnownOne, ShiftAmt); +    return; +  } + +  computeKnownBits(I->getOperand(1), KnownZero, KnownOne, DL, Depth + 1, Q); + +  // Note: We cannot use KnownZero.getLimitedValue() here, because if +  // BitWidth > 64 and any upper bits are known, we'll end up returning the +  // limit value (which implies all bits are known). +  uint64_t ShiftAmtKZ = KnownZero.zextOrTrunc(64).getZExtValue(); +  uint64_t ShiftAmtKO = KnownOne.zextOrTrunc(64).getZExtValue(); + +  // It would be more-clearly correct to use the two temporaries for this +  // calculation. Reusing the APInts here to prevent unnecessary allocations. +  KnownZero.clearAllBits(), KnownOne.clearAllBits(); + +  // If we know the shifter operand is nonzero, we can sometimes infer more +  // known bits. However this is expensive to compute, so be lazy about it and +  // only compute it when absolutely necessary. +  Optional<bool> ShifterOperandIsNonZero; + +  // Early exit if we can't constrain any well-defined shift amount. +  if (!(ShiftAmtKZ & (BitWidth - 1)) && !(ShiftAmtKO & (BitWidth - 1))) { +    ShifterOperandIsNonZero = +        isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q); +    if (!*ShifterOperandIsNonZero) +      return; +  } + +  computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, Depth + 1, Q); + +  KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth); +  for (unsigned ShiftAmt = 0; ShiftAmt < BitWidth; ++ShiftAmt) { +    // Combine the shifted known input bits only for those shift amounts +    // compatible with its known constraints. 
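// Illustrative case (assumed known bits, not part of the committed change):
//   if the shift amount is known to have bit 0 set and bit 1 clear, only
//   ShiftAmt = 1, 5, 9, ... pass the two checks below; amounts such as 3
//   (which uses a known-zero bit) or 4 (which misses the known-one bit)
//   are skipped.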
+    if ((ShiftAmt & ~ShiftAmtKZ) != ShiftAmt) +      continue; +    if ((ShiftAmt | ShiftAmtKO) != ShiftAmt) +      continue; +    // If we know the shifter is nonzero, we may be able to infer more known +    // bits. This check is sunk down as far as possible to avoid the expensive +    // call to isKnownNonZero if the cheaper checks above fail. +    if (ShiftAmt == 0) { +      if (!ShifterOperandIsNonZero.hasValue()) +        ShifterOperandIsNonZero = +            isKnownNonZero(I->getOperand(1), DL, Depth + 1, Q); +      if (*ShifterOperandIsNonZero) +        continue; +    } + +    KnownZero &= KZF(KnownZero2, ShiftAmt); +    KnownOne  &= KOF(KnownOne2, ShiftAmt); +  } + +  // If there are no compatible shift amounts, then we've proven that the shift +  // amount must be >= the BitWidth, and the result is undefined. We could +  // return anything we'd like, but we need to make sure the sets of known bits +  // stay disjoint (it should be better for some other code to actually +  // propagate the undef than to pick a value here using known bits). +  if ((KnownZero & KnownOne) != 0) +    KnownZero.clearAllBits(), KnownOne.clearAllBits(); +} +  static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,                                           APInt &KnownOne, const DataLayout &DL,                                           unsigned Depth, const Query &Q) { @@ -951,7 +1066,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,    default: break;    case Instruction::Load:      if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range)) -      computeKnownBitsFromRangeMetadata(*MD, KnownZero); +      computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);      break;    case Instruction::And: {      // If either the LHS or the RHS are Zero, the result is zero. @@ -962,6 +1077,22 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,      KnownOne &= KnownOne2;      // Output known-0 are known to be clear if zero in either the LHS | RHS.      KnownZero |= KnownZero2; + +    // and(x, add (x, -1)) is a common idiom that always clears the low bit; +    // here we handle the more general case of adding any odd number by +    // matching the form add(x, add(x, y)) where y is odd. +    // TODO: This could be generalized to clearing any bit set in y where the +    // following bit is known to be unset in y. 
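// Illustrative case (assumed values, not part of the committed change):
//   with x = 0b0110 and y = 3, x + y = 0b1001; adding an odd y always flips
//   bit 0, so x and x + y differ there and x & (x + y) has bit 0 clear,
//   whatever the value of x.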
+    Value *Y = nullptr; +    if (match(I->getOperand(0), m_Add(m_Specific(I->getOperand(1)), +                                      m_Value(Y))) || +        match(I->getOperand(1), m_Add(m_Specific(I->getOperand(0)), +                                      m_Value(Y)))) { +      APInt KnownZero3(BitWidth, 0), KnownOne3(BitWidth, 0); +      computeKnownBits(Y, KnownZero3, KnownOne3, DL, Depth + 1, Q); +      if (KnownOne3.countTrailingOnes() > 0) +        KnownZero |= APInt::getLowBitsSet(BitWidth, 1); +    }      break;    }    case Instruction::Or: { @@ -1050,7 +1181,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,    }    case Instruction::BitCast: {      Type *SrcTy = I->getOperand(0)->getType(); -    if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) && +    if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy() || +         SrcTy->isFloatingPointTy()) &&          // TODO: For now, not handling conversions like:          // (bitcast i64 %x to <2 x i32>)          !I->getType()->isVectorTy()) { @@ -1077,48 +1209,54 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,        KnownOne |= APInt::getHighBitsSet(BitWidth, BitWidth - SrcBitWidth);      break;    } -  case Instruction::Shl: +  case Instruction::Shl: {      // (shl X, C1) & C2 == 0   iff   (X & C2 >>u C1) == 0 -    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { -      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); -      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); -      KnownZero <<= ShiftAmt; -      KnownOne  <<= ShiftAmt; -      KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 -    } +    auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { +      return (KnownZero << ShiftAmt) | +             APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0. +    }; + +    auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { +      return KnownOne << ShiftAmt; +    }; + +    computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, +                                      KnownZero2, KnownOne2, DL, Depth, Q, +                                      KZF, KOF);      break; -  case Instruction::LShr: +  } +  case Instruction::LShr: {      // (ushr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0 -    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { -      // Compute the new bits that are at the top now. -      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - -      // Unsigned shift right. -      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); -      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); -      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt); -      // high bits known zero. -      KnownZero |= APInt::getHighBitsSet(BitWidth, ShiftAmt); -    } +    auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { +      return APIntOps::lshr(KnownZero, ShiftAmt) | +             // High bits known zero. 
+             APInt::getHighBitsSet(BitWidth, ShiftAmt); +    }; + +    auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { +      return APIntOps::lshr(KnownOne, ShiftAmt); +    }; + +    computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, +                                      KnownZero2, KnownOne2, DL, Depth, Q, +                                      KZF, KOF);      break; -  case Instruction::AShr: +  } +  case Instruction::AShr: {      // (ashr X, C1) & C2 == 0   iff  (-1 >> C1) & C2 == 0 -    if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { -      // Compute the new bits that are at the top now. -      uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); +    auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) { +      return APIntOps::ashr(KnownZero, ShiftAmt); +    }; -      // Signed shift right. -      computeKnownBits(I->getOperand(0), KnownZero, KnownOne, DL, Depth + 1, Q); -      KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); -      KnownOne  = APIntOps::lshr(KnownOne, ShiftAmt); +    auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) { +      return APIntOps::ashr(KnownOne, ShiftAmt); +    }; -      APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); -      if (KnownZero[BitWidth-ShiftAmt-1])    // New bits are known zero. -        KnownZero |= HighBits; -      else if (KnownOne[BitWidth-ShiftAmt-1])  // New bits are known one. -        KnownOne |= HighBits; -    } +    computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne, +                                      KnownZero2, KnownOne2, DL, Depth, Q, +                                      KZF, KOF);      break; +  }    case Instruction::Sub: {      bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();      computeKnownBitsAddSub(false, I->getOperand(0), I->getOperand(1), NSW, @@ -1336,13 +1474,19 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,    case Instruction::Call:    case Instruction::Invoke:      if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range)) -      computeKnownBitsFromRangeMetadata(*MD, KnownZero); +      computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);      // If a range metadata is attached to this IntrinsicInst, intersect the      // explicit range specified by the metadata and the implicit range of      // the intrinsic.      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {        switch (II->getIntrinsicID()) {        default: break; +      case Intrinsic::bswap: +        computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, +                         Depth + 1, Q); +        KnownZero |= KnownZero2.byteSwap(); +        KnownOne |= KnownOne2.byteSwap(); +        break;        case Intrinsic::ctlz:        case Intrinsic::cttz: {          unsigned LowBits = Log2_32(BitWidth)+1; @@ -1353,8 +1497,24 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,          break;        }        case Intrinsic::ctpop: { -        unsigned LowBits = Log2_32(BitWidth)+1; -        KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); +        computeKnownBits(I->getOperand(0), KnownZero2, KnownOne2, DL, +                         Depth + 1, Q); +        // We can bound the space the count needs.  Also, bits known to be zero +        // can't contribute to the population. 
+        unsigned BitsPossiblySet = BitWidth - KnownZero2.countPopulation(); +        unsigned LeadingZeros = +          APInt(BitWidth, BitsPossiblySet).countLeadingZeros(); +        assert(LeadingZeros <= BitWidth); +        KnownZero |= APInt::getHighBitsSet(BitWidth, LeadingZeros); +        KnownOne &= ~KnownZero; +        // TODO: we could bound KnownOne using the lower bound on the number +        // of bits which might be set provided by popcnt KnownOne2. +        break; +      } +      case Intrinsic::fabs: { +        Type *Ty = II->getType(); +        APInt SignBit = APInt::getSignBit(Ty->getScalarSizeInBits()); +        KnownZero |= APInt::getSplat(Ty->getPrimitiveSizeInBits(), SignBit);          break;        }        case Intrinsic::x86_sse42_crc32_64_64: @@ -1394,6 +1554,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,    }  } +static unsigned getAlignment(const Value *V, const DataLayout &DL) { +  unsigned Align = 0; +  if (auto *GO = dyn_cast<GlobalObject>(V)) { +    Align = GO->getAlignment(); +    if (Align == 0) { +      if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { +        Type *ObjectType = GVar->getType()->getElementType(); +        if (ObjectType->isSized()) { +          // If the object is defined in the current Module, we'll be giving +          // it the preferred alignment. Otherwise, we have to assume that it +          // may only have the minimum ABI alignment. +          if (GVar->isStrongDefinitionForLinker()) +            Align = DL.getPreferredAlignment(GVar); +          else +            Align = DL.getABITypeAlignment(ObjectType); +        } +      } +    } +  } else if (const Argument *A = dyn_cast<Argument>(V)) { +    Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; + +    if (!Align && A->hasStructRetAttr()) { +      // An sret parameter has at least the ABI alignment of the return type. +      Type *EltTy = cast<PointerType>(A->getType())->getElementType(); +      if (EltTy->isSized()) +        Align = DL.getABITypeAlignment(EltTy); +    } +  } else if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) +    Align = AI->getAlignment(); +  else if (auto CS = ImmutableCallSite(V)) +    Align = CS.getAttributes().getParamAlignment(AttributeSet::ReturnIndex); +  else if (const LoadInst *LI = dyn_cast<LoadInst>(V)) +    if (MDNode *MD = LI->getMetadata(LLVMContext::MD_align)) { +      ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(0)); +      Align = CI->getLimitedValue(); +    } + +  return Align; +} +  /// Determine which bits of V are known to be either zero or one and return  /// them in the KnownZero/KnownOne bit sets.  /// @@ -1416,8 +1616,9 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,    unsigned BitWidth = KnownZero.getBitWidth();    assert((V->getType()->isIntOrIntVectorTy() || +          V->getType()->isFPOrFPVectorTy() ||            V->getType()->getScalarType()->isPointerTy()) && -         "Not integer or pointer type!"); +         "Not integer, floating point, or pointer type!");    assert((DL.getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&           (!V->getType()->isIntOrIntVectorTy() ||            V->getType()->getScalarSizeInBits() == BitWidth) && @@ -1454,59 +1655,6 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,      return;    } -  // The address of an aligned GlobalValue has trailing zeros. 
-  if (auto *GO = dyn_cast<GlobalObject>(V)) { -    unsigned Align = GO->getAlignment(); -    if (Align == 0) { -      if (auto *GVar = dyn_cast<GlobalVariable>(GO)) { -        Type *ObjectType = GVar->getType()->getElementType(); -        if (ObjectType->isSized()) { -          // If the object is defined in the current Module, we'll be giving -          // it the preferred alignment. Otherwise, we have to assume that it -          // may only have the minimum ABI alignment. -          if (GVar->isStrongDefinitionForLinker()) -            Align = DL.getPreferredAlignment(GVar); -          else -            Align = DL.getABITypeAlignment(ObjectType); -        } -      } -    } -    if (Align > 0) -      KnownZero = APInt::getLowBitsSet(BitWidth, -                                       countTrailingZeros(Align)); -    else -      KnownZero.clearAllBits(); -    KnownOne.clearAllBits(); -    return; -  } - -  if (Argument *A = dyn_cast<Argument>(V)) { -    unsigned Align = A->getType()->isPointerTy() ? A->getParamAlignment() : 0; - -    if (!Align && A->hasStructRetAttr()) { -      // An sret parameter has at least the ABI alignment of the return type. -      Type *EltTy = cast<PointerType>(A->getType())->getElementType(); -      if (EltTy->isSized()) -        Align = DL.getABITypeAlignment(EltTy); -    } - -    if (Align) -      KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); -    else -      KnownZero.clearAllBits(); -    KnownOne.clearAllBits(); - -    // Don't give up yet... there might be an assumption that provides more -    // information... -    computeKnownBitsFromAssume(V, KnownZero, KnownOne, DL, Depth, Q); - -    // Or a dominating condition for that matter -    if (EnableDomConditions && Depth <= DomConditionsMaxDepth) -      computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, DL, -                                              Depth, Q); -    return; -  } -    // Start out not knowing anything.    KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -1525,6 +1673,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,    if (Operator *I = dyn_cast<Operator>(V))      computeKnownBitsFromOperator(I, KnownZero, KnownOne, DL, Depth, Q); + +  // Aligned pointers have trailing zeros - refine KnownZero set +  if (V->getType()->isPointerTy()) { +    unsigned Align = getAlignment(V, DL); +    if (Align) +      KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); +  } +    // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition    // strictly refines KnownZero and KnownOne. Therefore, we run them after    // computeKnownBitsFromOperator. @@ -1812,6 +1968,23 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,      ComputeSignBit(X, XKnownNonNegative, XKnownNegative, DL, Depth, Q);      if (XKnownNegative)        return true; + +    // If the shifter operand is a constant, and all of the bits shifted +    // out are known to be zero, and X is known non-zero then at least one +    // non-zero bit must remain. +    if (ConstantInt *Shift = dyn_cast<ConstantInt>(Y)) { +      APInt KnownZero(BitWidth, 0); +      APInt KnownOne(BitWidth, 0); +      computeKnownBits(X, KnownZero, KnownOne, DL, Depth, Q); +       +      auto ShiftVal = Shift->getLimitedValue(BitWidth - 1); +      // Is there a known one in the portion not shifted out? +      if (KnownOne.countLeadingZeros() < BitWidth - ShiftVal) +        return true; +      // Are all the bits to be shifted out known zero? 
+      if (KnownZero.countTrailingOnes() >= ShiftVal) +        return isKnownNonZero(X, DL, Depth, Q); +    }    }    // div exact can only produce a zero if the dividend is zero.    else if (match(V, m_Exact(m_IDiv(m_Value(X), m_Value())))) { @@ -1871,6 +2044,26 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,          isKnownNonZero(SI->getFalseValue(), DL, Depth, Q))        return true;    } +  // PHI +  else if (PHINode *PN = dyn_cast<PHINode>(V)) { +    // Try and detect a recurrence that monotonically increases from a +    // starting value, as these are common as induction variables. +    if (PN->getNumIncomingValues() == 2) { +      Value *Start = PN->getIncomingValue(0); +      Value *Induction = PN->getIncomingValue(1); +      if (isa<ConstantInt>(Induction) && !isa<ConstantInt>(Start)) +        std::swap(Start, Induction); +      if (ConstantInt *C = dyn_cast<ConstantInt>(Start)) { +        if (!C->isZero() && !C->isNegative()) { +          ConstantInt *X; +          if ((match(Induction, m_NSWAdd(m_Specific(PN), m_ConstantInt(X))) || +               match(Induction, m_NUWAdd(m_Specific(PN), m_ConstantInt(X)))) && +              !X->isNegative()) +            return true; +        } +      } +    } +  }    if (!BitWidth) return false;    APInt KnownZero(BitWidth, 0); @@ -1879,6 +2072,51 @@ bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,    return KnownOne != 0;  } +/// Return true if V2 == V1 + X, where X is known non-zero. +static bool isAddOfNonZero(Value *V1, Value *V2, const DataLayout &DL, +                           const Query &Q) { +  BinaryOperator *BO = dyn_cast<BinaryOperator>(V1); +  if (!BO || BO->getOpcode() != Instruction::Add) +    return false; +  Value *Op = nullptr; +  if (V2 == BO->getOperand(0)) +    Op = BO->getOperand(1); +  else if (V2 == BO->getOperand(1)) +    Op = BO->getOperand(0); +  else +    return false; +  return isKnownNonZero(Op, DL, 0, Q); +} + +/// Return true if it is known that V1 != V2. +static bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL, +                            const Query &Q) { +  if (V1->getType()->isVectorTy() || V1 == V2) +    return false; +  if (V1->getType() != V2->getType()) +    // We can't look through casts yet. +    return false; +  if (isAddOfNonZero(V1, V2, DL, Q) || isAddOfNonZero(V2, V1, DL, Q)) +    return true; + +  if (IntegerType *Ty = dyn_cast<IntegerType>(V1->getType())) { +    // Are any known bits in V1 contradictory to known bits in V2? If V1 +    // has a known zero where V2 has a known one, they must not be equal. +    auto BitWidth = Ty->getBitWidth(); +    APInt KnownZero1(BitWidth, 0); +    APInt KnownOne1(BitWidth, 0); +    computeKnownBits(V1, KnownZero1, KnownOne1, DL, 0, Q); +    APInt KnownZero2(BitWidth, 0); +    APInt KnownOne2(BitWidth, 0); +    computeKnownBits(V2, KnownZero2, KnownOne2, DL, 0, Q); + +    auto OppositeBits = (KnownZero1 & KnownOne2) | (KnownZero2 & KnownOne1); +    if (OppositeBits.getBoolValue()) +      return true; +  } +  return false; +} +  /// Return true if 'V & Mask' is known to be zero.  We use this predicate to  /// simplify operations downstream. Mask is known to be zero for bits that V  /// cannot have. @@ -2545,7 +2783,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,        }        // This insert value inserts something else than what we are looking for. 
-      // See if the (aggregrate) value inserted into has the value we are +      // See if the (aggregate) value inserted into has the value we are        // looking for, then.        if (*req_idx != *i)          return FindInsertedValue(I->getAggregateOperand(), idx_range, @@ -2560,7 +2798,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef<unsigned> idx_range,    }    if (ExtractValueInst *I = dyn_cast<ExtractValueInst>(V)) { -    // If we're extracting a value from an aggregrate that was extracted from +    // If we're extracting a value from an aggregate that was extracted from      // something else, we can extract from that something else directly instead.      // However, we will need to chain I's indices with the requested indices. @@ -2935,20 +3173,42 @@ static bool isDereferenceableFromAttribute(const Value *V, const DataLayout &DL,    return isDereferenceableFromAttribute(V, Offset, Ty, DL, CtxI, DT, TLI);  } -/// Return true if Value is always a dereferenceable pointer. -/// +static bool isAligned(const Value *Base, APInt Offset, unsigned Align, +                      const DataLayout &DL) { +  APInt BaseAlign(Offset.getBitWidth(), getAlignment(Base, DL)); + +  if (!BaseAlign) { +    Type *Ty = Base->getType()->getPointerElementType(); +    if (!Ty->isSized()) +      return false; +    BaseAlign = DL.getABITypeAlignment(Ty); +  } + +  APInt Alignment(Offset.getBitWidth(), Align); + +  assert(Alignment.isPowerOf2() && "must be a power of 2!"); +  return BaseAlign.uge(Alignment) && !(Offset & (Alignment-1)); +} + +static bool isAligned(const Value *Base, unsigned Align, const DataLayout &DL) { +  Type *Ty = Base->getType(); +  assert(Ty->isSized() && "must be sized"); +  APInt Offset(DL.getTypeStoreSizeInBits(Ty), 0); +  return isAligned(Base, Offset, Align, DL); +} +  /// Test if V is always a pointer to allocated and suitably aligned memory for  /// a simple load or store. -static bool isDereferenceablePointer(const Value *V, const DataLayout &DL, -                                     const Instruction *CtxI, -                                     const DominatorTree *DT, -                                     const TargetLibraryInfo *TLI, -                                     SmallPtrSetImpl<const Value *> &Visited) { +static bool isDereferenceableAndAlignedPointer( +    const Value *V, unsigned Align, const DataLayout &DL, +    const Instruction *CtxI, const DominatorTree *DT, +    const TargetLibraryInfo *TLI, SmallPtrSetImpl<const Value *> &Visited) {    // Note that it is not safe to speculate into a malloc'd region because    // malloc may return null. -  // These are obviously ok. -  if (isa<AllocaInst>(V)) return true; +  // These are obviously ok if aligned. +  if (isa<AllocaInst>(V)) +    return isAligned(V, Align, DL);    // It's not always safe to follow a bitcast, for example:    //   bitcast i8* (alloca i8) to i32* @@ -2963,21 +3223,22 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,      if (STy->isSized() && DTy->isSized() &&          (DL.getTypeStoreSize(STy) >= DL.getTypeStoreSize(DTy)) &&          (DL.getABITypeAlignment(STy) >= DL.getABITypeAlignment(DTy))) -      return isDereferenceablePointer(BC->getOperand(0), DL, CtxI, -                                      DT, TLI, Visited); +      return isDereferenceableAndAlignedPointer(BC->getOperand(0), Align, DL, +                                                CtxI, DT, TLI, Visited);    }    // Global variables which can't collapse to null are ok.    
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) -    return !GV->hasExternalWeakLinkage(); +    if (!GV->hasExternalWeakLinkage()) +      return isAligned(V, Align, DL);    // byval arguments are okay.    if (const Argument *A = dyn_cast<Argument>(V))      if (A->hasByValAttr()) -      return true; -     +      return isAligned(V, Align, DL); +    if (isDereferenceableFromAttribute(V, DL, CtxI, DT, TLI)) -    return true; +    return isAligned(V, Align, DL);    // For GEPs, determine if the indexing lands within the allocated object.    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V)) { @@ -2985,61 +3246,79 @@ static bool isDereferenceablePointer(const Value *V, const DataLayout &DL,      Type *Ty = VTy->getPointerElementType();      const Value *Base = GEP->getPointerOperand(); -    // Conservatively require that the base pointer be fully dereferenceable. +    // Conservatively require that the base pointer be fully dereferenceable +    // and aligned.      if (!Visited.insert(Base).second)        return false; -    if (!isDereferenceablePointer(Base, DL, CtxI, -                                  DT, TLI, Visited)) +    if (!isDereferenceableAndAlignedPointer(Base, Align, DL, CtxI, DT, TLI, +                                            Visited))        return false; -     +      APInt Offset(DL.getPointerTypeSizeInBits(VTy), 0);      if (!GEP->accumulateConstantOffset(DL, Offset))        return false; -     -    // Check if the load is within the bounds of the underlying object. + +    // Check if the load is within the bounds of the underlying object +    // and offset is aligned.      uint64_t LoadSize = DL.getTypeStoreSize(Ty);      Type *BaseType = Base->getType()->getPointerElementType(); -    return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)); +    assert(isPowerOf2_32(Align) && "must be a power of 2!"); +    return (Offset + LoadSize).ule(DL.getTypeAllocSize(BaseType)) &&  +           !(Offset & APInt(Offset.getBitWidth(), Align-1));    }    // For gc.relocate, look through relocations    if (const IntrinsicInst *I = dyn_cast<IntrinsicInst>(V))      if (I->getIntrinsicID() == Intrinsic::experimental_gc_relocate) {        GCRelocateOperands RelocateInst(I); -      return isDereferenceablePointer(RelocateInst.getDerivedPtr(), DL, CtxI, -                                      DT, TLI, Visited); +      return isDereferenceableAndAlignedPointer( +          RelocateInst.getDerivedPtr(), Align, DL, CtxI, DT, TLI, Visited);      }    if (const AddrSpaceCastInst *ASC = dyn_cast<AddrSpaceCastInst>(V)) -    return isDereferenceablePointer(ASC->getOperand(0), DL, CtxI, -                                    DT, TLI, Visited); +    return isDereferenceableAndAlignedPointer(ASC->getOperand(0), Align, DL, +                                              CtxI, DT, TLI, Visited);    // If we don't know, assume the worst.    
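The isAligned helpers and the GEP case above boil down to small integer checks; before the conservative fallback that follows, here is a minimal standalone sketch of that arithmetic in plain C++ (the function name and constants are invented for illustration, this is not code from the patch):

#include <cassert>
#include <cstdint>

// A load of LoadSize bytes at constant byte Offset from a base object of
// AllocSize bytes, whose base pointer is BaseAlign-aligned, is in bounds and
// Align-aligned when the end stays inside the object and the offset keeps
// the required alignment.
bool gepLoadIsSafe(uint64_t Offset, uint64_t LoadSize, uint64_t AllocSize,
                   uint64_t BaseAlign, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "alignment must be a power of 2");
  bool InBounds = Offset + LoadSize <= AllocSize;   // (Offset + LoadSize).ule(AllocSize)
  bool Aligned = BaseAlign >= Align && (Offset & (Align - 1)) == 0;
  return InBounds && Aligned;
}
// e.g. gepLoadIsSafe(/*Offset=*/8, /*LoadSize=*/4, /*AllocSize=*/16,
//                    /*BaseAlign=*/4, /*Align=*/4) returns true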
return false;  } -bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, -                                    const Instruction *CtxI, -                                    const DominatorTree *DT, -                                    const TargetLibraryInfo *TLI) { +bool llvm::isDereferenceableAndAlignedPointer(const Value *V, unsigned Align, +                                              const DataLayout &DL, +                                              const Instruction *CtxI, +                                              const DominatorTree *DT, +                                              const TargetLibraryInfo *TLI) {    // When dereferenceability information is provided by a dereferenceable    // attribute, we know exactly how many bytes are dereferenceable. If we can    // determine the exact offset to the attributed variable, we can use that    // information here.    Type *VTy = V->getType();    Type *Ty = VTy->getPointerElementType(); + +  // Require ABI alignment for loads without alignment specification +  if (Align == 0) +    Align = DL.getABITypeAlignment(Ty); +    if (Ty->isSized()) {      APInt Offset(DL.getTypeStoreSizeInBits(VTy), 0);      const Value *BV = V->stripAndAccumulateInBoundsConstantOffsets(DL, Offset); -     +      if (Offset.isNonNegative()) -      if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, -                                         CtxI, DT, TLI)) +      if (isDereferenceableFromAttribute(BV, Offset, Ty, DL, CtxI, DT, TLI) && +          isAligned(BV, Offset, Align, DL))          return true;    }    SmallPtrSet<const Value *, 32> Visited; -  return ::isDereferenceablePointer(V, DL, CtxI, DT, TLI, Visited); +  return ::isDereferenceableAndAlignedPointer(V, Align, DL, CtxI, DT, TLI, +                                              Visited); +} + +bool llvm::isDereferenceablePointer(const Value *V, const DataLayout &DL, +                                    const Instruction *CtxI, +                                    const DominatorTree *DT, +                                    const TargetLibraryInfo *TLI) { +  return isDereferenceableAndAlignedPointer(V, 1, DL, CtxI, DT, TLI);  }  bool llvm::isSafeToSpeculativelyExecute(const Value *V, @@ -3089,10 +3368,15 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,      const LoadInst *LI = cast<LoadInst>(Inst);      if (!LI->isUnordered() ||          // Speculative load may create a race that did not exist in the source. -        LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread)) +        LI->getParent()->getParent()->hasFnAttribute( +            Attribute::SanitizeThread) || +        // Speculative load may load data from dirty regions. 
+        LI->getParent()->getParent()->hasFnAttribute( +            Attribute::SanitizeAddress))        return false;      const DataLayout &DL = LI->getModule()->getDataLayout(); -    return isDereferenceablePointer(LI->getPointerOperand(), DL, CtxI, DT, TLI); +    return isDereferenceableAndAlignedPointer( +        LI->getPointerOperand(), LI->getAlignment(), DL, CtxI, DT, TLI);    }    case Instruction::Call: {      if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { @@ -3147,16 +3431,27 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,    case Instruction::Switch:    case Instruction::Unreachable:    case Instruction::Fence: -  case Instruction::LandingPad:    case Instruction::AtomicRMW:    case Instruction::AtomicCmpXchg: +  case Instruction::LandingPad:    case Instruction::Resume: +  case Instruction::CatchSwitch: +  case Instruction::CatchPad: +  case Instruction::CatchRet: +  case Instruction::CleanupPad: +  case Instruction::CleanupRet:      return false; // Misc instructions which have effects    }  } +bool llvm::mayBeMemoryDependent(const Instruction &I) { +  return I.mayReadOrWriteMemory() || !isSafeToSpeculativelyExecute(&I); +} +  /// Return true if we know that the specified value is never null.  bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { +  assert(V->getType()->isPointerTy() && "V must be pointer type"); +    // Alloca never returns null, malloc might.    if (isa<AllocaInst>(V)) return true; @@ -3164,9 +3459,12 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {    if (const Argument *A = dyn_cast<Argument>(V))      return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr(); -  // Global values are not null unless extern weak. +  // A global variable in address space 0 is non null unless extern weak. +  // Other address spaces may have null as a valid address for a global, +  // so we can't assume anything.    if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) -    return !GV->hasExternalWeakLinkage(); +    return !GV->hasExternalWeakLinkage() && +           GV->getType()->getAddressSpace() == 0;    // A Load tagged w/nonnull metadata is never null.     
if (const LoadInst *LI = dyn_cast<LoadInst>(V)) @@ -3186,6 +3484,8 @@ bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) {  static bool isKnownNonNullFromDominatingCondition(const Value *V,                                                    const Instruction *CtxI,                                                    const DominatorTree *DT) { +  assert(V->getType()->isPointerTy() && "V must be pointer type"); +    unsigned NumUsesExplored = 0;    for (auto U : V->users()) {      // Avoid massive lists @@ -3316,40 +3616,339 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,    return OverflowResult::MayOverflow;  } -static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred, +static OverflowResult computeOverflowForSignedAdd( +    Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL, +    AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) { +  if (Add && Add->hasNoSignedWrap()) { +    return OverflowResult::NeverOverflows; +  } + +  bool LHSKnownNonNegative, LHSKnownNegative; +  bool RHSKnownNonNegative, RHSKnownNegative; +  ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, /*Depth=*/0, +                 AC, CxtI, DT); +  ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, /*Depth=*/0, +                 AC, CxtI, DT); + +  if ((LHSKnownNonNegative && RHSKnownNegative) || +      (LHSKnownNegative && RHSKnownNonNegative)) { +    // The sign bits are opposite: this CANNOT overflow. +    return OverflowResult::NeverOverflows; +  } + +  // The remaining code needs Add to be available. Early returns if not so. +  if (!Add) +    return OverflowResult::MayOverflow; + +  // If the sign of Add is the same as at least one of the operands, this add +  // CANNOT overflow. This is particularly useful when the sum is +  // @llvm.assume'ed non-negative rather than proved so from analyzing its +  // operands. 
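The code that follows implements exactly this rule; as a quick standalone check of the sign-bit argument in ordinary two's-complement arithmetic (a plain C++ illustration with made-up names, not LLVM code):

#include <cstdint>

// Wrapping signed add, the behavior of an IR 'add' without the nsw flag.
static int32_t wrappingAdd(int32_t A, int32_t B) {
  return static_cast<int32_t>(static_cast<uint32_t>(A) +
                              static_cast<uint32_t>(B));
}

// With both operands known non-negative, signed overflow happened exactly
// when the wrapped result turned negative, so a result whose sign matches a
// known operand sign certifies no-signed-wrap (symmetrically for two
// negative operands). Operands with opposite signs can never overflow.
bool overflowedBothNonNegative(int32_t A, int32_t B) {
  return A >= 0 && B >= 0 && wrappingAdd(A, B) < 0;
}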
+  bool LHSOrRHSKnownNonNegative = +      (LHSKnownNonNegative || RHSKnownNonNegative); +  bool LHSOrRHSKnownNegative = (LHSKnownNegative || RHSKnownNegative); +  if (LHSOrRHSKnownNonNegative || LHSOrRHSKnownNegative) { +    bool AddKnownNonNegative, AddKnownNegative; +    ComputeSignBit(Add, AddKnownNonNegative, AddKnownNegative, DL, +                   /*Depth=*/0, AC, CxtI, DT); +    if ((AddKnownNonNegative && LHSOrRHSKnownNonNegative) || +        (AddKnownNegative && LHSOrRHSKnownNegative)) { +      return OverflowResult::NeverOverflows; +    } +  } + +  return OverflowResult::MayOverflow; +} + +OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add, +                                                 const DataLayout &DL, +                                                 AssumptionCache *AC, +                                                 const Instruction *CxtI, +                                                 const DominatorTree *DT) { +  return ::computeOverflowForSignedAdd(Add->getOperand(0), Add->getOperand(1), +                                       Add, DL, AC, CxtI, DT); +} + +OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS, +                                                 const DataLayout &DL, +                                                 AssumptionCache *AC, +                                                 const Instruction *CxtI, +                                                 const DominatorTree *DT) { +  return ::computeOverflowForSignedAdd(LHS, RHS, nullptr, DL, AC, CxtI, DT); +} + +bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) { +  // FIXME: This conservative implementation can be relaxed. E.g. most +  // atomic operations are guaranteed to terminate on most platforms +  // and most functions terminate. + +  return !I->isAtomic() &&       // atomics may never succeed on some platforms +         !isa<CallInst>(I) &&    // could throw and might not terminate +         !isa<InvokeInst>(I) &&  // might not terminate and could throw to +                                 //   non-successor (see bug 24185 for details). +         !isa<ResumeInst>(I) &&  // has no successors +         !isa<ReturnInst>(I);    // has no successors +} + +bool llvm::isGuaranteedToExecuteForEveryIteration(const Instruction *I, +                                                  const Loop *L) { +  // The loop header is guaranteed to be executed for every iteration. +  // +  // FIXME: Relax this constraint to cover all basic blocks that are +  // guaranteed to be executed at every iteration. +  if (I->getParent() != L->getHeader()) return false; + +  for (const Instruction &LI : *L->getHeader()) { +    if (&LI == I) return true; +    if (!isGuaranteedToTransferExecutionToSuccessor(&LI)) return false; +  } +  llvm_unreachable("Instruction not contained in its own parent basic block."); +} + +bool llvm::propagatesFullPoison(const Instruction *I) { +  switch (I->getOpcode()) { +    case Instruction::Add: +    case Instruction::Sub: +    case Instruction::Xor: +    case Instruction::Trunc: +    case Instruction::BitCast: +    case Instruction::AddrSpaceCast: +      // These operations all propagate poison unconditionally. Note that poison +      // is not any particular value, so xor or subtraction of poison with +      // itself still yields poison, not zero. 
+      return true; + +    case Instruction::AShr: +    case Instruction::SExt: +      // For these operations, one bit of the input is replicated across +      // multiple output bits. A replicated poison bit is still poison. +      return true; + +    case Instruction::Shl: { +      // Left shift *by* a poison value is poison. The number of +      // positions to shift is unsigned, so no negative values are +      // possible there. Left shift by zero places preserves poison. So +      // it only remains to consider left shift of poison by a positive +      // number of places. +      // +      // A left shift by a positive number of places leaves the lowest order bit +      // non-poisoned. However, if such a shift has a no-wrap flag, then we can +      // make the poison operand violate that flag, yielding a fresh full-poison +      // value. +      auto *OBO = cast<OverflowingBinaryOperator>(I); +      return OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap(); +    } + +    case Instruction::Mul: { +      // A multiplication by zero yields a non-poison zero result, so we need to +      // rule out zero as an operand. Conservatively, multiplication by a +      // non-zero constant is not multiplication by zero. +      // +      // Multiplication by a non-zero constant can leave some bits +      // non-poisoned. For example, a multiplication by 2 leaves the lowest +      // order bit unpoisoned. So we need to consider that. +      // +      // Multiplication by 1 preserves poison. If the multiplication has a +      // no-wrap flag, then we can make the poison operand violate that flag +      // when multiplied by any integer other than 0 and 1. +      auto *OBO = cast<OverflowingBinaryOperator>(I); +      if (OBO->hasNoUnsignedWrap() || OBO->hasNoSignedWrap()) { +        for (Value *V : OBO->operands()) { +          if (auto *CI = dyn_cast<ConstantInt>(V)) { +            // A ConstantInt cannot yield poison, so we can assume that it is +            // the other operand that is poison. +            return !CI->isZero(); +          } +        } +      } +      return false; +    } + +    case Instruction::GetElementPtr: +      // A GEP implicitly represents a sequence of additions, subtractions, +      // truncations, sign extensions and multiplications. The multiplications +      // are by the non-zero sizes of some set of types, so we do not have to be +      // concerned with multiplication by zero. If the GEP is in-bounds, then +      // these operations are implicitly no-signed-wrap so poison is propagated +      // by the arguments above for Add, Sub, Trunc, SExt and Mul. 
+      return cast<GEPOperator>(I)->isInBounds(); + +    default: +      return false; +  } +} + +const Value *llvm::getGuaranteedNonFullPoisonOp(const Instruction *I) { +  switch (I->getOpcode()) { +    case Instruction::Store: +      return cast<StoreInst>(I)->getPointerOperand(); + +    case Instruction::Load: +      return cast<LoadInst>(I)->getPointerOperand(); + +    case Instruction::AtomicCmpXchg: +      return cast<AtomicCmpXchgInst>(I)->getPointerOperand(); + +    case Instruction::AtomicRMW: +      return cast<AtomicRMWInst>(I)->getPointerOperand(); + +    case Instruction::UDiv: +    case Instruction::SDiv: +    case Instruction::URem: +    case Instruction::SRem: +      return I->getOperand(1); + +    default: +      return nullptr; +  } +} + +bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) { +  // We currently only look for uses of poison values within the same basic +  // block, as that makes it easier to guarantee that the uses will be +  // executed given that PoisonI is executed. +  // +  // FIXME: Expand this to consider uses beyond the same basic block. To do +  // this, look out for the distinction between post-dominance and strong +  // post-dominance. +  const BasicBlock *BB = PoisonI->getParent(); + +  // Set of instructions that we have proved will yield poison if PoisonI +  // does. +  SmallSet<const Value *, 16> YieldsPoison; +  YieldsPoison.insert(PoisonI); + +  for (BasicBlock::const_iterator I = PoisonI->getIterator(), E = BB->end(); +       I != E; ++I) { +    if (&*I != PoisonI) { +      const Value *NotPoison = getGuaranteedNonFullPoisonOp(&*I); +      if (NotPoison != nullptr && YieldsPoison.count(NotPoison)) return true; +      if (!isGuaranteedToTransferExecutionToSuccessor(&*I)) +        return false; +    } + +    // Mark poison that propagates from I through uses of I. +    if (YieldsPoison.count(&*I)) { +      for (const User *User : I->users()) { +        const Instruction *UserI = cast<Instruction>(User); +        if (UserI->getParent() == BB && propagatesFullPoison(UserI)) +          YieldsPoison.insert(User); +      } +    } +  } +  return false; +} + +static bool isKnownNonNaN(Value *V, FastMathFlags FMF) { +  if (FMF.noNaNs()) +    return true; + +  if (auto *C = dyn_cast<ConstantFP>(V)) +    return !C->isNaN(); +  return false; +} + +static bool isKnownNonZero(Value *V) { +  if (auto *C = dyn_cast<ConstantFP>(V)) +    return !C->isZero(); +  return false; +} + +static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred, +                                              FastMathFlags FMF,                                                Value *CmpLHS, Value *CmpRHS,                                                Value *TrueVal, Value *FalseVal,                                                Value *&LHS, Value *&RHS) {    LHS = CmpLHS;    RHS = CmpRHS; -  // (icmp X, Y) ? X : Y -  if (TrueVal == CmpLHS && FalseVal == CmpRHS) { -    switch (Pred) { -    default: return SPF_UNKNOWN; // Equality. -    case ICmpInst::ICMP_UGT: -    case ICmpInst::ICMP_UGE: return SPF_UMAX; -    case ICmpInst::ICMP_SGT: -    case ICmpInst::ICMP_SGE: return SPF_SMAX; -    case ICmpInst::ICMP_ULT: -    case ICmpInst::ICMP_ULE: return SPF_UMIN; -    case ICmpInst::ICMP_SLT: -    case ICmpInst::ICMP_SLE: return SPF_SMIN; +  // If the predicate is an "or-equal"  (FP) predicate, then signed zeroes may +  // return inconsistent results between implementations. +  //   (0.0 <= -0.0) ? 
0.0 : -0.0 // Returns 0.0 +  //   minNum(0.0, -0.0)          // May return -0.0 or 0.0 (IEEE 754-2008 5.3.1) +  // Therefore we behave conservatively and only proceed if at least one of the +  // operands is known to not be zero, or if we don't care about signed zeroes. +  switch (Pred) { +  default: break; +  case CmpInst::FCMP_OGE: case CmpInst::FCMP_OLE: +  case CmpInst::FCMP_UGE: case CmpInst::FCMP_ULE: +    if (!FMF.noSignedZeros() && !isKnownNonZero(CmpLHS) && +        !isKnownNonZero(CmpRHS)) +      return {SPF_UNKNOWN, SPNB_NA, false}; +  } + +  SelectPatternNaNBehavior NaNBehavior = SPNB_NA; +  bool Ordered = false; + +  // When given one NaN and one non-NaN input: +  //   - maxnum/minnum (C99 fmaxf()/fminf()) return the non-NaN input. +  //   - A simple C99 (a < b ? a : b) construction will return 'b' (as the +  //     ordered comparison fails), which could be NaN or non-NaN. +  // so here we discover exactly what NaN behavior is required/accepted. +  if (CmpInst::isFPPredicate(Pred)) { +    bool LHSSafe = isKnownNonNaN(CmpLHS, FMF); +    bool RHSSafe = isKnownNonNaN(CmpRHS, FMF); + +    if (LHSSafe && RHSSafe) { +      // Both operands are known non-NaN. +      NaNBehavior = SPNB_RETURNS_ANY; +    } else if (CmpInst::isOrdered(Pred)) { +      // An ordered comparison will return false when given a NaN, so it +      // returns the RHS. +      Ordered = true; +      if (LHSSafe) +        // LHS is non-NaN, so if RHS is NaN then NaN will be returned. +        NaNBehavior = SPNB_RETURNS_NAN; +      else if (RHSSafe) +        NaNBehavior = SPNB_RETURNS_OTHER; +      else +        // Completely unsafe. +        return {SPF_UNKNOWN, SPNB_NA, false}; +    } else { +      Ordered = false; +      // An unordered comparison will return true when given a NaN, so it +      // returns the LHS. +      if (LHSSafe) +        // LHS is non-NaN, so if RHS is NaN then non-NaN will be returned. +        NaNBehavior = SPNB_RETURNS_OTHER; +      else if (RHSSafe) +        NaNBehavior = SPNB_RETURNS_NAN; +      else +        // Completely unsafe. +        return {SPF_UNKNOWN, SPNB_NA, false};      }    } -  // (icmp X, Y) ? Y : X    if (TrueVal == CmpRHS && FalseVal == CmpLHS) { +    std::swap(CmpLHS, CmpRHS); +    Pred = CmpInst::getSwappedPredicate(Pred); +    if (NaNBehavior == SPNB_RETURNS_NAN) +      NaNBehavior = SPNB_RETURNS_OTHER; +    else if (NaNBehavior == SPNB_RETURNS_OTHER) +      NaNBehavior = SPNB_RETURNS_NAN; +    Ordered = !Ordered; +  } + +  // ([if]cmp X, Y) ? X : Y +  if (TrueVal == CmpLHS && FalseVal == CmpRHS) {      switch (Pred) { -    default: return SPF_UNKNOWN; // Equality. +    default: return {SPF_UNKNOWN, SPNB_NA, false}; // Equality.      
case ICmpInst::ICMP_UGT: -    case ICmpInst::ICMP_UGE: return SPF_UMIN; +    case ICmpInst::ICMP_UGE: return {SPF_UMAX, SPNB_NA, false};      case ICmpInst::ICMP_SGT: -    case ICmpInst::ICMP_SGE: return SPF_SMIN; +    case ICmpInst::ICMP_SGE: return {SPF_SMAX, SPNB_NA, false};      case ICmpInst::ICMP_ULT: -    case ICmpInst::ICMP_ULE: return SPF_UMAX; +    case ICmpInst::ICMP_ULE: return {SPF_UMIN, SPNB_NA, false};      case ICmpInst::ICMP_SLT: -    case ICmpInst::ICMP_SLE: return SPF_SMAX; +    case ICmpInst::ICMP_SLE: return {SPF_SMIN, SPNB_NA, false}; +    case FCmpInst::FCMP_UGT: +    case FCmpInst::FCMP_UGE: +    case FCmpInst::FCMP_OGT: +    case FCmpInst::FCMP_OGE: return {SPF_FMAXNUM, NaNBehavior, Ordered}; +    case FCmpInst::FCMP_ULT: +    case FCmpInst::FCMP_ULE: +    case FCmpInst::FCMP_OLT: +    case FCmpInst::FCMP_OLE: return {SPF_FMINNUM, NaNBehavior, Ordered};      }    } @@ -3360,13 +3959,13 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,        // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X        // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X        if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) { -        return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS; +        return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};        }        // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X        // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X        if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) { -        return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS; +        return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};        }      } @@ -3377,24 +3976,36 @@ static SelectPatternFlavor matchSelectPattern(ICmpInst::Predicate Pred,             match(CmpLHS, m_Not(m_Specific(TrueVal))))) {          LHS = TrueVal;          RHS = FalseVal; -        return SPF_SMIN; +        return {SPF_SMIN, SPNB_NA, false};        }      }    }    // TODO: (X > 4) ? X : 5   -->  (X >= 5) ? X : 5  -->  MAX(X, 5) -  return SPF_UNKNOWN; +  return {SPF_UNKNOWN, SPNB_NA, false};  } -static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2, -                                 Instruction::CastOps *CastOp) { +static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2, +                              Instruction::CastOps *CastOp) {    CastInst *CI = dyn_cast<CastInst>(V1);    Constant *C = dyn_cast<Constant>(V2); -  if (!CI || !C) +  CastInst *CI2 = dyn_cast<CastInst>(V2); +  if (!CI)      return nullptr;    *CastOp = CI->getOpcode(); +  if (CI2) { +    // If V1 and V2 are both the same cast from the same type, we can look +    // through V1. 
+    if (CI2->getOpcode() == CI->getOpcode() && +        CI2->getSrcTy() == CI->getSrcTy()) +      return CI2->getOperand(0); +    return nullptr; +  } else if (!C) { +    return nullptr; +  } +    if (isa<SExtInst>(CI) && CmpI->isSigned()) {      Constant *T = ConstantExpr::getTrunc(C, CI->getSrcTy());      // This is only valid if the truncated value can be sign-extended @@ -3409,39 +4020,200 @@ static Constant *lookThroughCast(ICmpInst *CmpI, Value *V1, Value *V2,    if (isa<TruncInst>(CI))      return ConstantExpr::getIntegerCast(C, CI->getSrcTy(), CmpI->isSigned()); +  if (isa<FPToUIInst>(CI)) +    return ConstantExpr::getUIToFP(C, CI->getSrcTy(), true); + +  if (isa<FPToSIInst>(CI)) +    return ConstantExpr::getSIToFP(C, CI->getSrcTy(), true); + +  if (isa<UIToFPInst>(CI)) +    return ConstantExpr::getFPToUI(C, CI->getSrcTy(), true); + +  if (isa<SIToFPInst>(CI)) +    return ConstantExpr::getFPToSI(C, CI->getSrcTy(), true); + +  if (isa<FPTruncInst>(CI)) +    return ConstantExpr::getFPExtend(C, CI->getSrcTy(), true); + +  if (isa<FPExtInst>(CI)) +    return ConstantExpr::getFPTrunc(C, CI->getSrcTy(), true); +    return nullptr;  } -SelectPatternFlavor llvm::matchSelectPattern(Value *V, +SelectPatternResult llvm::matchSelectPattern(Value *V,                                               Value *&LHS, Value *&RHS,                                               Instruction::CastOps *CastOp) {    SelectInst *SI = dyn_cast<SelectInst>(V); -  if (!SI) return SPF_UNKNOWN; +  if (!SI) return {SPF_UNKNOWN, SPNB_NA, false}; -  ICmpInst *CmpI = dyn_cast<ICmpInst>(SI->getCondition()); -  if (!CmpI) return SPF_UNKNOWN; +  CmpInst *CmpI = dyn_cast<CmpInst>(SI->getCondition()); +  if (!CmpI) return {SPF_UNKNOWN, SPNB_NA, false}; -  ICmpInst::Predicate Pred = CmpI->getPredicate(); +  CmpInst::Predicate Pred = CmpI->getPredicate();    Value *CmpLHS = CmpI->getOperand(0);    Value *CmpRHS = CmpI->getOperand(1);    Value *TrueVal = SI->getTrueValue();    Value *FalseVal = SI->getFalseValue(); +  FastMathFlags FMF; +  if (isa<FPMathOperator>(CmpI)) +    FMF = CmpI->getFastMathFlags();    // Bail out early.    if (CmpI->isEquality()) -    return SPF_UNKNOWN; +    return {SPF_UNKNOWN, SPNB_NA, false};    // Deal with type mismatches.    
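Stepping back to the signed-zero guard added near the top of matchSelectPattern (the type-mismatch handling continues below): a tiny self-contained C++ illustration, not taken from the patch, of why an "or-equal" compare-and-select and fmin can disagree about which zero they return:

#include <cmath>
#include <cstdio>

int main() {
  float A = 0.0f, B = -0.0f;
  float Sel = (A <= B) ? A : B;  // 0.0f <= -0.0f is true, so Sel is +0.0f
  float Min = std::fmin(A, B);   // minNum may legitimately return -0.0f here
  std::printf("select picks %c0, fmin picks %c0\n",
              std::signbit(Sel) ? '-' : '+', std::signbit(Min) ? '-' : '+');
  return 0;
}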
if (CastOp && CmpLHS->getType() != TrueVal->getType()) { -    if (Constant *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) -      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, +    if (Value *C = lookThroughCast(CmpI, TrueVal, FalseVal, CastOp)) +      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,                                    cast<CastInst>(TrueVal)->getOperand(0), C,                                    LHS, RHS); -    if (Constant *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) -      return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, +    if (Value *C = lookThroughCast(CmpI, FalseVal, TrueVal, CastOp)) +      return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS,                                    C, cast<CastInst>(FalseVal)->getOperand(0),                                    LHS, RHS);    } -  return ::matchSelectPattern(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, +  return ::matchSelectPattern(Pred, FMF, CmpLHS, CmpRHS, TrueVal, FalseVal,                                LHS, RHS);  } + +ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) { +  const unsigned NumRanges = Ranges.getNumOperands() / 2; +  assert(NumRanges >= 1 && "Must have at least one range!"); +  assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs"); + +  auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0)); +  auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1)); + +  ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue()); + +  for (unsigned i = 1; i < NumRanges; ++i) { +    auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0)); +    auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1)); + +    // Note: unionWith will potentially create a range that contains values not +    // contained in any of the original N ranges. +    CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue())); +  } + +  return CR; +} + +/// Return true if "icmp Pred LHS RHS" is always true. 
+static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS, +                            const DataLayout &DL, unsigned Depth, +                            AssumptionCache *AC, const Instruction *CxtI, +                            const DominatorTree *DT) { +  assert(!LHS->getType()->isVectorTy() && "TODO: extend to handle vectors!"); +  if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) +    return true; + +  switch (Pred) { +  default: +    return false; + +  case CmpInst::ICMP_SLE: { +    const APInt *C; + +    // LHS s<= LHS +_{nsw} C   if C >= 0 +    if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C)))) +      return !C->isNegative(); +    return false; +  } + +  case CmpInst::ICMP_ULE: { +    const APInt *C; + +    // LHS u<= LHS +_{nuw} C   for any C +    if (match(RHS, m_NUWAdd(m_Specific(LHS), m_APInt(C)))) +      return true; + +    // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) +    auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X, +                                       const APInt *&CA, const APInt *&CB) { +      if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && +          match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) +        return true; + +      // If X & C == 0 then (X | C) == X +_{nuw} C +      if (match(A, m_Or(m_Value(X), m_APInt(CA))) && +          match(B, m_Or(m_Specific(X), m_APInt(CB)))) { +        unsigned BitWidth = CA->getBitWidth(); +        APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); +        computeKnownBits(X, KnownZero, KnownOne, DL, Depth + 1, AC, CxtI, DT); + +        if ((KnownZero & *CA) == *CA && (KnownZero & *CB) == *CB) +          return true; +      } + +      return false; +    }; + +    Value *X; +    const APInt *CLHS, *CRHS; +    if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) +      return CLHS->ule(*CRHS); + +    return false; +  } +  } +} + +/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred +/// ALHS ARHS" is true. 
+static bool isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, +                                  Value *ARHS, Value *BLHS, Value *BRHS, +                                  const DataLayout &DL, unsigned Depth, +                                  AssumptionCache *AC, const Instruction *CxtI, +                                  const DominatorTree *DT) { +  switch (Pred) { +  default: +    return false; + +  case CmpInst::ICMP_SLT: +  case CmpInst::ICMP_SLE: +    return isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth, AC, CxtI, +                           DT) && +           isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth, AC, CxtI, +                           DT); + +  case CmpInst::ICMP_ULT: +  case CmpInst::ICMP_ULE: +    return isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth, AC, CxtI, +                           DT) && +           isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth, AC, CxtI, +                           DT); +  } +} + +bool llvm::isImpliedCondition(Value *LHS, Value *RHS, const DataLayout &DL, +                              unsigned Depth, AssumptionCache *AC, +                              const Instruction *CxtI, +                              const DominatorTree *DT) { +  assert(LHS->getType() == RHS->getType() && "mismatched type"); +  Type *OpTy = LHS->getType(); +  assert(OpTy->getScalarType()->isIntegerTy(1)); + +  // LHS ==> RHS by definition +  if (LHS == RHS) return true; + +  if (OpTy->isVectorTy()) +    // TODO: extending the code below to handle vectors +    return false; +  assert(OpTy->isIntegerTy(1) && "implied by above"); + +  ICmpInst::Predicate APred, BPred; +  Value *ALHS, *ARHS; +  Value *BLHS, *BRHS; + +  if (!match(LHS, m_ICmp(APred, m_Value(ALHS), m_Value(ARHS))) || +      !match(RHS, m_ICmp(BPred, m_Value(BLHS), m_Value(BRHS)))) +    return false; + +  if (APred == BPred) +    return isImpliedCondOperands(APred, ALHS, ARHS, BLHS, BRHS, DL, Depth, AC, +                                 CxtI, DT); + +  return false; +} diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp index 8c671ef0ef0e..4b244ec5e1f6 100644 --- a/contrib/llvm/lib/Analysis/VectorUtils.cpp +++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp @@ -11,13 +11,20 @@  //  //===----------------------------------------------------------------------===// +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/Analysis/DemandedBits.h"  #include "llvm/Analysis/LoopInfo.h"  #include "llvm/Analysis/ScalarEvolutionExpressions.h"  #include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/TargetTransformInfo.h"  #include "llvm/Analysis/VectorUtils.h"  #include "llvm/IR/GetElementPtrTypeIterator.h"  #include "llvm/IR/PatternMatch.h"  #include "llvm/IR/Value.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; +using namespace llvm::PatternMatch;  /// \brief Identify if the intrinsic is trivially vectorizable.  /// This method returns true if the intrinsic's argument types are all @@ -79,7 +86,7 @@ bool llvm::hasVectorInstrinsicScalarOpd(Intrinsic::ID ID,  /// d) call should only reads memory.  /// If all these condition is met then return ValidIntrinsicID  /// else return not_intrinsic. -llvm::Intrinsic::ID +Intrinsic::ID  llvm::checkUnaryFloatSignature(const CallInst &I,                                 Intrinsic::ID ValidIntrinsicID) {    if (I.getNumArgOperands() != 1 || @@ -98,7 +105,7 @@ llvm::checkUnaryFloatSignature(const CallInst &I,  /// d) call should only reads memory.  
/// If all these condition is met then return ValidIntrinsicID  /// else return not_intrinsic. -llvm::Intrinsic::ID +Intrinsic::ID  llvm::checkBinaryFloatSignature(const CallInst &I,                                  Intrinsic::ID ValidIntrinsicID) {    if (I.getNumArgOperands() != 2 || @@ -114,8 +121,8 @@ llvm::checkBinaryFloatSignature(const CallInst &I,  /// \brief Returns intrinsic ID for call.  /// For the input call instruction it finds mapping intrinsic and returns  /// its ID, in case it does not found it return not_intrinsic. -llvm::Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI, -                                                const TargetLibraryInfo *TLI) { +Intrinsic::ID llvm::getIntrinsicIDForCall(CallInst *CI, +                                          const TargetLibraryInfo *TLI) {    // If we have an intrinsic call, check if it is trivially vectorizable.    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {      Intrinsic::ID ID = II->getIntrinsicID(); @@ -228,8 +235,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {        cast<PointerType>(Gep->getType()->getScalarType())->getElementType());    // Walk backwards and try to peel off zeros. -  while (LastOperand > 1 && -         match(Gep->getOperand(LastOperand), llvm::PatternMatch::m_Zero())) { +  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {      // Find the type we're currently indexing into.      gep_type_iterator GEPTI = gep_type_begin(Gep);      std::advance(GEPTI, LastOperand - 1); @@ -247,8 +253,7 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {  /// \brief If the argument is a GEP, then returns the operand identified by  /// getGEPInductionOperand. However, if there is some other non-loop-invariant  /// operand, it returns that instead. -llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE, -                                      Loop *Lp) { +Value *llvm::stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {    GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);    if (!GEP)      return Ptr; @@ -265,8 +270,8 @@ llvm::Value *llvm::stripGetElementPtr(llvm::Value *Ptr, ScalarEvolution *SE,  }  /// \brief If a value has only one user that is a CastInst, return it. -llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) { -  llvm::Value *UniqueCast = nullptr; +Value *llvm::getUniqueCastUse(Value *Ptr, Loop *Lp, Type *Ty) { +  Value *UniqueCast = nullptr;    for (User *U : Ptr->users()) {      CastInst *CI = dyn_cast<CastInst>(U);      if (CI && CI->getType() == Ty) { @@ -281,16 +286,15 @@ llvm::Value *llvm::getUniqueCastUse(llvm::Value *Ptr, Loop *Lp, Type *Ty) {  /// \brief Get the stride of a pointer access in a loop. Looks for symbolic  /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise. -llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE, -                                        Loop *Lp) { -  const PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); +Value *llvm::getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) { +  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());    if (!PtrTy || PtrTy->isAggregateType())      return nullptr;    // Try to remove a gep instruction to make the pointer (actually index at this    // point) easier analyzable. If OrigPtr is equal to Ptr we are analzying the    // pointer, otherwise, we are analyzing the index. 
-  llvm::Value *OrigPtr = Ptr; +  Value *OrigPtr = Ptr;    // The size of the pointer access.    int64_t PtrAccessSize = 1; @@ -320,8 +324,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,        if (M->getOperand(0)->getSCEVType() != scConstant)          return nullptr; -      const APInt &APStepVal = -          cast<SCEVConstant>(M->getOperand(0))->getValue()->getValue(); +      const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();        // Huge step value - give up.        if (APStepVal.getBitWidth() > 64) @@ -346,7 +349,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,    if (!U)      return nullptr; -  llvm::Value *Stride = U->getValue(); +  Value *Stride = U->getValue();    if (!Lp->isLoopInvariant(Stride))      return nullptr; @@ -361,7 +364,7 @@ llvm::Value *llvm::getStrideFromPointer(llvm::Value *Ptr, ScalarEvolution *SE,  /// \brief Given a vector and an element number, see if the scalar value is  /// already around as a register, for example if it were inserted then extracted  /// from the vector. -llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) { +Value *llvm::findScalarElement(Value *V, unsigned EltNo) {    assert(V->getType()->isVectorTy() && "Not looking at a vector?");    VectorType *VTy = cast<VectorType>(V->getType());    unsigned Width = VTy->getNumElements(); @@ -399,14 +402,166 @@ llvm::Value *llvm::findScalarElement(llvm::Value *V, unsigned EltNo) {    // Extract a value from a vector add operation with a constant zero.    Value *Val = nullptr; Constant *Con = nullptr; -  if (match(V, -            llvm::PatternMatch::m_Add(llvm::PatternMatch::m_Value(Val), -                                      llvm::PatternMatch::m_Constant(Con)))) { +  if (match(V, m_Add(m_Value(Val), m_Constant(Con))))      if (Constant *Elt = Con->getAggregateElement(EltNo))        if (Elt->isNullValue())          return findScalarElement(Val, EltNo); -  }    // Otherwise, we don't know.    return nullptr;  } + +/// \brief Get splat value if the input is a splat vector or return nullptr. +/// This function is not fully general. It checks only 2 cases: +/// the input value is (1) a splat constants vector or (2) a sequence +/// of instructions that broadcast a single value into a vector. +/// +const llvm::Value *llvm::getSplatValue(const Value *V) { + +  if (auto *C = dyn_cast<Constant>(V)) +    if (isa<VectorType>(V->getType())) +      return C->getSplatValue(); + +  auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V); +  if (!ShuffleInst) +    return nullptr; +  // All-zero (or undef) shuffle mask elements. +  for (int MaskElt : ShuffleInst->getShuffleMask()) +    if (MaskElt != 0 && MaskElt != -1) +      return nullptr; +  // The first shuffle source is 'insertelement' with index 0. +  auto *InsertEltInst = +    dyn_cast<InsertElementInst>(ShuffleInst->getOperand(0)); +  if (!InsertEltInst || !isa<ConstantInt>(InsertEltInst->getOperand(2)) || +      !cast<ConstantInt>(InsertEltInst->getOperand(2))->isNullValue()) +    return nullptr; + +  return InsertEltInst->getOperand(1); +} + +MapVector<Instruction *, uint64_t> +llvm::computeMinimumValueSizes(ArrayRef<BasicBlock *> Blocks, DemandedBits &DB, +                               const TargetTransformInfo *TTI) { + +  // DemandedBits will give us every value's live-out bits. But we want +  // to ensure no extra casts would need to be inserted, so every DAG +  // of connected values must have the same minimum bitwidth. 
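As a hypothetical usage sketch of the new getSplatValue() helper defined above (it assumes the declaration is exposed from VectorUtils.h, that an IRBuilder is already positioned inside a function, and the wrapper name is invented; not code from the patch), before the bit-width analysis body that follows:

#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"

// CreateVectorSplat emits the insertelement + zero-mask shufflevector pattern
// that getSplatValue() recognizes, so the original scalar should come back.
const llvm::Value *splatRoundTrip(llvm::IRBuilder<> &Builder,
                                  llvm::Value *Scalar) {
  llvm::Value *Splat = Builder.CreateVectorSplat(/*NumElts=*/4, Scalar);
  return llvm::getSplatValue(Splat); // expected: Scalar (or its constant splat)
}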
+  EquivalenceClasses<Value *> ECs; +  SmallVector<Value *, 16> Worklist; +  SmallPtrSet<Value *, 4> Roots; +  SmallPtrSet<Value *, 16> Visited; +  DenseMap<Value *, uint64_t> DBits; +  SmallPtrSet<Instruction *, 4> InstructionSet; +  MapVector<Instruction *, uint64_t> MinBWs; + +  // Determine the roots. We work bottom-up, from truncs or icmps. +  bool SeenExtFromIllegalType = false; +  for (auto *BB : Blocks) +    for (auto &I : *BB) { +      InstructionSet.insert(&I); + +      if (TTI && (isa<ZExtInst>(&I) || isa<SExtInst>(&I)) && +          !TTI->isTypeLegal(I.getOperand(0)->getType())) +        SeenExtFromIllegalType = true; + +      // Only deal with non-vector integers up to 64-bits wide. +      if ((isa<TruncInst>(&I) || isa<ICmpInst>(&I)) && +          !I.getType()->isVectorTy() && +          I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) { +        // Don't make work for ourselves. If we know the loaded type is legal, +        // don't add it to the worklist. +        if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType())) +          continue; + +        Worklist.push_back(&I); +        Roots.insert(&I); +      } +    } +  // Early exit. +  if (Worklist.empty() || (TTI && !SeenExtFromIllegalType)) +    return MinBWs; + +  // Now proceed breadth-first, unioning values together. +  while (!Worklist.empty()) { +    Value *Val = Worklist.pop_back_val(); +    Value *Leader = ECs.getOrInsertLeaderValue(Val); + +    if (Visited.count(Val)) +      continue; +    Visited.insert(Val); + +    // Non-instructions terminate a chain successfully. +    if (!isa<Instruction>(Val)) +      continue; +    Instruction *I = cast<Instruction>(Val); + +    // If we encounter a type that is larger than 64 bits, we can't represent +    // it so bail out. +    if (DB.getDemandedBits(I).getBitWidth() > 64) +      return MapVector<Instruction *, uint64_t>(); + +    uint64_t V = DB.getDemandedBits(I).getZExtValue(); +    DBits[Leader] |= V; + +    // Casts, loads and instructions outside of our range terminate a chain +    // successfully. +    if (isa<SExtInst>(I) || isa<ZExtInst>(I) || isa<LoadInst>(I) || +        !InstructionSet.count(I)) +      continue; + +    // Unsafe casts terminate a chain unsuccessfully. We can't do anything +    // useful with bitcasts, ptrtoints or inttoptrs and it'd be unsafe to +    // transform anything that relies on them. +    if (isa<BitCastInst>(I) || isa<PtrToIntInst>(I) || isa<IntToPtrInst>(I) || +        !I->getType()->isIntegerTy()) { +      DBits[Leader] |= ~0ULL; +      continue; +    } + +    // We don't modify the types of PHIs. Reductions will already have been +    // truncated if possible, and inductions' sizes will have been chosen by +    // indvars. +    if (isa<PHINode>(I)) +      continue; + +    if (DBits[Leader] == ~0ULL) +      // All bits demanded, no point continuing. +      continue; + +    for (Value *O : cast<User>(I)->operands()) { +      ECs.unionSets(Leader, O); +      Worklist.push_back(O); +    } +  } + +  // Now we've discovered all values, walk them to see if there are +  // any users we didn't see. If there are, we can't optimize that +  // chain. 
+  for (auto &I : DBits) +    for (auto *U : I.first->users()) +      if (U->getType()->isIntegerTy() && DBits.count(U) == 0) +        DBits[ECs.getOrInsertLeaderValue(I.first)] |= ~0ULL; + +  for (auto I = ECs.begin(), E = ECs.end(); I != E; ++I) { +    uint64_t LeaderDemandedBits = 0; +    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) +      LeaderDemandedBits |= DBits[*MI]; + +    uint64_t MinBW = (sizeof(LeaderDemandedBits) * 8) - +                     llvm::countLeadingZeros(LeaderDemandedBits); +    // Round up to a power of 2 +    if (!isPowerOf2_64((uint64_t)MinBW)) +      MinBW = NextPowerOf2(MinBW); +    for (auto MI = ECs.member_begin(I), ME = ECs.member_end(); MI != ME; ++MI) { +      if (!isa<Instruction>(*MI)) +        continue; +      Type *Ty = (*MI)->getType(); +      if (Roots.count(*MI)) +        Ty = cast<Instruction>(*MI)->getOperand(0)->getType(); +      if (MinBW < Ty->getScalarSizeInBits()) +        MinBWs[cast<Instruction>(*MI)] = MinBW; +    } +  } + +  return MinBWs; +}  | 
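To make the final width computation above concrete, here is a small standalone restatement in plain C++ (an illustration only, with an invented function name, not code from the patch):

#include <cstdint>

// Given the OR of all demanded-bits masks in an equivalence class, the
// minimum width is the position of the highest set bit rounded up to a power
// of two; e.g. a mask of 0x1FFF needs 13 bits, which rounds up to 16.
unsigned minimumBitWidth(uint64_t DemandedMask) {
  unsigned Width = 0;
  for (uint64_t M = DemandedMask; M != 0; M >>= 1)
    ++Width;                       // 64 - countLeadingZeros(DemandedMask)
  unsigned Rounded = 1;
  while (Rounded < Width)
    Rounded *= 2;                  // round up to the next power of two
  return Rounded;
}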